/* Output routines for GCC for ARM.
   Copyright (C) 1991-2018 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#define INCLUDE_STRING
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "target-globals.h"
#include "tm-constrs.h"
#include "optabs-libfuncs.h"

/* This file should be included last.  */
#include "target-def.h"
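
/* Background, assuming the standard GCC hook mechanism rather than
   anything stated in this file: target-def.h supplies the default
   definitions of the TARGET_* hook macros, so it must come after every
   other header; the #undef/#define pairs further down then override
   selected hooks before the targetm structure is instantiated.  */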
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* The last .arch and .fpu assembly strings that we printed.  */
static std::string arm_last_printed_arch_string;
static std::string arm_last_printed_fpu_string;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static int arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_compute_frame_layout (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
                             unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
                               machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_core_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *);
static void arm_output_function_prologue (FILE *);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
                                       unsigned HOST_WIDE_INT val,
                                       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
                                         unsigned HOST_WIDE_INT val,
                                         struct four_ints *return_sequence,
                                         int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
                                               machine_mode, int *,
                                               const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
                                  tree, bool);
static rtx arm_function_arg (cumulative_args_t, machine_mode,
                             const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, machine_mode,
                                      const_tree, bool);
static pad_direction arm_function_arg_padding (machine_mode, const_tree);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
                                      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif

static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);
static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
                                   machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);

static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_save (struct cl_target_option *, struct gcc_options *);
static void arm_option_restore (struct gcc_options *,
                                struct cl_target_option *);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_sched_can_speculate_insn (rtx_insn *);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static bool arm_scalar_mode_supported_p (scalar_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
                                        unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (scalar_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
                                                     const_tree type,
                                                     int misalignment,
                                                     bool is_packed);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static void arm_autovectorize_vector_sizes (vector_sizes *);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
                                          const vec_perm_indices &);

static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);
static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                           tree vectype,
                                           int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
                                   enum vect_cost_for_stmt kind,
                                   struct _stmt_vec_info *stmt_info,
                                   int misalign,
                                   enum vect_cost_model_location where);

static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                                         bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                     const_tree);
static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
                                                int reloc);
static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
static opt_scalar_float_mode arm_floatn_mode (int, bool);
static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
static bool arm_modes_tieable_p (machine_mode, machine_mode);
static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  false, NULL, NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  false, NULL, NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  false, arm_handle_pcs_attribute,
    NULL },
  /* Interrupt Service Routines have special prologue and epilogue requirements.  */
  { "isr",          0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "interrupt",    0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "naked",        0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:

     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, false, NULL, NULL },
  { "dllexport",    0, 0, true,  false, false, false, NULL, NULL },
  { "interfacearm", 0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "dllexport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "notshared",    0, 0, false, true,  false, false,
    arm_handle_notshared_attribute, NULL },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
    arm_handle_cmse_nonsecure_entry, NULL },
  { "cmse_nonsecure_call", 0, 0, true, false, false, true,
    arm_handle_cmse_nonsecure_call, NULL },
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
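
/* Illustrative only (not from this file): a source-level use of one of
   the attributes above would look like

     void far_handler (void) __attribute__ ((long_call));

   which forces calls to far_handler to be made indirectly, avoiding the
   26 bit branch range limitation described above.  */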
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE arm_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#endif /* ARM_UNWIND_INFO */
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
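
/* Checking the arithmetic above for the record: anchored offsets run
   from -4088 to +4095 inclusive, so a block spans 4088 + 1 + 4095 = 8184
   bytes, and 8184 = 8 * 1023, giving the divisibility by eight claimed
   in the comment.  */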
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
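
/* Background, assuming the usual meaning of -mrestrict-it: ARMv8
   deprecates IT blocks that cover more than one instruction, so with
   arm_restrict_it set each IT block is limited to a single insn;
   otherwise the Thumb-2 architectural maximum of four insns applies.  */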
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision

/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P arm_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
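
/* A minimal sketch of where the #undef/#define pairs above end up,
   assuming the standard GCC hook mechanism (the instantiation itself is
   not part of this excerpt):

     struct gcc_target targetm = TARGET_INITIALIZER;

   TARGET_INITIALIZER, built via target-def.h, expands to an aggregate
   initializer that reads every TARGET_* macro visible at that point,
   which is why each override must precede it.  */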
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;
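
/* Worked example, not from the original source: with the default of 5,
   a conditional branch around a run of up to five instructions may
   instead be emitted as five conditionally executed instructions,
   trading wasted issue slots when the condition fails for the cost of
   a branch.  */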
extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = TARGET_CPU_arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;
/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned int tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
/* Active target architecture and tuning.  */

struct arm_build_target arm_active_target;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */

/* Nonzero if this chip supports the ARM Architecture 5T extensions.  */

/* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */

/* Nonzero if this chip supports the ARM 6K extensions.  */

/* Nonzero if this chip supports the ARM 6KZ extensions.  */

/* Nonzero if instructions present in ARMv6-M can be used.  */

/* Nonzero if this chip supports the ARM 7 extensions.  */

/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */

/* Nonzero if instructions present in ARMv8 can be used.  */

/* Nonzero if this chip supports the ARMv8.1 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
/* Nonzero if chip supports Thumb 1.  */

/* Nonzero if chip supports Thumb 2.  */

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we should use Neon to handle 64-bits operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
int arm_arch_dotprod = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM)    \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
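
/* How the mask above works out, assuming the conventional ARM register
   numbering (SP == r13, PC == r15; these numbers are assumptions here,
   not definitions from this excerpt): bits 13 and 15 already fall
   outside the low-byte mask 0xff, so THUMB2_WORK_REGS is effectively
   r0-r7 minus the Thumb frame pointer and, when it is live, the PIC
   register.  */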
/* Initialization code.  */

  enum processor_type scheduler;
  unsigned int tune_flags;
  const struct tune_params *tune;
#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  { num_slots, l1_size, l1_line_size }
/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,    /* scalar_stmt_cost.  */
  1,    /* scalar_load_cost.  */
  1,    /* scalar_store_cost.  */
  1,    /* vec_stmt_cost.  */
  1,    /* vec_to_scalar_cost.  */
  1,    /* scalar_to_vec_cost.  */
  1,    /* vec_align_load_cost.  */
  1,    /* vec_unalign_load_cost.  */
  1,    /* vec_unalign_store_cost.  */
  1,    /* vec_store_cost.  */
  3,    /* cond_taken_branch_cost.  */
  1,    /* cond_not_taken_branch_cost.  */
};
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
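
/* A note on units, assuming the standard definition of COSTS_N_INSNS in
   rtl.h rather than anything local to this file: COSTS_N_INSNS (N)
   expands to ((N) * 4), so the tables below are internally expressed in
   quarter-instruction units relative to one fast insn.  */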
const struct cpu_cost_table cortexa9_extra_costs =

  COSTS_N_INSNS (1),    /* shift_reg.  */
  COSTS_N_INSNS (1),    /* arith_shift.  */
  COSTS_N_INSNS (2),    /* arith_shift_reg.  */
  COSTS_N_INSNS (1),    /* log_shift_reg.  */
  COSTS_N_INSNS (1),    /* extend.  */
  COSTS_N_INSNS (2),    /* extend_arith.  */
  COSTS_N_INSNS (1),    /* bfi.  */
  COSTS_N_INSNS (1),    /* bfx.  */

  true                  /* non_exec_costs_exec.  */

  COSTS_N_INSNS (3),    /* simple.  */
  COSTS_N_INSNS (3),    /* flag_setting.  */
  COSTS_N_INSNS (2),    /* extend.  */
  COSTS_N_INSNS (3),    /* add.  */
  COSTS_N_INSNS (2),    /* extend_add.  */
  COSTS_N_INSNS (30)    /* idiv.  No HW div on Cortex A9.  */

  0,                    /* simple (N/A).  */
  0,                    /* flag_setting (N/A).  */
  COSTS_N_INSNS (4),    /* extend.  */
  COSTS_N_INSNS (4),    /* extend_add.  */

  COSTS_N_INSNS (2),    /* load.  */
  COSTS_N_INSNS (2),    /* load_sign_extend.  */
  COSTS_N_INSNS (2),    /* ldrd.  */
  COSTS_N_INSNS (2),    /* ldm_1st.  */
  1,                    /* ldm_regs_per_insn_1st.  */
  2,                    /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (5),    /* loadf.  */
  COSTS_N_INSNS (5),    /* loadd.  */
  COSTS_N_INSNS (1),    /* load_unaligned.  */
  COSTS_N_INSNS (2),    /* store.  */
  COSTS_N_INSNS (2),    /* strd.  */
  COSTS_N_INSNS (2),    /* stm_1st.  */
  1,                    /* stm_regs_per_insn_1st.  */
  2,                    /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),    /* storef.  */
  COSTS_N_INSNS (1),    /* stored.  */
  COSTS_N_INSNS (1),    /* store_unaligned.  */
  COSTS_N_INSNS (1),    /* loadv.  */
  COSTS_N_INSNS (1)     /* storev.  */

  COSTS_N_INSNS (14),   /* div.  */
  COSTS_N_INSNS (4),    /* mult.  */
  COSTS_N_INSNS (7),    /* mult_addsub.  */
  COSTS_N_INSNS (30),   /* fma.  */
  COSTS_N_INSNS (3),    /* addsub.  */
  COSTS_N_INSNS (1),    /* fpconst.  */
  COSTS_N_INSNS (1),    /* neg.  */
  COSTS_N_INSNS (3),    /* compare.  */
  COSTS_N_INSNS (3),    /* widen.  */
  COSTS_N_INSNS (3),    /* narrow.  */
  COSTS_N_INSNS (3),    /* toint.  */
  COSTS_N_INSNS (3),    /* fromint.  */
  COSTS_N_INSNS (3)     /* roundint.  */

  COSTS_N_INSNS (24),   /* div.  */
  COSTS_N_INSNS (5),    /* mult.  */
  COSTS_N_INSNS (8),    /* mult_addsub.  */
  COSTS_N_INSNS (30),   /* fma.  */
  COSTS_N_INSNS (3),    /* addsub.  */
  COSTS_N_INSNS (1),    /* fpconst.  */
  COSTS_N_INSNS (1),    /* neg.  */
  COSTS_N_INSNS (3),    /* compare.  */
  COSTS_N_INSNS (3),    /* widen.  */
  COSTS_N_INSNS (3),    /* narrow.  */
  COSTS_N_INSNS (3),    /* toint.  */
  COSTS_N_INSNS (3),    /* fromint.  */
  COSTS_N_INSNS (3)     /* roundint.  */

  COSTS_N_INSNS (1)     /* alu.  */
const struct cpu_cost_table cortexa8_extra_costs =

  COSTS_N_INSNS (1),    /* shift.  */
  COSTS_N_INSNS (1),    /* arith_shift.  */
  0,                    /* arith_shift_reg.  */
  COSTS_N_INSNS (1),    /* log_shift.  */
  0,                    /* log_shift_reg.  */
  0,                    /* extend_arith.  */

  true                  /* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),    /* simple.  */
  COSTS_N_INSNS (1),    /* flag_setting.  */
  COSTS_N_INSNS (1),    /* extend.  */
  COSTS_N_INSNS (1),    /* add.  */
  COSTS_N_INSNS (1),    /* extend_add.  */
  COSTS_N_INSNS (30)    /* idiv.  No HW div on Cortex A8.  */

  0,                    /* simple (N/A).  */
  0,                    /* flag_setting (N/A).  */
  COSTS_N_INSNS (2),    /* extend.  */
  COSTS_N_INSNS (2),    /* extend_add.  */

  COSTS_N_INSNS (1),    /* load.  */
  COSTS_N_INSNS (1),    /* load_sign_extend.  */
  COSTS_N_INSNS (1),    /* ldrd.  */
  COSTS_N_INSNS (1),    /* ldm_1st.  */
  1,                    /* ldm_regs_per_insn_1st.  */
  2,                    /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),    /* loadf.  */
  COSTS_N_INSNS (1),    /* loadd.  */
  COSTS_N_INSNS (1),    /* load_unaligned.  */
  COSTS_N_INSNS (1),    /* store.  */
  COSTS_N_INSNS (1),    /* strd.  */
  COSTS_N_INSNS (1),    /* stm_1st.  */
  1,                    /* stm_regs_per_insn_1st.  */
  2,                    /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),    /* storef.  */
  COSTS_N_INSNS (1),    /* stored.  */
  COSTS_N_INSNS (1),    /* store_unaligned.  */
  COSTS_N_INSNS (1),    /* loadv.  */
  COSTS_N_INSNS (1)     /* storev.  */

  COSTS_N_INSNS (36),   /* div.  */
  COSTS_N_INSNS (11),   /* mult.  */
  COSTS_N_INSNS (20),   /* mult_addsub.  */
  COSTS_N_INSNS (30),   /* fma.  */
  COSTS_N_INSNS (9),    /* addsub.  */
  COSTS_N_INSNS (3),    /* fpconst.  */
  COSTS_N_INSNS (3),    /* neg.  */
  COSTS_N_INSNS (6),    /* compare.  */
  COSTS_N_INSNS (4),    /* widen.  */
  COSTS_N_INSNS (4),    /* narrow.  */
  COSTS_N_INSNS (8),    /* toint.  */
  COSTS_N_INSNS (8),    /* fromint.  */
  COSTS_N_INSNS (8)     /* roundint.  */

  COSTS_N_INSNS (64),   /* div.  */
  COSTS_N_INSNS (16),   /* mult.  */
  COSTS_N_INSNS (25),   /* mult_addsub.  */
  COSTS_N_INSNS (30),   /* fma.  */
  COSTS_N_INSNS (9),    /* addsub.  */
  COSTS_N_INSNS (3),    /* fpconst.  */
  COSTS_N_INSNS (3),    /* neg.  */
  COSTS_N_INSNS (6),    /* compare.  */
  COSTS_N_INSNS (6),    /* widen.  */
  COSTS_N_INSNS (6),    /* narrow.  */
  COSTS_N_INSNS (8),    /* toint.  */
  COSTS_N_INSNS (8),    /* fromint.  */
  COSTS_N_INSNS (8)     /* roundint.  */

  COSTS_N_INSNS (1)     /* alu.  */
const struct cpu_cost_table cortexa5_extra_costs =

  COSTS_N_INSNS (1),    /* shift.  */
  COSTS_N_INSNS (1),    /* shift_reg.  */
  COSTS_N_INSNS (1),    /* arith_shift.  */
  COSTS_N_INSNS (1),    /* arith_shift_reg.  */
  COSTS_N_INSNS (1),    /* log_shift.  */
  COSTS_N_INSNS (1),    /* log_shift_reg.  */
  COSTS_N_INSNS (1),    /* extend.  */
  COSTS_N_INSNS (1),    /* extend_arith.  */
  COSTS_N_INSNS (1),    /* bfi.  */
  COSTS_N_INSNS (1),    /* bfx.  */
  COSTS_N_INSNS (1),    /* clz.  */
  COSTS_N_INSNS (1),    /* rev.  */
  true                  /* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),    /* flag_setting.  */
  COSTS_N_INSNS (1),    /* extend.  */
  COSTS_N_INSNS (1),    /* add.  */
  COSTS_N_INSNS (1),    /* extend_add.  */
  COSTS_N_INSNS (7)     /* idiv.  */

  0,                    /* simple (N/A).  */
  0,                    /* flag_setting (N/A).  */
  COSTS_N_INSNS (1),    /* extend.  */
  COSTS_N_INSNS (2),    /* extend_add.  */

  COSTS_N_INSNS (1),    /* load.  */
  COSTS_N_INSNS (1),    /* load_sign_extend.  */
  COSTS_N_INSNS (6),    /* ldrd.  */
  COSTS_N_INSNS (1),    /* ldm_1st.  */
  1,                    /* ldm_regs_per_insn_1st.  */
  2,                    /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),    /* loadf.  */
  COSTS_N_INSNS (4),    /* loadd.  */
  COSTS_N_INSNS (1),    /* load_unaligned.  */
  COSTS_N_INSNS (1),    /* store.  */
  COSTS_N_INSNS (3),    /* strd.  */
  COSTS_N_INSNS (1),    /* stm_1st.  */
  1,                    /* stm_regs_per_insn_1st.  */
  2,                    /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),    /* storef.  */
  COSTS_N_INSNS (2),    /* stored.  */
  COSTS_N_INSNS (1),    /* store_unaligned.  */
  COSTS_N_INSNS (1),    /* loadv.  */
  COSTS_N_INSNS (1)     /* storev.  */

  COSTS_N_INSNS (15),   /* div.  */
  COSTS_N_INSNS (3),    /* mult.  */
  COSTS_N_INSNS (7),    /* mult_addsub.  */
  COSTS_N_INSNS (7),    /* fma.  */
  COSTS_N_INSNS (3),    /* addsub.  */
  COSTS_N_INSNS (3),    /* fpconst.  */
  COSTS_N_INSNS (3),    /* neg.  */
  COSTS_N_INSNS (3),    /* compare.  */
  COSTS_N_INSNS (3),    /* widen.  */
  COSTS_N_INSNS (3),    /* narrow.  */
  COSTS_N_INSNS (3),    /* toint.  */
  COSTS_N_INSNS (3),    /* fromint.  */
  COSTS_N_INSNS (3)     /* roundint.  */

  COSTS_N_INSNS (30),   /* div.  */
  COSTS_N_INSNS (6),    /* mult.  */
  COSTS_N_INSNS (10),   /* mult_addsub.  */
  COSTS_N_INSNS (7),    /* fma.  */
  COSTS_N_INSNS (3),    /* addsub.  */
  COSTS_N_INSNS (3),    /* fpconst.  */
  COSTS_N_INSNS (3),    /* neg.  */
  COSTS_N_INSNS (3),    /* compare.  */
  COSTS_N_INSNS (3),    /* widen.  */
  COSTS_N_INSNS (3),    /* narrow.  */
  COSTS_N_INSNS (3),    /* toint.  */
  COSTS_N_INSNS (3),    /* fromint.  */
  COSTS_N_INSNS (3)     /* roundint.  */

  COSTS_N_INSNS (1)     /* alu.  */
const struct cpu_cost_table cortexa7_extra_costs =

  COSTS_N_INSNS (1),    /* shift.  */
  COSTS_N_INSNS (1),    /* shift_reg.  */
  COSTS_N_INSNS (1),    /* arith_shift.  */
  COSTS_N_INSNS (1),    /* arith_shift_reg.  */
  COSTS_N_INSNS (1),    /* log_shift.  */
  COSTS_N_INSNS (1),    /* log_shift_reg.  */
  COSTS_N_INSNS (1),    /* extend.  */
  COSTS_N_INSNS (1),    /* extend_arith.  */
  COSTS_N_INSNS (1),    /* bfi.  */
  COSTS_N_INSNS (1),    /* bfx.  */
  COSTS_N_INSNS (1),    /* clz.  */
  COSTS_N_INSNS (1),    /* rev.  */
  true                  /* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),    /* flag_setting.  */
  COSTS_N_INSNS (1),    /* extend.  */
  COSTS_N_INSNS (1),    /* add.  */
  COSTS_N_INSNS (1),    /* extend_add.  */
  COSTS_N_INSNS (7)     /* idiv.  */

  0,                    /* simple (N/A).  */
  0,                    /* flag_setting (N/A).  */
  COSTS_N_INSNS (1),    /* extend.  */
  COSTS_N_INSNS (2),    /* extend_add.  */

  COSTS_N_INSNS (1),    /* load.  */
  COSTS_N_INSNS (1),    /* load_sign_extend.  */
  COSTS_N_INSNS (3),    /* ldrd.  */
  COSTS_N_INSNS (1),    /* ldm_1st.  */
  1,                    /* ldm_regs_per_insn_1st.  */
  2,                    /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),    /* loadf.  */
  COSTS_N_INSNS (2),    /* loadd.  */
  COSTS_N_INSNS (1),    /* load_unaligned.  */
  COSTS_N_INSNS (1),    /* store.  */
  COSTS_N_INSNS (3),    /* strd.  */
  COSTS_N_INSNS (1),    /* stm_1st.  */
  1,                    /* stm_regs_per_insn_1st.  */
  2,                    /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),    /* storef.  */
  COSTS_N_INSNS (2),    /* stored.  */
  COSTS_N_INSNS (1),    /* store_unaligned.  */
  COSTS_N_INSNS (1),    /* loadv.  */
  COSTS_N_INSNS (1)     /* storev.  */

  COSTS_N_INSNS (15),   /* div.  */
  COSTS_N_INSNS (3),    /* mult.  */
  COSTS_N_INSNS (7),    /* mult_addsub.  */
  COSTS_N_INSNS (7),    /* fma.  */
  COSTS_N_INSNS (3),    /* addsub.  */
  COSTS_N_INSNS (3),    /* fpconst.  */
  COSTS_N_INSNS (3),    /* neg.  */
  COSTS_N_INSNS (3),    /* compare.  */
  COSTS_N_INSNS (3),    /* widen.  */
  COSTS_N_INSNS (3),    /* narrow.  */
  COSTS_N_INSNS (3),    /* toint.  */
  COSTS_N_INSNS (3),    /* fromint.  */
  COSTS_N_INSNS (3)     /* roundint.  */

  COSTS_N_INSNS (30),   /* div.  */
  COSTS_N_INSNS (6),    /* mult.  */
  COSTS_N_INSNS (10),   /* mult_addsub.  */
  COSTS_N_INSNS (7),    /* fma.  */
  COSTS_N_INSNS (3),    /* addsub.  */
  COSTS_N_INSNS (3),    /* fpconst.  */
  COSTS_N_INSNS (3),    /* neg.  */
  COSTS_N_INSNS (3),    /* compare.  */
  COSTS_N_INSNS (3),    /* widen.  */
  COSTS_N_INSNS (3),    /* narrow.  */
  COSTS_N_INSNS (3),    /* toint.  */
  COSTS_N_INSNS (3),    /* fromint.  */
  COSTS_N_INSNS (3)     /* roundint.  */

  COSTS_N_INSNS (1)     /* alu.  */
const struct cpu_cost_table cortexa12_extra_costs =

  COSTS_N_INSNS (1),    /* shift_reg.  */
  COSTS_N_INSNS (1),    /* arith_shift.  */
  COSTS_N_INSNS (1),    /* arith_shift_reg.  */
  COSTS_N_INSNS (1),    /* log_shift.  */
  COSTS_N_INSNS (1),    /* log_shift_reg.  */
  COSTS_N_INSNS (1),    /* extend_arith.  */
  COSTS_N_INSNS (1),    /* bfx.  */
  COSTS_N_INSNS (1),    /* clz.  */
  COSTS_N_INSNS (1),    /* rev.  */
  true                  /* non_exec_costs_exec.  */

  COSTS_N_INSNS (2),    /* simple.  */
  COSTS_N_INSNS (3),    /* flag_setting.  */
  COSTS_N_INSNS (2),    /* extend.  */
  COSTS_N_INSNS (3),    /* add.  */
  COSTS_N_INSNS (2),    /* extend_add.  */
  COSTS_N_INSNS (18)    /* idiv.  */

  0,                    /* simple (N/A).  */
  0,                    /* flag_setting (N/A).  */
  COSTS_N_INSNS (3),    /* extend.  */
  COSTS_N_INSNS (3),    /* extend_add.  */

  COSTS_N_INSNS (3),    /* load.  */
  COSTS_N_INSNS (3),    /* load_sign_extend.  */
  COSTS_N_INSNS (3),    /* ldrd.  */
  COSTS_N_INSNS (3),    /* ldm_1st.  */
  1,                    /* ldm_regs_per_insn_1st.  */
  2,                    /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (3),    /* loadf.  */
  COSTS_N_INSNS (3),    /* loadd.  */
  0,                    /* load_unaligned.  */

  1,                    /* stm_regs_per_insn_1st.  */
  2,                    /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),    /* storef.  */
  COSTS_N_INSNS (2),    /* stored.  */
  0,                    /* store_unaligned.  */
  COSTS_N_INSNS (1),    /* loadv.  */
  COSTS_N_INSNS (1)     /* storev.  */

  COSTS_N_INSNS (17),   /* div.  */
  COSTS_N_INSNS (4),    /* mult.  */
  COSTS_N_INSNS (8),    /* mult_addsub.  */
  COSTS_N_INSNS (8),    /* fma.  */
  COSTS_N_INSNS (4),    /* addsub.  */
  COSTS_N_INSNS (2),    /* fpconst.  */
  COSTS_N_INSNS (2),    /* neg.  */
  COSTS_N_INSNS (2),    /* compare.  */
  COSTS_N_INSNS (4),    /* widen.  */
  COSTS_N_INSNS (4),    /* narrow.  */
  COSTS_N_INSNS (4),    /* toint.  */
  COSTS_N_INSNS (4),    /* fromint.  */
  COSTS_N_INSNS (4)     /* roundint.  */

  COSTS_N_INSNS (31),   /* div.  */
  COSTS_N_INSNS (4),    /* mult.  */
  COSTS_N_INSNS (8),    /* mult_addsub.  */
  COSTS_N_INSNS (8),    /* fma.  */
  COSTS_N_INSNS (4),    /* addsub.  */
  COSTS_N_INSNS (2),    /* fpconst.  */
  COSTS_N_INSNS (2),    /* neg.  */
  COSTS_N_INSNS (2),    /* compare.  */
  COSTS_N_INSNS (4),    /* widen.  */
  COSTS_N_INSNS (4),    /* narrow.  */
  COSTS_N_INSNS (4),    /* toint.  */
  COSTS_N_INSNS (4),    /* fromint.  */
  COSTS_N_INSNS (4)     /* roundint.  */

  COSTS_N_INSNS (1)     /* alu.  */
const struct cpu_cost_table cortexa15_extra_costs =

  COSTS_N_INSNS (1),    /* arith_shift.  */
  COSTS_N_INSNS (1),    /* arith_shift_reg.  */
  COSTS_N_INSNS (1),    /* log_shift.  */
  COSTS_N_INSNS (1),    /* log_shift_reg.  */
  COSTS_N_INSNS (1),    /* extend_arith.  */
  COSTS_N_INSNS (1),    /* bfi.  */

  true                  /* non_exec_costs_exec.  */

  COSTS_N_INSNS (2),    /* simple.  */
  COSTS_N_INSNS (3),    /* flag_setting.  */
  COSTS_N_INSNS (2),    /* extend.  */
  COSTS_N_INSNS (2),    /* add.  */
  COSTS_N_INSNS (2),    /* extend_add.  */
  COSTS_N_INSNS (18)    /* idiv.  */

  0,                    /* simple (N/A).  */
  0,                    /* flag_setting (N/A).  */
  COSTS_N_INSNS (3),    /* extend.  */
  COSTS_N_INSNS (3),    /* extend_add.  */

  COSTS_N_INSNS (3),    /* load.  */
  COSTS_N_INSNS (3),    /* load_sign_extend.  */
  COSTS_N_INSNS (3),    /* ldrd.  */
  COSTS_N_INSNS (4),    /* ldm_1st.  */
  1,                    /* ldm_regs_per_insn_1st.  */
  2,                    /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (4),    /* loadf.  */
  COSTS_N_INSNS (4),    /* loadd.  */
  0,                    /* load_unaligned.  */

  COSTS_N_INSNS (1),    /* stm_1st.  */
  1,                    /* stm_regs_per_insn_1st.  */
  2,                    /* stm_regs_per_insn_subsequent.  */

  0,                    /* store_unaligned.  */
  COSTS_N_INSNS (1),    /* loadv.  */
  COSTS_N_INSNS (1)     /* storev.  */

  COSTS_N_INSNS (17),   /* div.  */
  COSTS_N_INSNS (4),    /* mult.  */
  COSTS_N_INSNS (8),    /* mult_addsub.  */
  COSTS_N_INSNS (8),    /* fma.  */
  COSTS_N_INSNS (4),    /* addsub.  */
  COSTS_N_INSNS (2),    /* fpconst.  */
  COSTS_N_INSNS (2),    /* neg.  */
  COSTS_N_INSNS (5),    /* compare.  */
  COSTS_N_INSNS (4),    /* widen.  */
  COSTS_N_INSNS (4),    /* narrow.  */
  COSTS_N_INSNS (4),    /* toint.  */
  COSTS_N_INSNS (4),    /* fromint.  */
  COSTS_N_INSNS (4)     /* roundint.  */

  COSTS_N_INSNS (31),   /* div.  */
  COSTS_N_INSNS (4),    /* mult.  */
  COSTS_N_INSNS (8),    /* mult_addsub.  */
  COSTS_N_INSNS (8),    /* fma.  */
  COSTS_N_INSNS (4),    /* addsub.  */
  COSTS_N_INSNS (2),    /* fpconst.  */
  COSTS_N_INSNS (2),    /* neg.  */
  COSTS_N_INSNS (2),    /* compare.  */
  COSTS_N_INSNS (4),    /* widen.  */
  COSTS_N_INSNS (4),    /* narrow.  */
  COSTS_N_INSNS (4),    /* toint.  */
  COSTS_N_INSNS (4),    /* fromint.  */
  COSTS_N_INSNS (4)     /* roundint.  */

  COSTS_N_INSNS (1)     /* alu.  */
const struct cpu_cost_table v7m_extra_costs =

  0,                    /* arith_shift.  */
  COSTS_N_INSNS (1),    /* arith_shift_reg.  */
  COSTS_N_INSNS (1),    /* log_shift_reg.  */
  COSTS_N_INSNS (1),    /* extend_arith.  */

  COSTS_N_INSNS (1),    /* non_exec.  */
  false                 /* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),    /* simple.  */
  COSTS_N_INSNS (1),    /* flag_setting.  */
  COSTS_N_INSNS (2),    /* extend.  */
  COSTS_N_INSNS (1),    /* add.  */
  COSTS_N_INSNS (3),    /* extend_add.  */
  COSTS_N_INSNS (8)     /* idiv.  */

  0,                    /* simple (N/A).  */
  0,                    /* flag_setting (N/A).  */
  COSTS_N_INSNS (2),    /* extend.  */
  COSTS_N_INSNS (3),    /* extend_add.  */

  COSTS_N_INSNS (2),    /* load.  */
  0,                    /* load_sign_extend.  */
  COSTS_N_INSNS (3),    /* ldrd.  */
  COSTS_N_INSNS (2),    /* ldm_1st.  */
  1,                    /* ldm_regs_per_insn_1st.  */
  1,                    /* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),    /* loadf.  */
  COSTS_N_INSNS (3),    /* loadd.  */
  COSTS_N_INSNS (1),    /* load_unaligned.  */
  COSTS_N_INSNS (2),    /* store.  */
  COSTS_N_INSNS (3),    /* strd.  */
  COSTS_N_INSNS (2),    /* stm_1st.  */
  1,                    /* stm_regs_per_insn_1st.  */
  1,                    /* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),    /* storef.  */
  COSTS_N_INSNS (3),    /* stored.  */
  COSTS_N_INSNS (1),    /* store_unaligned.  */
  COSTS_N_INSNS (1),    /* loadv.  */
  COSTS_N_INSNS (1)     /* storev.  */

  COSTS_N_INSNS (7),    /* div.  */
  COSTS_N_INSNS (2),    /* mult.  */
  COSTS_N_INSNS (5),    /* mult_addsub.  */
  COSTS_N_INSNS (3),    /* fma.  */
  COSTS_N_INSNS (1),    /* addsub.  */

  COSTS_N_INSNS (15),   /* div.  */
  COSTS_N_INSNS (5),    /* mult.  */
  COSTS_N_INSNS (7),    /* mult_addsub.  */
  COSTS_N_INSNS (7),    /* fma.  */
  COSTS_N_INSNS (3),    /* addsub.  */

  COSTS_N_INSNS (1)     /* alu.  */
const struct addr_mode_cost_table generic_addr_mode_costs =
{
  /* int.  */
  {
    COSTS_N_INSNS (0),  /* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),  /* AMO_NO_WB.  */
    COSTS_N_INSNS (0)   /* AMO_WB.  */
  },
  /* float.  */
  {
    COSTS_N_INSNS (0),  /* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),  /* AMO_NO_WB.  */
    COSTS_N_INSNS (0)   /* AMO_WB.  */
  },
  /* vector.  */
  {
    COSTS_N_INSNS (0),  /* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),  /* AMO_NO_WB.  */
    COSTS_N_INSNS (0)   /* AMO_WB.  */
  }
};
const struct tune_params arm_slowmul_tune =
{
  &generic_extra_costs,                 /* Insn extra costs.  */
  &generic_addr_mode_costs,             /* Addressing mode costs.  */
  NULL,                                 /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  8,                                    /* Memset max inline.  */
  1,                                    /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,   /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,   /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_fastmul_tune =
{
  &generic_extra_costs,                 /* Insn extra costs.  */
  &generic_addr_mode_costs,             /* Addressing mode costs.  */
  NULL,                                 /* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,                                    /* Constant limit.  */
  5,                                    /* Max cond insns.  */
  8,                                    /* Memset max inline.  */
  1,                                    /* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,   /* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,   /* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xscale_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  xscale_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  2,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_9e_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_marvell_pj4_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_v6t2_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_cortex_a35_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_FULL
};
const struct tune_params arm_exynosm1_tune =
{
  &exynosm1_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  32,					/* Memset max inline.  */
  4,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a73_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  &v7m_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
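/* Added illustration of the tradeoff described above (not original source
   text): materialising the address of a symbol "x" either as

       movw  r0, #:lower16:x    @ 1 cycle on, e.g., Cortex-M4
       movt  r0, #:upper16:x    @ 1 cycle

   or as

       ldr   r0, .Lpool_x       @ 2 cycles, but consecutive pool loads
                                @ can pipeline, saving one cycle

   costs about the same, which is why PREF_CONST_POOL_TRUE is chosen.  */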
/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  &v7m_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
   cortex-m1.small-multiply.  */
const struct tune_params arm_v6m_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fa726te_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Auto-generated CPU, FPU and architecture tables.  */
#include "arm-cpu-data.h"

/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */

char arm_arch_name[] = "__ARM_ARCH_PROFILE__";

/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
/* Return the number of bits set in BMAP.  */
static unsigned
bitmap_popcount (const sbitmap bmap)
{
  unsigned int count = 0;
  unsigned int n = 0;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
    count++;
  return count;
}

typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;
  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      set_conv_libfunc (trunc_optab, HFmode, DFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_d2h_ieee"
			 : "__gnu_d2h_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ E_QQmode, "qq" },
	{ E_UQQmode, "uqq" },
	{ E_HQmode, "hq" },
	{ E_UHQmode, "uhq" },
	{ E_SQmode, "sq" },
	{ E_USQmode, "usq" },
	{ E_DQmode, "dq" },
	{ E_UDQmode, "udq" },
	{ E_TQmode, "tq" },
	{ E_UTQmode, "utq" },
	{ E_HAmode, "ha" },
	{ E_UHAmode, "uha" },
	{ E_SAmode, "sa" },
	{ E_USAmode, "usa" },
	{ E_DAmode, "da" },
	{ E_UDAmode, "uda" },
	{ E_TAmode, "ta" },
	{ E_UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ E_QQmode, "qq" },
	{ E_UQQmode, "uqq" },
	{ E_HQmode, "hq" },
	{ E_UHQmode, "uhq" },
	{ E_SQmode, "sq" },
	{ E_USQmode, "usq" },
	{ E_DQmode, "dq" },
	{ E_UDQmode, "udq" },
	{ E_TQmode, "tq" },
	{ E_UTQmode, "utq" },
	{ E_HAmode, "ha" },
	{ E_UHAmode, "uha" },
	{ E_SAmode, "sa" },
	{ E_USAmode, "usa" },
	{ E_DAmode, "da" },
	{ E_UDAmode, "uda" },
	{ E_TAmode, "ta" },
	{ E_UTAmode, "uta" },
	{ E_SImode, "si" },
	{ E_DImode, "di" },
	{ E_SFmode, "sf" },
	{ E_DFmode, "df" }
      };
    unsigned int i, j;
    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }
    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
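/* Added illustration (not original source text): the type built above is
   what a user would get by writing the AAPCS definition by hand:

       struct __va_list { void *__ap; };
       typedef struct __va_list va_list;

   which is why neither the tag name nor the field name may change --
   the tag appears in C++ mangled names and the field in <stdarg.h>.  */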
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT
      && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  /* We only support -mpure-code and -mslow-flash-data on M-profile targets
     with MOVT.  */
  if ((target_pure_code || target_slow_flash_data)
      && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
    {
      const char *flag = (target_pure_code ? "-mpure-code" :
			  "-mslow-flash-data");
      error ("%s only supports non-pic code on M-profile targets with the "
	     "MOVT instruction", flag);
    }
}
/* Recompute the global settings depending on target attribute options.  */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }

  /* Increase the number of conditional instructions with -Os.  */
  max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;

  /* For THUMB2, we limit the conditional sequence to one IT block.  */
  if (TARGET_THUMB2)
    max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
}
/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode.  */
static GTY(()) bool thumb_flipper;

/* Options after initial target override.  */
static GTY(()) tree init_optimize;

static void
arm_override_options_after_change_1 (struct gcc_options *opts)
{
  /* -falign-functions without argument: supply one.  */
  if (opts->x_flag_align_functions && !opts->x_str_align_functions)
    opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
				  && opts->x_optimize_size ? "2" : "4";
}

/* Implement targetm.override_options_after_change.  */

static void
arm_override_options_after_change (void)
{
  arm_configure_build_target (&arm_active_target,
			      TREE_TARGET_OPTION (target_option_default_node),
			      &global_options_set, false);

  arm_override_options_after_change_1 (&global_options);
}
/* Implement TARGET_OPTION_SAVE.  */
static void
arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
{
  ptr->x_arm_arch_string = opts->x_arm_arch_string;
  ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
  ptr->x_arm_tune_string = opts->x_arm_tune_string;
}

/* Implement TARGET_OPTION_RESTORE.  */
static void
arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
{
  opts->x_arm_arch_string = ptr->x_arm_arch_string;
  opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
  opts->x_arm_tune_string = ptr->x_arm_tune_string;
  arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
			      false);
}
/* Reset options between modes that the user has specified.  */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  arm_override_options_after_change_1 (opts);

  if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      /* The default is to enable interworking, so this warning message would
	 be confusing to users who have just compiled with
	 eg, -march=armv4.  */
      /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
      opts->x_target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB_P (opts->x_target_flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  /* Need to remember initial values so combinations of options like
     -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  /* ARM execution state and M profile don't have [restrict] IT.  */
  if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
    opts->x_arm_restrict_it = 0;

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
     i.e. Thumb2 and ARM state only.
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors
     - ARMv8-M Baseline processors.  */

  if (! opts_set->x_unaligned_access)
    {
      opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
				  && arm_arch6
				  && (arm_arch_notm || arm_arch7));
    }
  else if (opts->x_unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      opts->x_unaligned_access = 0;
    }

  /* Don't warn since it's on by default in -O2.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);

#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
  SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
#endif
}
static sbitmap isa_all_fpubits;
static sbitmap isa_quirkbits;
/* Configure a build target TARGET from the user-specified options OPTS and
   OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
   architecture have been specified, but the two are not identical.  */
void
arm_configure_build_target (struct arm_build_target *target,
			    struct cl_target_option *opts,
			    struct gcc_options *opts_set,
			    bool warn_compatible)
{
  const cpu_option *arm_selected_tune = NULL;
  const arch_option *arm_selected_arch = NULL;
  const cpu_option *arm_selected_cpu = NULL;
  const arm_fpu_desc *arm_selected_fpu = NULL;
  const char *tune_opts = NULL;
  const char *arch_opts = NULL;
  const char *cpu_opts = NULL;

  bitmap_clear (target->isa);
  target->core_name = NULL;
  target->arch_name = NULL;

  if (opts_set->x_arm_arch_string)
    {
      arm_selected_arch = arm_parse_arch_option_name (all_architectures,
						      "-march",
						      opts->x_arm_arch_string);
      arch_opts = strchr (opts->x_arm_arch_string, '+');
    }

  if (opts_set->x_arm_cpu_string)
    {
      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
						    opts->x_arm_cpu_string);
      cpu_opts = strchr (opts->x_arm_cpu_string, '+');
      arm_selected_tune = arm_selected_cpu;
      /* If taking the tuning from -mcpu, we don't need to rescan the
	 options for tuning.  */
    }

  if (opts_set->x_arm_tune_string)
    {
      arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
						     opts->x_arm_tune_string);
      tune_opts = strchr (opts->x_arm_tune_string, '+');
    }

  if (arm_selected_arch)
    {
      arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_arch->common,
				 arch_opts);

      if (arm_selected_cpu)
	{
	  auto_sbitmap cpu_isa (isa_num_bits);
	  auto_sbitmap isa_delta (isa_num_bits);

	  arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
	  arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
				     cpu_opts);
	  bitmap_xor (isa_delta, cpu_isa, target->isa);
	  /* Ignore any bits that are quirk bits.  */
	  bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
	  /* Ignore (for now) any bits that might be set by -mfpu.  */
	  bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);

	  if (!bitmap_empty_p (isa_delta))
	    {
	      if (warn_compatible)
		warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
			 arm_selected_cpu->common.name,
			 arm_selected_arch->common.name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	      target->arch_name = arm_selected_arch->common.name;
	    }
	  else
	    {
	      /* Architecture and CPU are essentially the same.
		 Prefer the CPU setting.  */
	      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
	      target->core_name = arm_selected_cpu->common.name;
	      /* Copy the CPU's capabilities, so that we inherit the
		 appropriate extensions and quirks.  */
	      bitmap_copy (target->isa, cpu_isa);
	    }
	}
      else
	{
	  /* Pick a CPU based on the architecture.  */
	  arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	  target->arch_name = arm_selected_arch->common.name;
	  /* Note: target->core_name is left unset in this path.  */
	}
    }
  else if (arm_selected_cpu)
    {
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }
  /* If the user did not specify a processor or architecture, choose
     one for them.  */
  else
    {
      const cpu_option *sel;
      auto_sbitmap sought_isa (isa_num_bits);
      bitmap_clear (sought_isa);
      auto_sbitmap default_isa (isa_num_bits);

      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
						    TARGET_CPU_DEFAULT);
      cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
      gcc_assert (arm_selected_cpu->common.name);

      /* RWE: All of the selection logic below (to the end of this
	 'if' clause) looks somewhat suspect.  It appears to be mostly
	 there to support forcing thumb support when the default CPU
	 does not have thumb (somewhat dubious in terms of what the
	 user might be expecting).  I think it should be removed once
	 support for the pre-thumb era cores is removed.  */
      sel = arm_selected_cpu;
      arm_initialize_isa (default_isa, sel->common.isa_bits);
      arm_parse_option_features (default_isa, &arm_selected_cpu->common,
				 cpu_opts);

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */

      if (TARGET_INTERWORK || TARGET_THUMB)
	bitmap_set_bit (sought_isa, isa_bit_thumb);

      /* If there are such requirements and the default CPU does not
	 satisfy them, we need to run over the complete list of
	 cores looking for one that is satisfactory.  */
      if (!bitmap_empty_p (sought_isa)
	  && !bitmap_subset_p (sought_isa, default_isa))
	{
	  auto_sbitmap candidate_isa (isa_num_bits);
	  /* We're only interested in a CPU with at least the
	     capabilities of the default CPU and the required
	     additional features.  */
	  bitmap_ior (default_isa, default_isa, sought_isa);

	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->common.name != NULL; sel++)
	    {
	      arm_initialize_isa (candidate_isa, sel->common.isa_bits);
	      /* An exact match?  */
	      if (bitmap_equal_p (default_isa, candidate_isa))
		break;
	    }

	  if (sel->common.name == NULL)
	    {
	      unsigned current_bit_count = isa_num_bits;
	      const cpu_option *best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a CPU that has exactly the
		 characteristics of the default CPU and the given
		 command line options we scan the array again looking
		 for a best match.  The best match must have at least
		 the capabilities of the perfect match.  */
	      for (sel = all_cores; sel->common.name != NULL; sel++)
		{
		  arm_initialize_isa (candidate_isa, sel->common.isa_bits);

		  if (bitmap_subset_p (default_isa, candidate_isa))
		    {
		      unsigned count;

		      bitmap_and_compl (candidate_isa, candidate_isa,
					default_isa);
		      count = bitmap_popcount (candidate_isa);

		      if (count < current_bit_count)
			{
			  best_fit = sel;
			  current_bit_count = count;
			}
		    }
		}
	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}

      /* Now we know the CPU, we can finally initialize the target
	 structure.  */
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }

  gcc_assert (arm_selected_cpu);
  gcc_assert (arm_selected_arch);

  if (opts->x_arm_fpu_index != TARGET_FPU_auto)
    {
      arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
      auto_sbitmap fpu_bits (isa_num_bits);

      arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
      bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
      bitmap_ior (target->isa, target->isa, fpu_bits);
    }

  if (!arm_selected_tune)
    arm_selected_tune = arm_selected_cpu;
  else /* Validate the features passed to -mtune.  */
    arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);

  const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];

  /* Finish initializing the target structure.  */
  target->arch_pp_name = arm_selected_arch->arch;
  target->base_arch = arm_selected_arch->base_arch;
  target->profile = arm_selected_arch->profile;

  target->tune_flags = tune_data->tune_flags;
  target->tune = tune_data->tune;
  target->tune_core = tune_data->scheduler;
  arm_option_reconfigure_globals ();
}
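/* Added example of the precedence rules implemented above (not original
   source text): compiling with "-mcpu=cortex-m4 -march=armv7-a" leaves a
   non-empty isa_delta, so the user gets the "conflicts with" warning,
   armv7-a is used for code generation, and cortex-m4 still supplies the
   default tuning, exactly as the "-march wins / -mcpu wins" comment
   describes.  */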
3344 /* Fix up any incompatible options that the user has specified. */
3346 arm_option_override (void)
3348 static const enum isa_feature fpu_bitlist
[]
3349 = { ISA_ALL_FPU_INTERNAL
, isa_nobit
};
3350 static const enum isa_feature quirk_bitlist
[] = { ISA_ALL_QUIRKS
, isa_nobit
};
3351 cl_target_option opts
;
3353 isa_quirkbits
= sbitmap_alloc (isa_num_bits
);
3354 arm_initialize_isa (isa_quirkbits
, quirk_bitlist
);
3356 isa_all_fpubits
= sbitmap_alloc (isa_num_bits
);
3357 arm_initialize_isa (isa_all_fpubits
, fpu_bitlist
);
3359 arm_active_target
.isa
= sbitmap_alloc (isa_num_bits
);
3361 if (!global_options_set
.x_arm_fpu_index
)
3366 ok
= opt_enum_arg_to_value (OPT_mfpu_
, FPUTYPE_AUTO
, &fpu_index
,
3369 arm_fpu_index
= (enum fpu_type
) fpu_index
;
3372 cl_target_option_save (&opts
, &global_options
);
3373 arm_configure_build_target (&arm_active_target
, &opts
, &global_options_set
,
3376 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3377 SUBTARGET_OVERRIDE_OPTIONS
;
3380 /* Initialize boolean versions of the architectural flags, for use
3381 in the arm.md file and for enabling feature flags. */
3382 arm_option_reconfigure_globals ();
3384 arm_tune
= arm_active_target
.tune_core
;
3385 tune_flags
= arm_active_target
.tune_flags
;
3386 current_tune
= arm_active_target
.tune
;
3388 /* TBD: Dwarf info for apcs frame is not handled yet. */
3389 if (TARGET_APCS_FRAME
)
3390 flag_shrink_wrap
= false;
3392 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3394 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3395 target_flags
|= MASK_APCS_FRAME
;
3398 if (TARGET_POKE_FUNCTION_NAME
)
3399 target_flags
|= MASK_APCS_FRAME
;
3401 if (TARGET_APCS_REENT
&& flag_pic
)
3402 error ("-fpic and -mapcs-reent are incompatible");
3404 if (TARGET_APCS_REENT
)
3405 warning (0, "APCS reentrant code not supported. Ignored");
3407 /* Set up some tuning parameters. */
3408 arm_ld_sched
= (tune_flags
& TF_LDSCHED
) != 0;
3409 arm_tune_strongarm
= (tune_flags
& TF_STRONG
) != 0;
3410 arm_tune_wbuf
= (tune_flags
& TF_WBUF
) != 0;
3411 arm_tune_xscale
= (tune_flags
& TF_XSCALE
) != 0;
3412 arm_tune_cortex_a9
= (arm_tune
== TARGET_CPU_cortexa9
) != 0;
3413 arm_m_profile_small_mul
= (tune_flags
& TF_SMALLMUL
) != 0;
3415 /* For arm2/3 there is no need to do any scheduling if we are doing
3416 software floating-point. */
3417 if (TARGET_SOFT_FLOAT
&& (tune_flags
& TF_NO_MODE32
))
3418 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3420 /* Override the default structure alignment for AAPCS ABI. */
3421 if (!global_options_set
.x_arm_structure_size_boundary
)
3423 if (TARGET_AAPCS_BASED
)
3424 arm_structure_size_boundary
= 8;
3428 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3430 if (arm_structure_size_boundary
!= 8
3431 && arm_structure_size_boundary
!= 32
3432 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3434 if (ARM_DOUBLEWORD_ALIGN
)
3436 "structure size boundary can only be set to 8, 32 or 64");
3438 warning (0, "structure size boundary can only be set to 8 or 32");
3439 arm_structure_size_boundary
3440 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
3444 if (TARGET_VXWORKS_RTP
)
3446 if (!global_options_set
.x_arm_pic_data_is_text_relative
)
3447 arm_pic_data_is_text_relative
= 0;
3450 && !arm_pic_data_is_text_relative
3451 && !(global_options_set
.x_target_flags
& MASK_SINGLE_PIC_BASE
))
3452 /* When text & data segments don't have a fixed displacement, the
3453 intended use is with a single, read only, pic base register.
3454 Unless the user explicitly requested not to do that, set
3456 target_flags
|= MASK_SINGLE_PIC_BASE
;
3458 /* If stack checking is disabled, we can use r10 as the PIC register,
3459 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3460 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3462 if (TARGET_VXWORKS_RTP
)
3463 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3464 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3467 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3468 arm_pic_register
= 9;
  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }
  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;
  /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;
  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			   current_tune->prefetch.num_slots,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			   current_tune->prefetch.l1_cache_line_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			   current_tune->prefetch.l1_cache_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  /* Use Neon to perform 64-bit operations rather than core
     registers.  */
  prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
  if (use_neon_for_64bits == 1)
    prefer_neon_for_64bits = true;
  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int param_sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      param_sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      param_sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      param_sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
			 param_sched_autopref_queue_depth,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Currently, for slow flash data, we just disable literal pools.  We also
     disable it for pure-code.  */
  if (target_slow_flash_data || target_pure_code)
    arm_disable_literal_pool = true;
  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;
  /* Need to remember initial options before they are overridden.  */
  init_optimize = build_optimization_node (&global_options);

  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal ();

  /* Create the default target_options structure.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Init initial mode for testing.  */
  thumb_flipper = TARGET_THUMB;
}
/* Reconfigure global status flags from the active_target.isa.  */
void
arm_option_reconfigure_globals (void)
{
  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
  arm_base_arch = arm_active_target.base_arch;
  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file.  */
  arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
  arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
  arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
  arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
  arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
  arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
  arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
  arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
  arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
  arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
  arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
  arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
  arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
  arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
  arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
  arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
  arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
  arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
  arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
  arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
  if (arm_fp16_inst)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
	error ("selected fp16 options are incompatible");
      arm_fp16_format = ARM_FP16_FORMAT_IEEE;
    }
  /* And finally, set up some quirks.  */
  arm_arch_no_volatile_ce
    = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
  arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
					    isa_bit_quirk_armv6kz);
  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }
}
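
/* Illustrative example (editorial note, not from the original source): with
   TP_CP15 the thread pointer is read from the CP15 user read-only thread ID
   register, typically via

	mrc	p15, 0, <Rd>, c13, c0, 3

   while TP_SOFT falls back to calling the __aeabi_read_tp helper.  */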
/* Perform some validation between the desired architecture and the rest of the
   options.  */
void
arm_options_perform_arch_sanity_checks (void)
{
  /* V5T code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5t)
    target_flags &= ~MASK_INTERWORK;
  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK
      && !TARGET_BPABI
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }
  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  else
    arm_fpu_attr = FPU_VFP;
  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");
      if (TARGET_CALLEE_INTERWORKING)
	error ("AAPCS does not support -mcallee-super-interworking");
    }
  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  if (use_cmse && !arm_arch_cmse)
    error ("target CPU does not support ARMv8-M Security Extensions");

  /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
     and ARMv8-M Baseline and Mainline do not allow such configuration.  */
  if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
    error ("ARMv8-M Security Extensions incompatible with selected FPU");
  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (TARGET_HARD_FLOAT_ABI)
	{
	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
	    error ("-mfloat-abi=hard: selected processor lacks an FPU");
	}
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD)
	sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
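
/* Illustrative example (editorial note): the table above services
   declarations such as

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   for which arm_isr_value returns ARM_FT_ISR, selecting the IRQ entry and
   exit conventions for the function.  */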
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  if (lookup_attribute ("cmse_nonsecure_entry", attr))
    type |= ARM_FT_CMSE_ENTRY;

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

static void
arm_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.syntax unified\n");

  if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
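
/* Illustrative example (editorial note): a trampoline such as the one
   initialized above is only needed for a GNU C nested function whose
   address escapes, e.g.

     int outer (int x)
     {
       int inner (int y) { return x + y; }
       int (*fp) (int) = inner;   -- taking the address forces a trampoline
       return fp (1);
     }
*/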
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
	 assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
	  && arm_compute_static_chain_stack_bytes () != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5t here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ... */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT (func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
     several instructions if anything needs to be popped.  */
  if (saved_int_regs && IS_CMSE_ENTRY (func_type))
    return 0;

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
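
/* Illustrative example (editorial note): a function that saves just r4 and
   lr with no extra stack adjustment passes all the tests above and can
   return with the single instruction

	pop	{r4, pc}

   whereas a variadic or alloca-using function always takes the long
   epilogue sequence.  */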
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  /* Note this function can be called before or after reload.  */
  if (!reload_completed)
    arm_compute_frame_layout ();

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs ((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }
  else if (TARGET_HAVE_MOVT)
    {
      /* Thumb-1 Targets with MOVT.  */
      if (i > 0xffff)
	return FALSE;
      else
	return TRUE;
    }

  return FALSE;
}
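
/* Worked examples (editorial note): an ARM-state immediate is an 8-bit
   value rotated right by an even amount, so 0x000000ff, 0x00ff0000 and
   0xf000000f (0xff rotated right by 4) are all accepted above, while
   0x0001fe00 (0xff shifted left by 9, an odd rotation) is rejected in ARM
   state but accepted for Thumb-2, which additionally accepts replicated
   patterns such as 0x00ff00ff and 0xffffffff.  */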
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* Fall through.  */
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
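
/* Illustrative example (editorial note): AND with 0xffffff00 fails the
   plain immediate test, but its inverse 0x000000ff is a valid immediate,
   so the AND case above succeeds and the operation can be emitted as a
   single BIC with #255.  Similarly a SET of 0xffffff00 can be emitted as
   MVN with #255.  */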
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
	     && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are diadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));

	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
/* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
   ARM/Thumb-2 immediates and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((HOST_WIDE_INT_1U << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}
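
/* Worked example (editorial note): for VAL == 0xe0000100, the case in the
   comment above, the scan finds the long run of zeros so the two
   immediates come out as 0xe0000000 followed by 0x00000100, i.e.

	mov	rA, #0xe0000000
	orr	rA, rA, #0x100

   leaving the small constant last, where it may later combine into a
   pre-indexed address.  */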
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
			      struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the temps,
     wrapping around to the top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  loc = i;
	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				    | ((i < end) ? (0xff >> (32 - end))
						 : 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two.  Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}

      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

	 We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 not.

	 TODO: 16-bit constants that don't clear all the bits, but still win.
	 TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
	  b2 = (remainder & 0x00ff0000) >> 16;
	  b3 = (remainder & 0x0000ff00) >> 8;
	  b4 = remainder & 0xff;

	  if (loc > 24)
	    {
	      /* The 8-bit immediate already found clears b1 (and maybe b2),
		 but must leave b3 and b4 alone.  */

	      /* First try to find a 32-bit replicated constant that clears
		 almost everything.  We can assume that we can't do it in one,
		 or else we wouldn't be here.  */
	      unsigned int tmp = b1 & b2 & b3 & b4;
	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
				  + (tmp << 24);
	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
					    + (tmp == b3) + (tmp == b4);
	      if (tmp
		  && (matching_bytes >= 3
		      || (matching_bytes == 2
			  && const_ok_for_op (remainder & ~tmp2, code))))
		{
		  /* At least 3 of the bytes match, and the fourth has at
		     least as many bits set, or two of the bytes match
		     and it will only require one more insn to finish.  */
		  result = tmp2;
		  i = tmp != b1 ? 32
		      : tmp != b2 ? 24
		      : tmp != b3 ? 16
		      : 8;
		}

	      /* Second, try to find a 16-bit replicated constant that can
		 leave three of the bytes clear.  If b2 or b4 is already
		 zero, then we can.  If the 8-bit from above would not
		 clear b2 anyway, then we still win.  */
	      else if (b1 == b3 && (!b2 || !b4
				    || (remainder & 0x00ff0000 & ~result)))
		{
		  result = remainder & 0xff00ff00;
		  i = 24;
		}
	    }
	  else if (loc > 16)
	    {
	      /* The 8-bit immediate already found clears b2 (and maybe b3)
		 and we don't get here unless b1 is already clear, but it will
		 leave b4 unchanged.  */

	      /* If we can clear b2 and b4 at once, then we win, since the
		 8-bits couldn't possibly reach that far.  */
	      if (b2 == b4)
		{
		  result = remainder & 0x00ff00ff;
		  i = 16;
		}
	    }
	}

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
		  int subtargets, int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_MINUS (mode, GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}

      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }

  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good, way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}

      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}

      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR (mode,
						 gen_rtx_ASHIFT (mode, source,
								 GEN_INT (i)),
						 source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0 */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (sub, GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (target,
						   gen_rtx_fmt_ee (code, mode,
								   source, sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /*  Convert.
	  x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
			     and the remainder 0s for e.g. 0xfff00000)
	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

	  This can be done in 2 instructions by using shifts with mov or mvn.
	  e.g. for
	  x = x | 0xfff00000;
	  we generate.
	  mvn	r0, r0, asl #12
	  mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	  x = y | constant (which has set_zero_bit_copies number of trailing ones).
	   to
	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	  For eg. r0 = r0 | 0xfff
	       mvn	r0, r0, lsr #12
	       mvn	r0, r0, asl #12

	  */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction. This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).
      */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;

    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, SImode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }

  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
	{
	  rtx new_src, temp1_rtx;

	  temp1 = immediates->i[i];

	  if (code == SET || code == MINUS)
	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
	  else if ((final_invert || i < (insns - 1)) && subtargets)
	    new_src = gen_reg_rtx (mode);
	  else
	    new_src = target;

	  if (can_invert)
	    temp1 = ~temp1;
	  else if (can_negate)
	    temp1 = -temp1;

	  temp1 = trunc_int_for_mode (temp1, mode);
	  temp1_rtx = GEN_INT (temp1);

	  if (code == SET)
	    ;
	  else if (code == MINUS)
	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
	  else
	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
	  source = new_src;

	  if (code == SET)
	    {
	      can_negate = can_invert;
	      can_invert = 0;
	      code = PLUS;
	    }
	  else if (code == MINUS)
	    code = PLUS;
	}
    }

  if (final_invert)
    {
      if (generate)
	emit_constant_insn (cond, gen_rtx_SET (target,
					       gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
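
/* Illustrative example (editorial note): for a SET of 0xfffffff5, the
   positive sequence needs at least two instructions, but the inverted
   remainder is 0x0000000a, a single valid immediate, so the logic above
   picks the inverted sequence and the whole constant is emitted as

	mvn	rA, #10
*/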
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (CONST_INT_P (*op1))
	    {
	      i = INTVAL (*op1);
	      switch (*code)
		{
		case GT:
		case LE:
		  if (i != maxval
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GT ? GE : LT;
		      return;
		    }
		  break;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0)
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GTU ? GEU : LTU;
		      return;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }

	  /* If that did not work, reverse the condition.  */
	  if (!op0_preserve_value)
	    {
	      std::swap (*op0, *op1);
	      *code = (int) swap_condition ((enum rtx_code) *code);
	    }
	}
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GT ? GE : LT;
	  return;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GE ? GT : LE;
	  return;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GTU ? GEU : LTU;
	  return;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GEU ? GTU : LEU;
	  return;
	}
      break;

    default:
      gcc_unreachable ();
    }
}
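
/* Illustrative example (editorial note): (x > 0xfff) uses a constant that
   is not a valid immediate, so the rewrite above turns it into the
   equivalent (x >= 0x1000), whose constant is a single rotated immediate,
   letting the comparison become one cmp instruction.  */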
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value (const_tree type, const_tree func,
		    bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return arm_libcall_value_1 (mode);
}
/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table<libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}

static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, SFmode));

      /* Values from double-precision helper functions are returned in core
	 registers if the selected core only supports single-precision
	 arithmetic, even if we are using the hard-float ABI.  The same is
	 true for single-precision helpers, but we will never be using the
	 hard-float ABI on a CPU which doesn't support single-precision
	 operations in hardware.  */
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
							SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
							DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER (1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}
/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */

int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}
5602 /* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (ie not including vectors and
         complex) are always returned in a register (or registers).
         We don't care about which register here, so we can short-cut
         some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
          && TREE_CODE (type) != VECTOR_TYPE
          && TREE_CODE (type) != COMPLEX_TYPE)
        return false;

      /* Any return value that is no larger than one word can be
         returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
        return false;

      /* Check any available co-processors to see if they accept the
         type as a register candidate (VFP, for example, can return
         some aggregates in consecutive registers).  These aren't
         available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
        return false;

      /* Vector values should be returned using ARM registers, not
         memory (unless they're over 16 bytes, which will break since
         we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
        return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
         larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
         if the type is 'integer like' and every addressable element
         has an offset of zero.  For practical purposes this means
         that the structure can have at most one non bit-field element
         and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
         have been created by C++.  */
      for (field = TYPE_FIELDS (type);
           field && TREE_CODE (field) != FIELD_DECL;
           field = DECL_CHAIN (field))
        continue;

      if (field == NULL)
        return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed.  */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
        return true;

      /* ... Aggregates that are not themselves valid for returning in
         a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
        return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
         since they are not addressable.  */
      for (field = DECL_CHAIN (field);
           field;
           field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (!DECL_BIT_FIELD_TYPE (field))
            return true;
        }

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
         integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
           field;
           field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (FLOAT_TYPE_P (TREE_TYPE (field)))
            return true;

          if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
            return true;
        }

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
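/* Editorial illustration (hypothetical types, not from the original file):
   on an AAPCS-based target  struct s { short a, b; }  is 4 bytes, passes the
   UNITS_PER_WORD test and is returned in r0, while  struct t { int a, b; }
   is 8 bytes and, unless a co-processor such as VFP claims it, is returned
   in memory.  Under APCS,  struct f { float x; }  is rejected by the
   'integer like' rule above because its first field is a float.  */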
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };
static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
         (no argument is ever a candidate for a co-processor
         register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
        {
          if (user_pcs > ARM_PCS_AAPCS_LOCAL)
            sorry ("non-AAPCS derived PCS variant");
          else if (base_rules && user_pcs != ARM_PCS_AAPCS)
            error ("variadic functions must use the base AAPCS variant");
        }

      if (base_rules)
        return ARM_PCS_AAPCS;
      else if (user_convention)
        return user_pcs;
      else if (decl && flag_unit_at_a_time)
        {
          /* Local functions never leak outside this compilation unit,
             so we are free to use whatever conventions are
             appropriate.  */
          /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
          cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
          if (i && i->local)
            return ARM_PCS_AAPCS_LOCAL;
        }
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
                    const_tree fntype ATTRIBUTE_UNUSED,
                    rtx libcall ATTRIBUTE_UNUSED,
                    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
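/* With NUM_VFP_ARG_REGS == 16 (s0-s15) the initialization above yields
   aapcs_vfp_regs_free == 0xffff, one bit per single-precision register;
   aapcs_vfp_allocate below carves argument allocations out of this mask.  */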
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != HFmode)
        return -1;

      if (*modep == VOIDmode)
        *modep = mode;

      if (*modep == mode)
        return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
        return -1;

      if (*modep == VOIDmode)
        *modep = mode;

      if (*modep == mode)
        return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
         and 128-bit vector types, whether or not those modes are
         supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
        {
        case 8:
          mode = V2SImode;
          break;
        case 16:
          mode = V4SImode;
          break;
        default:
          return -1;
        }

      if (*modep == VOIDmode)
        *modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
         equivalent for the purposes of being homogeneous aggregates
         if they are the same size.  */
      if (*modep == mode)
        return 1;

      break;

    case ARRAY_TYPE:
      {
        int count;
        tree index = TYPE_DOMAIN (type);

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
        if (count == -1
            || !index
            || !TYPE_MAX_VALUE (index)
            || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
            || !TYPE_MIN_VALUE (index)
            || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
            || count < 0)
          return -1;

        count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
                      - tree_to_uhwi (TYPE_MIN_VALUE (index)));

        /* There must be no padding.  */
        if (wi::to_wide (TYPE_SIZE (type))
            != count * GET_MODE_BITSIZE (*modep))
          return -1;

        return count;
      }

    case RECORD_TYPE:
      {
        int count = 0;
        int sub_count;
        tree field;

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
          {
            if (TREE_CODE (field) != FIELD_DECL)
              continue;

            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
            if (sub_count < 0)
              return -1;
            count += sub_count;
          }

        /* There must be no padding.  */
        if (wi::to_wide (TYPE_SIZE (type))
            != count * GET_MODE_BITSIZE (*modep))
          return -1;

        return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
        /* These aren't very interesting except in a degenerate case.  */
        int count = 0;
        int sub_count;
        tree field;

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
          {
            if (TREE_CODE (field) != FIELD_DECL)
              continue;

            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
            if (sub_count < 0)
              return -1;
            count = count > sub_count ? count : sub_count;
          }

        /* There must be no padding.  */
        if (wi::to_wide (TYPE_SIZE (type))
            != count * GET_MODE_BITSIZE (*modep))
          return -1;

        return count;
      }

    default:
      break;
    }

  return -1;
}
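/* Editorial illustration (hypothetical type): for
       struct hfa { float x, y, z; };
   the walk sets *modep to SFmode at the first field and counts 1 for each of
   the three REAL_TYPE members, so the RECORD_TYPE case returns 3 with no
   padding violation -- a homogeneous aggregate of three SFmode elements.  A
   struct mixing float and double members fails the *modep match in one of
   the sub-calls and the whole walk returns -1.  */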
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
        {
          sorry ("Thumb-1 hard-float VFP ABI");
          /* sorry() is not immediately fatal, so only display this once.  */
          seen_thumb1_vfp = true;
        }

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_HARD_FLOAT &&
          (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
                                       machine_mode mode, const_tree type,
                                       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
        *count = ag_count;
      else
        return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
           || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
           || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;
  return true;
}
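/* For instance, a DCmode (complex double) argument comes back with
   *base_mode == DFmode and *count == 2; it is then accepted only if
   use_vfp_abi agrees to double-precision registers for this variant.  */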
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
                               machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
                                                &ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
                             const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
                                                &pcum->aapcs_vfp_rmode,
                                                &pcum->aapcs_vfp_rcount);
}
/* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
   for the behaviour of this function.  */

static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
                    const_tree type  ATTRIBUTE_UNUSED)
{
  int rmode_size
    = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
  int shift = rmode_size / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
        pcum->aapcs_vfp_reg_alloc = mask << regno;
        if (mode == BLKmode
            || (mode == TImode && ! TARGET_NEON)
            || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
          {
            int i;
            int rcount = pcum->aapcs_vfp_rcount;
            int rshift = shift;
            machine_mode rmode = pcum->aapcs_vfp_rmode;
            rtx par;
            if (!TARGET_NEON)
              {
                /* Avoid using unsupported vector modes.  */
                if (rmode == V2SImode)
                  rmode = DImode;
                else if (rmode == V4SImode)
                  {
                    rmode = DImode;
                    rcount *= 2;
                    rshift /= 2;
                  }
              }
            par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
            for (i = 0; i < rcount; i++)
              {
                rtx tmp = gen_rtx_REG (rmode,
                                       FIRST_VFP_REGNUM + regno + i * rshift);
                tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                         GEN_INT (i * GET_MODE_SIZE (rmode)));
                XVECEXP (par, 0, i) = tmp;
              }

            pcum->aapcs_reg = par;
          }
        else
          pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
        return true;
      }
  return false;
}
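/* Allocation arithmetic, by way of example: for two DFmode elements,
   shift == 8/4 == 2 and mask == (1 << 4) - 1 == 0xf, so the loop above
   scans s0, s2, s4, ... looking for four consecutive free S registers;
   a hit at regno 4 (d2/d3) records aapcs_vfp_reg_alloc == 0xf << 4.  */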
/* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
   comment there for the behaviour of this function.  */

static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
                               machine_mode mode,
                               const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode
      || (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
          && !TARGET_NEON))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
                                             &ag_mode, &count);

      if (!TARGET_NEON)
        {
          if (ag_mode == V2SImode)
            ag_mode = DImode;
          else if (ag_mode == V4SImode)
            {
              ag_mode = DImode;
              count *= 2;
            }
        }
      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
        {
          rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
          tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
          XVECEXP (par, 0, i) = tmp;
        }

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
                   machine_mode mode  ATTRIBUTE_UNUSED,
                   const_tree type  ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
}
#define AAPCS_CP(X)                             \
  {                                             \
    aapcs_ ## X ## _cum_init,                   \
    aapcs_ ## X ## _is_call_candidate,          \
    aapcs_ ## X ## _allocate,                   \
    aapcs_ ## X ## _is_return_candidate,        \
    aapcs_ ## X ## _allocate_return_reg,        \
    aapcs_ ## X ## _advance                     \
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
     be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a call.  This
     routine must not fail and will only be called if is_return_candidate
     returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
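/* The single entry expands, via AAPCS_CP(vfp), to
   { aapcs_vfp_cum_init, aapcs_vfp_is_call_candidate, aapcs_vfp_allocate,
     aapcs_vfp_is_return_candidate, aapcs_vfp_allocate_return_reg,
     aapcs_vfp_advance },
   so slot 0 of the table routes candidates to the VFP handlers above.  */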
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
                          const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}
static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
        {
          fndecl = fntype;
          fntype = TREE_TYPE (fntype);
        }

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
                                                        TYPE_MODE (type),
                                                        type))
          return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
                           const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
        {
          fndecl = fntype;
          fntype = TREE_TYPE (fntype);
        }

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
                                                        type))
          return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
                                                             mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
        }
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}
static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
                  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
         preparation phase, these are handled elsewhere in the
         compiler.  */

      if (slot >= 0)
        {
          /* A Co-processor register candidate goes either in its own
             class of registers or on the stack.  */
          if (!pcum->aapcs_cprc_failed[slot])
            {
              /* C1.cp - Try to allocate the argument to co-processor
                 registers.  */
              if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
                return;

              /* C2.cp - Put the argument on the stack and note that we
                 can't assign any more candidates in this slot.  We also
                 need to note that we have allocated stack space, so that
                 we won't later try to split a non-cprc candidate between
                 core registers and the stack.  */
              pcum->aapcs_cprc_failed[slot] = true;
              pcum->can_split = false;
            }

          /* We didn't get a register, so this argument goes on the
             stack.  */
          gcc_assert (pcum->can_split == false);
          return;
        }
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if (ncrn & 1)
    {
      int res = arm_needs_doubleword_align (mode, type);
      /* Only warn during RTL expansion of call stmts, otherwise we would
         warn e.g. during gimplification even on functions that will be
         always inlined, and we'd warn multiple times.  Don't warn when
         called in expand_function_start either, as we warn instead in
         arm_function_arg_boundary in that case.  */
      if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
        inform (input_location, "parameter passing for argument of type "
                "%qT changed in GCC 7.1", type);
      else if (res > 0)
        ncrn++;
    }

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
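/* Worked example (editorial): for a base-AAPCS call f (int a, double d, int e),
   'a' lands in r0 by C4 (ncrn 0 -> 1); 'd' needs doubleword alignment, so C3
   rounds ncrn up to 2 and C4 assigns the r2/r3 pair (ncrn -> 4); 'e' then
   finds no core registers left, C6 pins NCRN at 4 and C7/C8 send it to the
   stack.  */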
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
                          rtx libname,
                          tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
        pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
        {
          int i;

          for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
            {
              pcum->aapcs_cprc_failed[i] = false;
              aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
            }
        }
      return;
    }

  /* Legacy ABIs.  */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
           fn_arg;
           fn_arg = TREE_CHAIN (fn_arg))
        pcum->named_count += 1;

      if (! pcum->named_count)
        pcum->named_count = INT_MAX;
    }
}
/* Return 1 if double word alignment is required for argument passing.
   Return -1 if double word alignment used to be required for argument
   passing before PR77728 ABI fix, but is not required anymore.
   Return 0 if double word alignment is not required and wasn't required
   before either.  */
static int
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  if (!type)
    return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;

  /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;

  /* Array types: Use member alignment of element type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;

  int ret = 0;
  /* Record/aggregate types: Use greatest member alignment of any member.  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (DECL_ALIGN (field) > PARM_BOUNDARY)
      {
        if (TREE_CODE (field) == FIELD_DECL)
          return 1;
        else
          /* Before PR77728 fix, we were incorrectly considering also
             other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
             Make sure we can warn about that with -Wpsabi.  */
          ret = -1;
      }

  return ret;
}
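/* For example, 'double' and 'long long' (64-bit alignment) return 1, as does
   struct s { long long x; }; plain 'int' returns 0.  A C++ aggregate whose
   only over-aligned entry is a static data member (a VAR_DECL, not a
   FIELD_DECL) returns -1: pre-PR77728 compilers aligned for it, current ones
   do not, and -Wpsabi reports the change.  */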
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
                  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (mode == VOIDmode)
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
        return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
        {
          pcum->can_split = false;
          return NULL_RTX;
        }
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
    {
      int res = arm_needs_doubleword_align (mode, type);
      if (res < 0 && warn_psabi)
        inform (input_location, "parameter passing for argument of type "
                "%qT changed in GCC 7.1", type);
      else if (res > 0)
        pcum->nregs++;
    }

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (mode, pcum->nregs);
}
static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  if (!ARM_DOUBLEWORD_ALIGN)
    return PARM_BOUNDARY;

  int res = arm_needs_doubleword_align (mode, type);
  if (res < 0 && warn_psabi)
    inform (input_location, "parameter passing for argument of type %qT "
            "changed in GCC 7.1", type);

  return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
}
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
                       tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
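/* For instance, an 8-byte struct arriving when only r3 is left
   (nregs == 3) is split under the legacy rules: NUM_ARG_REGS - nregs == 1
   register remains, so this returns 4 -- four bytes travel in r3 and the
   rest goes on the stack.  */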
/* Update the data in PCUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
                          const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);

      if (pcum->aapcs_cprc_slot >= 0)
        {
          aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
                                                              type);
          pcum->aapcs_cprc_slot = -1;
        }

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (mode)
          && pcum->named_count > pcum->nargs
          && TARGET_IWMMXT_ABI)
        pcum->iwmmxt_nregs += 1;
      else
        pcum->nregs += ARM_NUM_REGS2 (mode, type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
                       machine_mode mode ATTRIBUTE_UNUSED,
                       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,          /* No #pragma [no_]long_calls is in effect.  */
  LONG,         /* #pragma long_calls is in effect.  */
  SHORT         /* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
                          bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
        {
          warning (OPT_Wattributes, "%qE attribute only applies to functions",
                   name);
          *no_add_attrs = true;
        }
      /* FIXME: the argument if any is checked for type attributes;
         should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
          || TREE_CODE (*node) == METHOD_TYPE)
        {
          if (arm_isr_value (args) == ARM_FT_UNKNOWN)
            {
              warning (OPT_Wattributes, "%qE attribute ignored",
                       name);
              *no_add_attrs = true;
            }
        }
      else if (TREE_CODE (*node) == POINTER_TYPE
               && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
                   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
               && arm_isr_value (args) != ARM_FT_UNKNOWN)
        {
          *node = build_variant_type_copy (*node);
          TREE_TYPE (*node) = build_type_attribute_variant
            (TREE_TYPE (*node),
             tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
          *no_add_attrs = true;
        }
      else
        {
          /* Possibly pass this attribute on from the type to a decl.  */
          if (flags & ((int) ATTR_FLAG_DECL_NEXT
                       | (int) ATTR_FLAG_FUNCTION_NEXT
                       | (int) ATTR_FLAG_ARRAY_NEXT))
            {
              *no_add_attrs = true;
              return tree_cons (name, args, NULL_TREE);
            }
          else
            {
              warning (OPT_Wattributes, "%qE attribute ignored",
                       name);
            }
        }
    }

  return NULL_TREE;
}
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
                          int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
                                tree name ATTRIBUTE_UNUSED,
                                tree args ATTRIBUTE_UNUSED,
                                int flags ATTRIBUTE_UNUSED,
                                bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* This function returns true if a function with declaration FNDECL and type
   FNTYPE uses the stack to pass arguments or return variables and false
   otherwise.  This is used for functions with the attributes
   'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
   diagnostic messages if the stack is used.  NAME is the name of the attribute
   used.  */

static bool
cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
{
  function_args_iterator args_iter;
  CUMULATIVE_ARGS args_so_far_v;
  cumulative_args_t args_so_far;
  bool first_param = true;
  tree arg_type, prev_arg_type = NULL_TREE, ret_type;

  /* Error out if any argument is passed on the stack.  */
  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
  args_so_far = pack_cumulative_args (&args_so_far_v);
  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
    {
      rtx arg_rtx;
      machine_mode arg_mode = TYPE_MODE (arg_type);

      prev_arg_type = arg_type;
      if (VOID_TYPE_P (arg_type))
        continue;

      if (!first_param)
        arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
      arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
      if (!arg_rtx
          || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
        {
          error ("%qE attribute not available to functions with arguments "
                 "passed on the stack", name);
          return true;
        }
      first_param = false;
    }

  /* Error out for variadic functions since we cannot control how many
     arguments will be passed and thus stack could be used.  stdarg_p () is not
     used for the checking to avoid browsing arguments twice.  */
  if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
    {
      error ("%qE attribute not available to functions with variable number "
             "of arguments", name);
      return true;
    }

  /* Error out if return value is passed on the stack.  */
  ret_type = TREE_TYPE (fntype);
  if (arm_return_in_memory (ret_type, fntype))
    {
      error ("%qE attribute not available to functions that return value on "
             "the stack", name);
      return true;
    }
  return false;
}
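/* Editorial illustration (hypothetical prototype): marking
       struct big { int a[5]; } f (struct big x);
   as cmse_nonsecure_entry trips both diagnostics above -- the 20-byte
   argument spills past r0-r3, and the 20-byte return value is passed in
   memory.  */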
/* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function declaration tree or otherwise issue a warning.  */

static tree
arm_handle_cmse_nonsecure_entry (tree *node, tree name,
                                 tree /* args */,
                                 int /* flags */,
                                 bool *no_add_attrs)
{
  tree fndecl;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
               name);
      return NULL_TREE;
    }

  /* Ignore attribute for function types.  */
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  fndecl = *node;

  /* Warn for static linkage functions.  */
  if (!TREE_PUBLIC (fndecl))
    {
      warning (OPT_Wattributes, "%qE attribute has no effect on functions "
               "with static linkage", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
                                                      TREE_TYPE (fndecl));
  return NULL_TREE;
}
/* Called upon detection of the use of the cmse_nonsecure_call attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function type tree or otherwise issue a diagnostic.  The
   reason we check this at declaration time is to only allow the use of the
   attribute with declarations of function pointers and not function
   declarations.  This function checks NODE is of the expected type and issues
   diagnostics otherwise using NAME.  If it is not of the expected type
   *NO_ADD_ATTRS will be set to true.  */

static tree
arm_handle_cmse_nonsecure_call (tree *node, tree name,
                                tree /* args */,
                                int /* flags */,
                                bool *no_add_attrs)
{
  tree decl = NULL_TREE, fntype = NULL_TREE;
  tree type;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
               name);
      return NULL_TREE;
    }

  if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
    {
      decl = *node;
      fntype = TREE_TYPE (decl);
    }

  while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
    fntype = TREE_TYPE (fntype);

  if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
               "function pointer", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);

  if (*no_add_attrs)
    return NULL_TREE;

  /* Prevent trees being shared among function types with and without
     cmse_nonsecure_call attribute.  */
  type = TREE_TYPE (decl);

  type = build_distinct_type_copy (type);
  TREE_TYPE (decl) = type;
  fntype = type;

  while (TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      type = fntype;
      fntype = TREE_TYPE (fntype);
      fntype = build_distinct_type_copy (fntype);
      TREE_TYPE (type) = fntype;
    }

  /* Construct a type attribute and add it to the function type.  */
  tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
                          TYPE_ATTRIBUTES (fntype));
  TYPE_ATTRIBUTES (fntype) = attrs;
  return NULL_TREE;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
        return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
        return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  l1 = lookup_attribute ("cmse_nonsecure_call",
                         TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("cmse_nonsecure_call",
                         TYPE_ATTRIBUTES (type2)) != NULL;

  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
        attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
        attr_name = get_identifier ("short_call");
      else
        return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
        return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

        a.  has an __attribute__((long call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

        d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
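/* For example, under -mlong-calls (rule c) a call to an external function
   is emitted as an address load plus an indirect branch, while a callee
   marked __attribute__((short_call)) (rule d) or known to live in the same
   section as the caller (rule f) still gets a plain BL.  */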
/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
     may be used both as target of the call and base register for restoring
     the VFP registers.  */
  if (TARGET_APCS_FRAME && TARGET_ARM
      && TARGET_HARD_FLOAT
      && decl && arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  /* ARMv8-M non-secure entry functions need to return with bxns which is only
     generated for entry functions themselves.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    return false;

  /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
     this would complicate matters for later code generation.  */
  if (TREE_CODE (exp) == CALL_EXPR)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
        return false;
    }

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
         example that we aren't returning a value from the sibling in
         a VFP register but then need to transfer it to a core
         register.  */
      rtx a, b;
      tree decl_or_type = decl;

      /* If it is an indirect function pointer, get the function type.  */
      if (!decl)
        decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));

      a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
                              cfun->decl, false);
      if (!rtx_equal_p (a, b))
        return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* We cannot do a tailcall for an indirect call by descriptor if all the
     argument registers are used because the only register left to load the
     address is IP and it will already contain the static chain.  */
  if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      CUMULATIVE_ARGS cum;
      cumulative_args_t cum_v;

      arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
      cum_v = pack_cumulative_args (&cum);

      for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
        {
          tree type = TREE_VALUE (t);
          if (!VOID_TYPE_P (type))
            arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
        }

      if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
        return false;
    }

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */

static void
require_pic_register (void)
{
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
    {
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM
          && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
        {
          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            crtl->uses_pic_offset_table = 1;
        }
      else
        {
          rtx_insn *seq, *insn;

          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_reg_rtx (Pmode);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            {
              crtl->uses_pic_offset_table = 1;
              start_sequence ();

              if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
                  && arm_pic_register > LAST_LO_REGNUM)
                emit_move_insn (cfun->machine->pic_reg,
                                gen_rtx_REG (Pmode, arm_pic_register));
              else
                arm_load_pic_register (0UL);

              seq = get_insns ();
              end_sequence ();

              for (insn = seq; insn; insn = NEXT_INSN (insn))
                if (INSN_P (insn))
                  INSN_LOCATION (insn) = prologue_location;

              /* We can be called during expansion of PHI nodes, where
                 we can't yet emit instructions directly in the final
                 insn stream.  Queue the insns on the entry edge, they will
                 be committed after everything else is expanded.  */
              insert_insn_on_edge (seq,
                                   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
            }
        }
    }
}
rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      /* VxWorks does not impose a fixed gap between segments; the run-time
         gap can be different from the object-file gap.  We therefore can't
         use GOTOFF unless we are absolutely sure that the symbol is in the
         same segment as the GOT.  Unfortunately, the flexibility of linker
         scripts means that we can't be sure of that in general, so assume
         that GOTOFF is never valid on VxWorks.  */
      /* References to weak symbols cannot be resolved locally: they
         may be overridden by a non-weak definition at link time.  */
      rtx_insn *insn;
      if ((GET_CODE (orig) == LABEL_REF
           || (GET_CODE (orig) == SYMBOL_REF
               && SYMBOL_REF_LOCAL_P (orig)
               && (SYMBOL_REF_DECL (orig)
                   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
          && NEED_GOT_RELOC
          && arm_pic_data_is_text_relative)
        insn = arm_pic_static_addr (orig, reg);
      else
        {
          rtx pat;
          rtx mem;

          /* If this function doesn't have a pic register, create one now.  */
          require_pic_register ();

          pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);

          /* Make the MEM as close to a constant as possible.  */
          mem = SET_SRC (pat);
          gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
          MEM_READONLY_P (mem) = 1;
          MEM_NOTRAP_P (mem) = 1;

          insn = emit_insn (pat);
        }

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
         by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
        return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
          && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
        return orig;

      /* Handle the case where we have:
         const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
         CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
        {
          gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
          return orig;
        }

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                       base == reg ? 0 : reg);

      if (CONST_INT_P (offset))
        {
          /* The base register doesn't really matter, we only want to
             test the index for the appropriate mode.  */
          if (!arm_legitimate_index_p (mode, offset, SET, 0))
            {
              gcc_assert (can_create_pseudo_p ());
              offset = force_reg (Pmode, offset);
            }

          if (CONST_INT_P (offset))
            return plus_constant (Pmode, base, INTVAL (offset));
        }

      if (GET_MODE_SIZE (mode) > 4
          && (GET_MODE_CLASS (mode) == MODE_INT
              || TARGET_SOFT_FLOAT))
        {
          emit_insn (gen_addsi3 (reg, base, offset));
          return reg;
        }

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
/* Find a spare register to use during the prolog of a function.  */

static int
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
    if (!df_regs_ever_live_p (reg))
      return reg;

  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have less than 4
     registers worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): ie crtl->args.pretend_args_size is greater than 0.  */
  if (cfun->machine->uses_anonymous_args
      && crtl->args.pretend_args_size > 0)
    return LAST_ARG_REGNUM;

  /* The other case is when we have fixed arguments but less than 4 registers
     worth.  In this case r3 might be used in the body of the function, but
     it is not being used to convey an argument into the function.  In theory
     we could just check crtl->args.size to see how many bytes are
     being passed in argument registers, but it seems that it is unreliable.
     Sometimes it will have the value 0 when in fact arguments are being
     passed.  (See testcase execute/20021111-1.c for an example).  So we also
     check the args_info.nregs field as well.  The problem with this field is
     that it makes no allowances for arguments that are passed to the
     function but which are not used.  Hence we could miss an opportunity
     when a function has an unused argument in r3.  But it is better to be
     safe than to be sorry.  */
  if (! cfun->machine->uses_anonymous_args
      && crtl->args.size >= 0
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
      && (TARGET_AAPCS_BASED
          ? crtl->args.info.aapcs_ncrn < 4
          : crtl->args.info.nregs < 4))
    return LAST_ARG_REGNUM;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
        if (pushed_regs_mask & (1 << reg))
          return reg;
    }
  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
{
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
    return;

  gcc_assert (flag_pic);

  pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
         never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
         addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
                                UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
        {
          emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
      else /* TARGET_THUMB1 */
        {
          if (arm_pic_register != INVALID_REGNUM
              && REGNO (pic_reg) > LAST_LO_REGNUM)
            {
              /* We will have pushed the pic register, so we should always be
                 able to find a work register.  */
              pic_tmp = gen_rtx_REG (SImode,
                                     thumb_find_work_register (saved_regs));
              emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
              emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
              emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
            }
          else if (arm_pic_register != INVALID_REGNUM
                   && arm_pic_register > LAST_LO_REGNUM
                   && REGNO (pic_reg) <= LAST_LO_REGNUM)
            {
              emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
              emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
              emit_use (gen_rtx_REG (Pmode, arm_pic_register));
            }
          else
            emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx_insn *
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx;

  gcc_assert (flag_pic);

  /* We use an UNSPEC rather than a LABEL_REF because this label
     never appears in the code stream.  */
  labelno = GEN_INT (pic_labelno++);
  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  l1 = gen_rtx_CONST (VOIDmode, l1);

  /* On the ARM the PC register contains 'dot + 8' at the time of the
     addition, on the Thumb it is 'dot + 4'.  */
  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
                               UNSPEC_SYMBOL_OFFSET);
  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

  return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
}
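/* The TARGET_ARM ? 8 : 4 adjustment above mirrors the architectural PC bias:
   when the subsequent 'add rX, pc' executes, reading the PC yields the
   address of that instruction plus 8 in ARM state (plus 4 in Thumb), so the
   constant folded into the UNSPEC must be offset by the same amount for the
   sum to point at the intended object.  */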
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
          || regno >= FIRST_PSEUDO_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}
/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
                                int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
         to fixup invalid register choices.  */
      if (use_ldrd
          && GET_CODE (x) == POST_MODIFY
          && !CONST_INT_P (addend))
        return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
              && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && ((CONST_INT_P (xop1)
                    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now.  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
              && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
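/* Some concrete ARM-state examples of what the checks above accept
   (illustrative):
     [r0]                   plain base register
     [r0, #-4]              base plus immediate (12-bit range for ldr)
     [r0, r1]               base plus register index
     [r0, r1, lsl #2]       base plus scaled index
     [r0], #4               post-increment with writeback
   DImode/DFmode addresses are further restricted to the ldrd/vldr
   ranges checked in arm_legitimate_index_p.  */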
/* Return true if we can avoid creating a constant pool entry for X.  */
static bool
can_avoid_literal_pool_for_label_p (rtx x)
{
  /* Normally we can assign constant values to target registers without
     the help of the constant pool.  But there are cases we have to use
     the constant pool, like:
     1) assigning a label to a register;
     2) sign-extending an 8-bit value to 32 bits and then assigning it
        to a register.

     A constant pool access in the format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of the literal pool (later in function arm_reorg).
     So here we mark such a format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (x))
    return true;

  return false;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
        return 0;

      offset = INTVAL (addend);
      if (GET_MODE_SIZE (mode) <= 4)
        return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
              && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && (thumb2_legitimate_index_p (mode, xop1, strict_p)
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  else if (can_avoid_literal_pool_for_label_p (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
                        int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);

          /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
             If vldr is selected it uses arm_coproc_mem_operand.  */
          if (TARGET_LDRD)
            return val > -256 && val < 256;
          else
            return val > -4096 && val < 4092;
        }

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
            && (mode == HImode
                || mode == HFmode
                || (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
        {
          rtx xiop0 = XEXP (index, 0);
          rtx xiop1 = XEXP (index, 1);

          return ((arm_address_register_rtx_p (xiop0, strict_p)
                   && power_of_two_operand (xiop1, SImode))
                  || (arm_address_register_rtx_p (xiop1, strict_p)
                      && power_of_two_operand (xiop0, SImode)));
        }
      else if (code == LSHIFTRT || code == ASHIFTRT
               || code == ASHIFT || code == ROTATERT)
        {
          rtx op = XEXP (index, 1);

          return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
                  && CONST_INT_P (op)
                  && INTVAL (op) > 0
                  && INTVAL (op) <= 31);
        }
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
          || mode == HFmode
          || (outer == SIGN_EXTEND && mode == QImode))
        range = 256;
      else
        range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
          && INTVAL (index) < range
          && INTVAL (index) > -range);
}
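/* The RANGE values above mirror the instruction encodings: on ARMv4 and
   later the halfword and signed-byte forms (ldrh/ldrsh/ldrsb) only have
   an 8-bit immediate offset, hence RANGE == 256, while word and
   unsigned-byte accesses (ldr/ldrb) take a 12-bit immediate, hence
   RANGE == 4096.  */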
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand, i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            /* Thumb-2 allows only > -256 index range for its core register
               load/stores.  Since we allow SF/DF in core registers, we have
               to use the intersection between -256~4096 (core) and
               -1024~1024 (coprocessor).  */
            && INTVAL (index) > -256
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
         and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
        return (code == CONST_INT
                && INTVAL (index) < 1024
                && INTVAL (index) > -1024
                && (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);
          /* Thumb-2 ldrd only has reg+const addressing modes.
             Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
             If vldr is selected it uses arm_coproc_mem_operand.  */
          if (TARGET_LDRD)
            return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
          else
            return IN_RANGE (val, -255, 4095 - 4);
        }

      return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
               && thumb2_index_mul_operand (xiop1))
              || (arm_address_register_rtx_p (xiop1, strict_p)
                  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
              && CONST_INT_P (op)
              && INTVAL (op) > 0
              && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
          && INTVAL (index) < 4096
          && INTVAL (index) > -256);
}
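/* Worked example for the SF/DF intersection above: a DFmode offset of
   -264 would be fine for vldr (word-aligned, within -1024..1024), but it
   is rejected here because the value may instead live in a core-register
   pair, whose Thumb-2 load/store forms cannot reach below -255.  */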
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
          || regno > LAST_VIRTUAL_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || (GET_MODE_SIZE (mode) >= 4
              && (regno == STACK_POINTER_REGNUM
                  || regno >= FIRST_PSEUDO_REGISTER
                  || x == hard_frame_pointer_rtx
                  || x == arg_pointer_rtx)));
}
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
    return 0;

  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
          || reg_mentioned_p (arg_pointer_rtx, x)
          || reg_mentioned_p (virtual_incoming_args_rtx, x)
          || reg_mentioned_p (virtual_outgoing_args_rtx, x)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
          || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
           && reload_completed
           && (GET_CODE (x) == LABEL_REF
               || (GET_CODE (x) == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
           && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
         will be replaced with STACK, and SP relative addressing only
         permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
          && XEXP (x, 0) != frame_pointer_rtx
          && XEXP (x, 1) != frame_pointer_rtx
          && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
          && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
              || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
        return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
                || XEXP (x, 0) == arg_pointer_rtx)
               && CONST_INT_P (XEXP (x, 1))
               && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
        return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
         larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
         just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
               && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
               && GET_MODE_SIZE (mode) >= 4
               && CONST_INT_P (XEXP (x, 1))
               && INTVAL (XEXP (x, 1)) >= 0
               && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;

      else if (REG_P (XEXP (x, 0))
               && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
                   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
                   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
                       && REGNO (XEXP (x, 0))
                          <= LAST_VIRTUAL_POINTER_REGISTER))
               && GET_MODE_SIZE (mode) >= 4
               && CONST_INT_P (XEXP (x, 1))
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && GET_MODE_SIZE (mode) == 4
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
              && (val + GET_MODE_SIZE (mode)) <= 128
              && (val & 3) == 0);
    }
}
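/* These are the Thumb-1 5-bit scaled immediate offset fields, e.g.:
     ldrb r0, [r1, #31]     size 1: offsets 0..31
     ldrh r0, [r1, #62]     size 2: even offsets 0..62
     ldr  r0, [r1, #124]    size 4: word-aligned offsets 0..124
   (larger modes must fit entirely below 128 bytes).  */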
/* Worker function for TARGET_LEGITIMATE_ADDRESS_P.  */
static bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */
static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
        return LO_REGS;
      else
        return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
         otherwise other uses of r0 (e.g. setting up function arguments) may
         clobber the value.  */
      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}
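/* For reference: the "hard" variant reads the CP15 thread register
   TPIDRURO (mrc p15, 0, <reg>, c13, c0, 3), while the "soft" variant
   calls the __aeabi_read_tp helper, which returns the thread pointer in
   r0 -- hence the immediate copy out of r0 above.  */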
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx_insn *
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
                        gen_rtvec (4, x, GEN_INT (reloc), label,
                                   GEN_INT (TARGET_ARM ? 8 : 4)),
                        UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
                                     LCT_PURE, /* LCT_CONST?  */
                                     Pmode, reg, Pmode);

  rtx_insn *insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
                                       gen_rtx_CONST (VOIDmode, label),
                                       GEN_INT (!TARGET_ARM)),
                            UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, ret, eqv, addend;
  rtx_insn *insns;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          /* Original scheme */
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, x);
        }
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

          /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
             share the LDM result with other LD model accesses.  */
          eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
                                UNSPEC_TLS);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, eqv);

          /* Load the addend.  */
          addend = gen_rtx_UNSPEC (Pmode,
                                   gen_rtvec (2, x,
                                              GEN_INT (TLS_LDO32)),
                                   UNSPEC_TLS);
          addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
          dest = gen_rtx_PLUS (Pmode, dest, addend);
        }
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
                                       GEN_INT (TARGET_ARM ? 8 : 4)),
                            UNSPEC_TLS);
      reg = load_tls_operand (sum, reg);

      if (TARGET_ARM)
        emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
        emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
      else
        {
          emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
          emit_move_insn (reg, gen_const_mem (SImode, reg));
        }

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (2, x, GEN_INT (TLS_LE32)),
                            UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
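/* E.g. in the local-exec model above, the address of a TLS variable "v"
   becomes (plus tp (const (unspec [v, TLS_LE32] UNSPEC_TLS))); the
   UNSPEC is assembled as a constant offset of v from the thread pointer
   (an R_ARM_TLS_LE32 relocation).  */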
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
        {
          addend = XEXP (XEXP (x, 0), 1);
          x = XEXP (XEXP (x, 0), 0);
        }

      if (GET_CODE (x) != SYMBOL_REF)
        return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
        {
          x = gen_rtx_PLUS (SImode, x, addend);
          orig_x = x;
        }
      else
        return x;
    }

  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
        return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
          && !symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
          && CONST_INT_P (xop1))
        {
          HOST_WIDE_INT n, low_n;
          rtx base_reg, val;
          n = INTVAL (xop1);

          /* VFP addressing modes actually allow greater offsets, but for
             now we just stick with the lowest common denominator.  */
          if (mode == DImode || mode == DFmode)
            {
              low_n = n & 0x0f;
              n &= ~0x0f;
              if (low_n > 4)
                {
                  n += 16;
                  low_n -= 16;
                }
            }
          else
            {
              low_n = ((mode) == TImode ? 0
                       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
              n -= low_n;
            }

          base_reg = gen_reg_rtx (SImode);
          val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
          emit_move_insn (base_reg, val);
          x = plus_constant (Pmode, base_reg, low_n);
        }
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the mini pool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
         use an 8-bit index.  So let's use a 12-bit index for SImode only
         and hope that arm_gen_constant will enable ldrb to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits) / 2)
        {
          /* It'll most probably be more efficient to generate the base
             with more bits set and use a negative index instead.  */
          base |= mask;
          index -= mask;
        }
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
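/* Worked example of the constant-address split above: an SImode load
   from absolute address 0x12345678 uses bits == 12, giving
   base == 0x12345000 and index == 0x678, i.e. one constant build plus a
   single "ldr rD, [rBase, #0x678]"; nearby absolute addresses can then
   reuse rBase.  */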
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
          || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
         then offsetting that.  Don't do this when optimizing for space
         since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
          && offset < 256 + 31 * GET_MODE_SIZE (mode))
        {
          HOST_WIDE_INT delta;

          if (offset >= 256)
            delta = offset - (256 - GET_MODE_SIZE (mode));
          else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
            delta = 31 * GET_MODE_SIZE (mode);
          else
            delta = offset & (~31 * GET_MODE_SIZE (mode));

          xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
                                NULL_RTX);
          x = plus_constant (Pmode, xop0, delta);
        }
      else if (offset < 0 && offset > -256)
        /* Small negative offsets are best done with a subtract before the
           dereference, forcing these into a register normally takes two
           instructions.  */
        x = force_operand (x, NULL_RTX);
      else
        {
          /* For the remaining cases, force the constant into a register.  */
          xop1 = force_reg (SImode, xop1);
          x = gen_rtx_PLUS (SImode, xop0, xop1);
        }
    }
  else if (GET_CODE (x) == PLUS
           && s_register_operand (XEXP (x, 1), SImode)
           && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
        {
          /* ARM currently does not provide relocations to encode TLS variables
             into AArch32 instructions, only data, so there is no way to
             currently implement these if a literal pool is disabled.  */
          if (arm_disable_literal_pool)
            sorry ("accessing thread-local storage is not currently supported "
                   "with -mpure-code or -mslow-flash-data");

          return true;
        }

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
         TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
        iter.skip_subrtxes ();
    }
  return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
     RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
     for ARMv8-M Baseline or later the result is valid.  */
  if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (CONST_INT_P (x)
          || CONST_DOUBLE_P (x)
          || CONSTANT_ADDRESS_P (x)
          || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
          || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
          && (TARGET_32BIT
              ? arm_legitimate_constant_p_1 (mode, x)
              : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
          && !offset_within_block_p (base, INTVAL (offset)))
        return true;
    }
  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (arm_arch6m && arm_m_profile_small_mul)
        return COSTS_N_INSNS (32);

      if (CONST_INT_P (XEXP (x, 1)))
        {
          int cycles = 0;
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

          while (i)
            {
              i >>= 2;
              cycles++;
            }
          return COSTS_N_INSNS (2) + cycles;
        }
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
         the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
              + 4 * ((MEM_P (SET_SRC (x)))
                     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
        {
          if (UINTVAL (x) < 256
              /* 16-bit constant.  */
              || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
            return 0;
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
                || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
                 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
        return total;

      if (arm_arch6)
        return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
         we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
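/* Example of the MULT costing above: for a multiply by 100 the loop
   shifts i = 100 right two bits at a time (100 -> 25 -> 6 -> 1 -> 0),
   so cycles == 4 and the result is COSTS_N_INSNS (2) + 4.  */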
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words, cost;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
         defined by RTL expansion, especially for the expansion of
         multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
           && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
          || (GET_CODE (XEXP (x, 1)) == MULT
              && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
        return COSTS_N_INSNS (2);
      /* Fall through.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
        {
          /* Thumb1 mul instruction can't operate on const.  We must load it
             into a register first.  */
          int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
          /* For the targets which have a very small and high-latency multiply
             unit, we prefer to synthesize the mult with up to 5 instructions,
             giving a good balance between size and performance.  */
          if (arm_arch6m && arm_m_profile_small_mul)
            return COSTS_N_INSNS (5);
          else
            return COSTS_N_INSNS (1) + const_size;
        }
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
         the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      cost = COSTS_N_INSNS (words);
      if (satisfies_constraint_J (SET_SRC (x))
          || satisfies_constraint_K (SET_SRC (x))
             /* Too big an immediate for a 2-byte mov, using MOVT.  */
          || (CONST_INT_P (SET_SRC (x))
              && UINTVAL (SET_SRC (x)) >= 256
              && TARGET_HAVE_MOVT
              && satisfies_constraint_j (SET_SRC (x)))
             /* thumb1_movdi_insn.  */
          || ((words > 1) && MEM_P (SET_SRC (x))))
        cost += COSTS_N_INSNS (1);
      return cost;

    case CONST_INT:
      if (outer == SET)
        {
          if (UINTVAL (x) < 256)
            return COSTS_N_INSNS (1);
          /* movw is 4byte long.  */
          if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
            return COSTS_N_INSNS (2);
          /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
          if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
            return COSTS_N_INSNS (2);
          /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
                || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
              + COSTS_N_INSNS (1)
                * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
                 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
        {
        case E_QImode:
          return (1 + (mode == DImode ? 4 : 0)
                  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

        case E_HImode:
          return (4 + (mode == DImode ? 4 : 0)
                  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

        case E_SImode:
          return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

        default:
          return 99;
        }

    default:
      return 99;
    }
}
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
           || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
        *shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
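/* E.g. for (plus (mult (reg r1) (const_int 4)) (reg r2)) -- the RTL form
   of "add r0, r2, r1, lsl #2" -- calling shifter_op_p on the MULT
   returns (reg r1) and leaves *SHIFT_REG untouched, because the shift
   amount is a constant.  */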
static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  rtx_code code = GET_CODE (x);
  gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
         use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
        *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
                  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
                                 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
        *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
                  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
                                 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      if (speed_p)
        *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;

      return true;
    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return true;
}
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
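/* E.g. LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when optimizing
   for speed and COSTS_N_INSNS (4) at -Os.  */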
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0)
/* Helper function for arm_rtx_costs_internal.  Calculates the cost of a MEM,
   considering the costs of the addressing mode and memory access
   separately.  */
static bool
arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
	       int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (flag_pic
      && GET_CODE (XEXP (x, 0)) == PLUS
      && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
    /* This will be split into two instructions.  Add the cost of the
       additional instruction here.  The cost of the memory access is computed
       below.  See arm.md:calculate_pic_address.  */
    *cost += COSTS_N_INSNS (1);

  /* Calculate cost of the addressing mode.  */
  if (speed_p)
    {
      arm_addr_mode_op op_type;
      switch (GET_CODE (XEXP (x, 0)))
	{
	default:
	case REG:
	  op_type = AMO_DEFAULT;
	  break;
	case MINUS:
	  /* MINUS does not appear in RTL, but the architecture supports it,
	     so handle this case defensively.  */
	  /* fall through */
	case PLUS:
	  op_type = AMO_NO_WB;
	  break;
	case PRE_INC:
	case PRE_DEC:
	case POST_INC:
	case POST_DEC:
	case PRE_MODIFY:
	case POST_MODIFY:
	  op_type = AMO_WB;
	  break;
	}

      if (VECTOR_MODE_P (mode))
	*cost += current_tune->addr_mode_costs->vector[op_type];
      else if (FLOAT_MODE_P (mode))
	*cost += current_tune->addr_mode_costs->fp[op_type];
      else
	*cost += current_tune->addr_mode_costs->integer[op_type];
    }

  /* Calculate cost of memory access.  */
  if (speed_p)
    {
      if (FLOAT_MODE_P (mode))
	{
	  if (GET_MODE_SIZE (mode) == 8)
	    *cost += extra_cost->ldst.loadd;
	  else
	    *cost += extra_cost->ldst.loadf;
	}
      else if (VECTOR_MODE_P (mode))
	*cost += extra_cost->ldst.loadv;
      else
	{
	  /* Integer modes.  */
	  if (GET_MODE_SIZE (mode) == 8)
	    *cost += extra_cost->ldst.ldrd;
	  else
	    *cost += extra_cost->ldst.load;
	}
    }

  return true;
}
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
			const struct cpu_cost_table *extra_cost,
			int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (TARGET_THUMB1)
    {
      if (speed_p)
	*cost = thumb1_rtx_costs (x, code, outer_code);
      else
	*cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case SET:
      /* SET RTXs don't have a mode so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));

      if (REG_P (SET_SRC (x))
	  && REG_P (SET_DEST (x)))
	{
	  /* Assume that most copies can be done with a single insn,
	     unless we don't have HW FP, in which case everything
	     larger than word mode will require two insns.  */
	  *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
				   && GET_MODE_SIZE (mode) > 4)
				  ? 2 : 1));
	  /* Conditional register moves can be encoded
	     in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
	    *cost >>= 1;

	  return true;
	}

      if (CONST_INT_P (SET_SRC (x)))
	{
	  /* Handle CONST_INT here, since the value doesn't have a mode
	     and we would otherwise be unable to work out the true cost.  */
	  *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
			    0, speed_p);
	  outer_code = SET;
	  /* Slightly lower the cost of setting a core reg to a constant.
	     This helps break up chains and allows for better scheduling.  */
	  if (REG_P (SET_DEST (x))
	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
	    *cost -= 1;
	  x = SET_SRC (x);
	  /* Immediate moves with an immediate in the range [0, 255] can be
	     encoded in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
	    *cost >>= 1;
	  goto const_int_cost;
	}

      return false;
    case MEM:
      return arm_mem_costs (x, extra_cost, cost, speed_p);
    case PARALLEL:
      {
	/* Calculations of LDM costs are complex.  We assume an initial cost
	   (ldm_1st) which will load the number of registers mentioned in
	   ldm_regs_per_insn_1st registers; then each additional
	   ldm_regs_per_insn_subsequent registers cost one more insn.  The
	   formula for N regs is thus:

	     ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
				       + ldm_regs_per_insn_subsequent - 1)
				      / ldm_regs_per_insn_subsequent).

	   Additional costs may also be added for addressing.  A similar
	   formula is used for STM.  */

	bool is_ldm = load_multiple_operation (x, SImode);
	bool is_stm = store_multiple_operation (x, SImode);

	if (is_ldm || is_stm)
	  {
	    if (speed_p)
	      {
		HOST_WIDE_INT nregs = XVECLEN (x, 0);
		HOST_WIDE_INT regs_per_insn_1st = is_ldm
				? extra_cost->ldst.ldm_regs_per_insn_1st
				: extra_cost->ldst.stm_regs_per_insn_1st;
		HOST_WIDE_INT regs_per_insn_sub = is_ldm
				? extra_cost->ldst.ldm_regs_per_insn_subsequent
				: extra_cost->ldst.stm_regs_per_insn_subsequent;

		*cost += regs_per_insn_1st
			 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
					   + regs_per_insn_sub - 1)
					  / regs_per_insn_sub);
		return true;
	      }
	  }
	return false;
      }
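    /* Worked example of the LDM formula above: a 5-register LDM with
       ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2
       adds 2 + COSTS_N_INSNS ((3 + 2 - 1) / 2) == 2 + COSTS_N_INSNS (2)
       to the base cost.  */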
    case DIV:
    case UDIV:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*cost += COSTS_N_INSNS (speed_p
				? extra_cost->fp[mode != SFmode].div : 0);
      else if (mode == SImode && TARGET_IDIV)
	*cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
      else
	*cost = LIBCALL_COST (2);

      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
	 possible udiv is preferred.  */
      *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */
    case MOD:
      /* MOD by a power of 2 can be expanded as:
	 rsbs    r1, r0, #0
	 and     r0, r0, #(n - 1)
	 and     r1, r1, #(n - 1)
	 rsbpl   r0, r1, #0.  */
      if (CONST_INT_P (XEXP (x, 1))
	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
	  && mode == SImode)
	{
	  *cost += COSTS_N_INSNS (3);

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical
		     + extra_cost->alu.arith;
	  return true;
	}

      /* Fall-through.  */
    case UMOD:
      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
	 possible udiv is preferred.  */
      *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */
    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (1)
		    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += extra_cost->alu.shift_reg;
	  return true;
	}
      /* Fall through.  */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (2)
		    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	  /* Slightly disparage left shift by 1 so we prefer adddi3.  */
	  if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
	    *cost += 1;
	  return true;
	}
      else if (mode == SImode)
	{
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  /* Slightly disparage register shifts at -Os, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *cost += (speed_p ? extra_cost->alu.shift_reg : 1
		      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_INT
	       && GET_MODE_SIZE (mode) < 4)
	{
	  if (code == ASHIFT)
	    {
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* Slightly disparage register shifts at -Os, but not by
		 much.  */
	      if (!CONST_INT_P (XEXP (x, 1)))
		*cost += (speed_p ? extra_cost->alu.shift_reg : 1
			  + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	    }
	  else if (code == LSHIFTRT || code == ASHIFTRT)
	    {
	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
		{
		  /* Can use SBFX/UBFX.  */
		  if (speed_p)
		    *cost += extra_cost->alu.bfx;
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		}
	      else
		{
		  *cost += COSTS_N_INSNS (1);
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  if (speed_p)
		    {
		      if (CONST_INT_P (XEXP (x, 1)))
			*cost += 2 * extra_cost->alu.shift;
		      else
			*cost += (extra_cost->alu.shift
				  + extra_cost->alu.shift_reg);
		    }
		  else
		    /* Slightly disparage register shifts.  */
		    *cost += !CONST_INT_P (XEXP (x, 1));
		}
	    }
	  else /* Rotates.  */
	    {
	      *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      if (speed_p)
		{
		  if (CONST_INT_P (XEXP (x, 1)))
		    *cost += (2 * extra_cost->alu.shift
			      + extra_cost->alu.log_shift);
		  else
		    *cost += (extra_cost->alu.shift
			      + extra_cost->alu.shift_reg
			      + extra_cost->alu.log_shift_reg);
		}
	    }
	  return true;
	}

      *cost = LIBCALL_COST (2);
      return false;
    case BSWAP:
      if (arm_arch6)
	{
	  if (mode == SImode)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.rev;

	      return false;
	    }
	}
      else
	{
	  /* No rev instruction available.  Look at arm_legacy_rev
	     and thumb_legacy_rev for the form of RTL used then.  */
	  if (TARGET_THUMB)
	    {
	      *cost += COSTS_N_INSNS (9);

	      if (speed_p)
		{
		  *cost += 6 * extra_cost->alu.shift;
		  *cost += 3 * extra_cost->alu.logical;
		}
	    }
	  else
	    {
	      *cost += COSTS_N_INSNS (4);

	      if (speed_p)
		{
		  *cost += 2 * extra_cost->alu.shift;
		  *cost += extra_cost->alu.arith_shift;
		  *cost += 2 * extra_cost->alu.logical;
		}
	    }
	  return true;
	}
      return false;
    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      || GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      rtx mul_op0, mul_op1, sub_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      if (GET_CODE (XEXP (x, 0)) == MULT)
		{
		  mul_op0 = XEXP (XEXP (x, 0), 0);
		  mul_op1 = XEXP (XEXP (x, 0), 1);
		  sub_op = XEXP (x, 1);
		}
	      else
		{
		  mul_op0 = XEXP (XEXP (x, 1), 0);
		  mul_op1 = XEXP (XEXP (x, 1), 1);
		  sub_op = XEXP (x, 0);
		}

	      /* The first operand of the multiply may be optionally
		 negated.  */
	      if (GET_CODE (mul_op0) == NEG)
		mul_op0 = XEXP (mul_op0, 0);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (sub_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_by_reg = NULL;
	  rtx shift_op;
	  rtx non_shift_op;

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
	  if (shift_op == NULL)
	    {
	      shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
	      non_shift_op = XEXP (x, 0);
	    }
	  else
	    non_shift_op = XEXP (x, 1);

	  if (shift_op != NULL)
	    {
	      if (shift_by_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
	      *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
	      return true;
	    }

	  if (arm_arch_thumb2
	      && GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      /* MLS.  */
	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 0)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  /* We check both sides of the MINUS for shifter operands since,
	     unlike PLUS, it's not commutative.  */

	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);

	  /* Slightly disparage, as we might need to widen the result.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }

	  return false;
	}

      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	    {
	      rtx op1 = XEXP (x, 1);

	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;

	      if (GET_CODE (op1) == ZERO_EXTEND)
		*cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
				   0, speed_p);
	      else
		*cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				 0, speed_p);
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));
	      *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    GET_CODE (XEXP (x, 1)), 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;
    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op0, mul_op1, add_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      mul_op0 = XEXP (XEXP (x, 0), 0);
	      mul_op1 = XEXP (XEXP (x, 0), 1);
	      add_op = XEXP (x, 1);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (add_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Narrow modes can be synthesized in SImode, but the range
	 of useful sub-operations is limited.  Check for shift operations
	 on one of the operands.  Only left shifts can be used in the
	 narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      /* Slightly penalize a narrow operation as the result may
		 need widening.  */
	      *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
	      return true;
	    }

	  /* Slightly penalize a narrow operation as the result may
	     need widening.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_op, shift_reg;

	  if (TARGET_INT_SIMD
	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	    {
	      /* UXTA[BH] or SXTA[BH].  */
	      if (speed_p)
		*cost += extra_cost->alu.extend_arith;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
	      return true;
	    }

	  shift_reg = NULL;
	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op = XEXP (x, 0);

	      if (TARGET_DSP_MULTIPLY
		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
				      == 16))))))
		{
		  /* SMLA[BT][BT].  */
		  if (speed_p)
		    *cost += extra_cost->mult[0].extend_add;
		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
				      SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
					SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
			+ rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (mode == DImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend_add;
	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
				    ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));

	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case IOR:
      if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
	{
	  if (speed_p)
	    *cost += extra_cost->alu.rev;

	  return true;
	}
      /* Fall through.  */
    case AND: case XOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
	  rtx op0 = XEXP (x, 0);
	  rtx shift_op, shift_reg;

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);

	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.logical;
	      *cost += rtx_cost (op0, mode, code, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  *cost += (rtx_cost (op0, mode, code, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}

      if (mode == DImode)
	{
	  rtx op0 = XEXP (x, 0);
	  enum rtx_code subcode = GET_CODE (op0);

	  *cost += COSTS_N_INSNS (1);

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  if (GET_CODE (op0) == ZERO_EXTEND)
	    {
	      if (speed_p)
		*cost += 2 * extra_cost->alu.logical;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }
	  else if (GET_CODE (op0) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical;

	  return true;
	}
      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;
    case MULT:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  rtx op0 = XEXP (x, 0);

	  if (GET_CODE (op0) == NEG && !flag_rounding_math)
	    op0 = XEXP (op0, 0);

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].mult;

	  *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      if (mode == SImode)
	{
	  if (TARGET_DSP_MULTIPLY
	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			      && (INTVAL (XEXP (XEXP (x, 1), 1))
				  == 16))))))
	    {
	      /* SMUL[TB][TB].  */
	      if (speed_p)
		*cost += extra_cost->mult[0].extend;
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
				 SIGN_EXTEND, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
				 SIGN_EXTEND, 1, speed_p);
	      return true;
	    }
	  if (speed_p)
	    *cost += extra_cost->mult[0].simple;
	  return false;
	}

      if (mode == DImode)
	{
	  if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	       && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
	      || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		  && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    ZERO_EXTEND, 0, speed_p));
	      return true;
	    }

	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))

	  if (GET_CODE (XEXP (x, 0)) == MULT)

	      *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);

	    *cost += extra_cost->fp[mode != SFmode].neg;

      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)

	  *cost = LIBCALL_COST (1);

      if (mode == SImode)

	  if (GET_CODE (XEXP (x, 0)) == ABS)

	      *cost += COSTS_N_INSNS (1);
	      /* Assume the non-flag-changing variant.  */

		*cost += (extra_cost->alu.log_shift
			  + extra_cost->alu.arith_shift);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);

	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)

	      *cost += COSTS_N_INSNS (1);
	      /* No extra cost for MOV imm and MVN imm.  */
	      /* If the comparison op is using the flags, there's no further
		 cost, otherwise we need to add the cost of the comparison.  */
	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
		    && XEXP (XEXP (x, 0), 1) == const0_rtx))

		  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
		  *cost += (COSTS_N_INSNS (1)
			    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,

			    + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,

		    *cost += extra_cost->alu.arith;

	    *cost += extra_cost->alu.arith;

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)

	  /* Slightly disparage, as we might need an extend operation.  */

	    *cost += extra_cost->alu.arith;

      if (mode == DImode)

	  *cost += COSTS_N_INSNS (1);

	    *cost += 2 * extra_cost->alu.arith;

      *cost = LIBCALL_COST (1);
      if (mode == SImode)

	  rtx shift_reg = NULL;

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

	      if (shift_reg != NULL)

		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);

		*cost += extra_cost->alu.log_shift;
	      *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);

	    *cost += extra_cost->alu.logical;

      if (mode == DImode)

	  *cost += COSTS_N_INSNS (1);

      *cost += LIBCALL_COST (1);

      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)

	  *cost += COSTS_N_INSNS (3);

	  int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
	  int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);

	  *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
	  /* Assume that if one arm of the if_then_else is a register,
	     that it will be tied with the result and eliminate the
	     conditional insn.  */
	  if (REG_P (XEXP (x, 1)))

	  else if (REG_P (XEXP (x, 2)))

	      if (extra_cost->alu.non_exec_costs_exec)
		*cost += op1cost + op2cost + extra_cost->alu.non_exec;

		*cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;

	    *cost += op1cost + op2cost;
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)

	  machine_mode op0mode;
	  /* We'll mostly assume that the cost of a compare is the cost of the
	     LHS.  However, there are some notable exceptions.  */

	  /* Floating point compares are never done as side-effects.  */
	  op0mode = GET_MODE (XEXP (x, 0));
	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))

		*cost += extra_cost->fp[op0mode != SFmode].compare;

	      if (XEXP (x, 1) == CONST0_RTX (op0mode))

		  *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);

	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)

	      *cost = LIBCALL_COST (2);

	  /* DImode compares normally take two insns.  */
	  if (op0mode == DImode)

	      *cost += COSTS_N_INSNS (1);

		*cost += 2 * extra_cost->alu.arith;

	  if (op0mode == SImode)

	      if (XEXP (x, 1) == const0_rtx
		  && !(REG_P (XEXP (x, 0))
		       || (GET_CODE (XEXP (x, 0)) == SUBREG
			   && REG_P (SUBREG_REG (XEXP (x, 0))))))

		  *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);

		  /* Multiply operations that set the flags are often
		     significantly more expensive.  */

		      && GET_CODE (XEXP (x, 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;

		      && GET_CODE (XEXP (x, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),

		    *cost += extra_cost->mult[0].flag_setting;

	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	      if (shift_op != NULL)

		  if (shift_reg != NULL)

		      *cost += rtx_cost (shift_reg, op0mode, ASHIFT,

			*cost += extra_cost->alu.arith_shift_reg;

		    *cost += extra_cost->alu.arith_shift;

		  *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
		  *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);

		*cost += extra_cost->alu.arith;
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))

		  *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);

      *cost = LIBCALL_COST (2);
      if (outer_code == SET)

	  /* Is it a store-flag operation?  */
	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	      && XEXP (x, 1) == const0_rtx)

	      /* Thumb also needs an IT insn.  */
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);

	  if (XEXP (x, 1) == const0_rtx)

		  /* LSR Rd, Rn, #31.  */

		    *cost += extra_cost->alu.shift;

		  *cost += COSTS_N_INSNS (1);

		  /* RSBS T1, Rn, Rn, LSR #31 ...  */
		  *cost += COSTS_N_INSNS (1);

		    *cost += extra_cost->alu.arith_shift;

		  /* RSB  Rd, Rn, Rn, ASR #1
		     LSR  Rd, Rd, #31.  */
		  *cost += COSTS_N_INSNS (1);

		    *cost += (extra_cost->alu.arith_shift
			      + extra_cost->alu.shift);

		  *cost += COSTS_N_INSNS (1);

		    *cost += extra_cost->alu.shift;

		  /* Remaining cases are either meaningless or would take
		     three insns anyway.  */
		  *cost = COSTS_N_INSNS (3);

	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);

	  *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
	  if (CONST_INT_P (XEXP (x, 1))
	      && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))

	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);

      /* Not directly inside a set.  If it involves the condition code
	 register it must be the condition for a branch, cond_exec or
	 I_T_E operation.  Since the comparison is performed elsewhere
	 this is just the control part which has no additional
	 cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	       && XEXP (x, 1) == const0_rtx)

      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))

	    *cost += extra_cost->fp[mode != SFmode].neg;

      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)

	  *cost = LIBCALL_COST (1);

      if (mode == SImode)

	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;

      *cost = LIBCALL_COST (1);
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
	  && MEM_P (XEXP (x, 0)))

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);

	      if (GET_MODE (XEXP (x, 0)) == SImode)
		*cost += extra_cost->ldst.load;

		*cost += extra_cost->ldst.load_sign_extend;

	      if (mode == DImode)
		*cost += extra_cost->alu.shift;

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)

	  /* We have SXTB/SXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

	    *cost += extra_cost->alu.extend;

      else if (GET_MODE (XEXP (x, 0)) != SImode)

	  /* Needs two shifts.  */
	  *cost += COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

	    *cost += 2 * extra_cost->alu.shift;

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)

	  *cost += COSTS_N_INSNS (1);

	    *cost += extra_cost->alu.shift;

	   || GET_MODE (XEXP (x, 0)) == SImode
	   || GET_MODE (XEXP (x, 0)) == QImode)
	  && MEM_P (XEXP (x, 0)))

	  *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)

	  /* UXTB can be a shorter instruction in Thumb2, but it might
	     be slower than the AND Rd, Rn, #255 alternative.  When
	     optimizing for speed it should never be slower to use
	     AND, and we don't really model 16-bit vs 32-bit insns
	     here.  */

	    *cost += extra_cost->alu.logical;

      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)

	  /* We have UXTB/UXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

	    *cost += extra_cost->alu.extend;

      else if (GET_MODE (XEXP (x, 0)) != SImode)

	  /* Needs two shifts.  It's marginally preferable to use
	     shifts rather than two BIC instructions as the second
	     shift may merge with a subsequent insn as a shifter
	     op.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

	    *cost += 2 * extra_cost->alu.shift;

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)

	  *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */
      /* CONST_INT has no mode, so we cannot tell for sure how many
	 insns are really going to be needed.  The best we can do is
	 look at the value passed.  If it fits in SImode, then assume
	 that's the mode it will be used for.  Otherwise assume it
	 will be used in DImode.  */
      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))

	  /* Avoid blowing up in arm_gen_constant ().  */
	  if (!(outer_code == PLUS
		|| outer_code == AND
		|| outer_code == IOR
		|| outer_code == XOR
		|| outer_code == MINUS))

	  if (mode == SImode)

	      *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
							INTVAL (x), NULL, NULL,

	      *cost += COSTS_N_INSNS (arm_gen_constant
				      (outer_code, SImode, NULL,
				       trunc_int_for_mode (INTVAL (x), SImode),

				      + arm_gen_constant (outer_code, SImode, NULL,
							  INTVAL (x) >> 32, NULL,

      if (arm_arch_thumb2 && !flag_pic)
	*cost += COSTS_N_INSNS (1);

	*cost += extra_cost->ldst.load;

	  *cost += COSTS_N_INSNS (1);

	  *cost += COSTS_N_INSNS (1);

	    *cost += extra_cost->alu.arith;

      *cost = COSTS_N_INSNS (4);
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))

	  if (vfp3_const_double_rtx (x))

		*cost += extra_cost->fp[mode == DFmode].fpconst;

	      if (mode == DFmode)
		*cost += extra_cost->ldst.loadd;

		*cost += extra_cost->ldst.loadf;

	    *cost += COSTS_N_INSNS (1 + (mode == DFmode));

      *cost = COSTS_N_INSNS (4);

	  && TARGET_HARD_FLOAT
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*cost = COSTS_N_INSNS (1);

	*cost = COSTS_N_INSNS (4);

      /* When optimizing for size, we prefer constant pool entries to
	 MOVW/MOVT pairs, so bump the cost of these slightly.  */

	*cost += extra_cost->alu.clz;

      if (XEXP (x, 1) == const0_rtx)

	    *cost += extra_cost->alu.log_shift;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);

      /* Fall through.  */

      *cost += COSTS_N_INSNS (1);
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))

	    *cost += extra_cost->mult[1].extend;
	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
			      ZERO_EXTEND, 0, speed_p)
		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
				ZERO_EXTEND, 0, speed_p));

      *cost = LIBCALL_COST (1);

    case UNSPEC_VOLATILE:

      return arm_unspec_cost (x, outer_code, speed_p, cost);

      /* Reading the PC is like reading any other register.  Writing it
	 is more expensive, but we take that into account elsewhere.  */

      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */

	  && CONST_INT_P (XEXP (x, 1))
	  && CONST_INT_P (XEXP (x, 2)))

	    *cost += extra_cost->alu.bfx;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);

      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost += COSTS_N_INSNS (1);

	*cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
      if (TARGET_HARD_FLOAT)

	    *cost += extra_cost->fp[mode == DFmode].widen;

	      && GET_MODE (XEXP (x, 0)) == HFmode)

	      /* Pre v8, widening HF->DF is a two-step process, first
		 widening to SFmode.  */
	      *cost += COSTS_N_INSNS (1);

		*cost += extra_cost->fp[0].widen;

	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

      *cost = LIBCALL_COST (1);

    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)

	    *cost += extra_cost->fp[mode == DFmode].narrow;
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

	  /* Vector modes?  */

      *cost = LIBCALL_COST (1);

      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)

	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  rtx op2 = XEXP (x, 2);

	  /* vfms or vfnma.  */
	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);

	  /* vfnms or vfnma.  */
	  if (GET_CODE (op2) == NEG)
	    op2 = XEXP (op2, 0);

	  *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
	  *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
	  *cost += rtx_cost (op2, mode, FMA, 2, speed_p);

	    *cost += extra_cost->fp[mode == DFmode].fma;

      *cost = LIBCALL_COST (3);

      if (TARGET_HARD_FLOAT)

	  /* The *combine_vcvtf2i reduces a vmul+vcvt into
	     a vcvt fixed-point conversion.  */
	  if (code == FIX && mode == SImode
	      && GET_CODE (XEXP (x, 0)) == FIX
	      && GET_MODE (XEXP (x, 0)) == SFmode
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))

		*cost += extra_cost->fp[0].toint;

	      *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,

	  if (GET_MODE_CLASS (mode) == MODE_INT)

	      mode = GET_MODE (XEXP (x, 0));

		*cost += extra_cost->fp[mode == DFmode].toint;
	      /* Strip off the 'cost' of rounding towards zero.  */
	      if (GET_CODE (XEXP (x, 0)) == FIX)
		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,

		*cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);

	      /* ??? Increase the cost to deal with transferring from
		 FP -> CORE registers?  */

	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT

		*cost += extra_cost->fp[mode == DFmode].roundint;

	  /* Vector costs?  */

      *cost = LIBCALL_COST (1);

    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)

	  /* ??? Increase the cost to deal with transferring from CORE
	     -> FP registers?  */

	    *cost += extra_cost->fp[mode == DFmode].fromint;

      *cost = LIBCALL_COST (1);

	/* Just a guess.  Guess number of instructions in the asm
	   plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
	   though (see PR60663).  */
	int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
	int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);

	*cost = COSTS_N_INSNS (asm_length + num_operands);

      if (mode != VOIDmode)
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));

	*cost = COSTS_N_INSNS (4); /* Who knows?  */

#undef HANDLE_NARROW_SHIFT_ARITH
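
/* Editor's illustration (hedged, not part of GCC): the CONST_INT case above
   estimates a 64-bit constant by costing two 32-bit constant-building
   problems, one for the truncated low part and one for INTVAL (x) >> 32,
   each normally priced by arm_gen_constant.  The sketch below mirrors that
   split with a hypothetical stand-in for arm_gen_constant (a naive
   MOVW/MOVT count), purely to make the arithmetic concrete.  */
#if 0	/* Standalone sketch; compile separately, not with this file.  */
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in: 1 insn (MOVW) if the value fits in 16 bits,
   else 2 (MOVW + MOVT).  arm_gen_constant is far more precise.  */
static int
insns_for_si_constant (uint32_t v)
{
  return v <= 0xffff ? 1 : 2;
}

int
main (void)
{
  int64_t x = 0x123456789abcdef0LL;
  uint32_t lo = (uint32_t) x;		/* trunc_int_for_mode (..., SImode) */
  uint32_t hi = (uint32_t) (x >> 32);	/* INTVAL (x) >> 32 */
  printf ("estimated insns: %d\n",
	  insns_for_si_constant (lo) + insns_for_si_constant (hi));
  return 0;
}
#endif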
/* RTX costs entry point.  */
arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)

  int code = GET_CODE (x);
  gcc_assert (current_tune->insn_extra_cost);

  result = arm_rtx_costs_internal (x, (enum rtx_code) code,
				   (enum rtx_code) outer_code,
				   current_tune->insn_extra_cost,

  if (dump_file && arm_verbose_cost)

      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
	       *total, result ? "final" : "partial");
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
arm_arm_address_cost (rtx x)

  enum rtx_code c = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)

  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)

      if (CONST_INT_P (XEXP (x, 1)))

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))

arm_thumb_address_cost (rtx x)

  enum rtx_code c = GET_CODE (x);

      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))

arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)

  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
/* Adjust cost hook for XScale.  */
xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,

  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */

      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)

      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT_IMM
	      || attr_type == TYPE_ALUS_SHIFT_IMM
	      || attr_type == TYPE_LOGIC_SHIFT_IMM
	      || attr_type == TYPE_LOGICS_SHIFT_IMM
	      || attr_type == TYPE_ALU_SHIFT_REG
	      || attr_type == TYPE_ALUS_SHIFT_REG
	      || attr_type == TYPE_LOGIC_SHIFT_REG
	      || attr_type == TYPE_LOGICS_SHIFT_REG
	      || attr_type == TYPE_MOV_SHIFT
	      || attr_type == TYPE_MVN_SHIFT
	      || attr_type == TYPE_MOV_SHIFT_REG
	      || attr_type == TYPE_MVN_SHIFT_REG))

	  rtx shifted_operand;

	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];

	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase
	     the cost of this dependency.  */
	  extract_insn (dep);
	  preprocess_constraints (dep);
	  for (opno = 0; opno < recog_data.n_operands; opno++)

	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)

	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
/* Adjust cost hook for Cortex A9.  */
cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,

    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
	  && recog_memoized (dep) >= 0)

	  if (GET_CODE (PATTERN (insn)) == SET)

		 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT

		 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)

		  enum attr_type attr_type_insn = get_attr_type (insn);
		  enum attr_type attr_type_dep = get_attr_type (dep);

		  /* By default all dependencies of the form

		     have an extra latency of 1 cycle because
		     of the input and output dependency in this
		     case.  However this gets modeled as a true
		     dependency and hence all these checks.  */
		  if (REG_P (SET_DEST (PATTERN (insn)))
		      && reg_set_p (SET_DEST (PATTERN (insn)), dep))

		      /* FMACS is a special case where the dependent
			 instruction can be issued 3 cycles before
			 the normal latency in case of an output
			 dependency.  */
		      if ((attr_type_insn == TYPE_FMACS
			   || attr_type_insn == TYPE_FMACD)
			  && (attr_type_dep == TYPE_FMACS
			      || attr_type_dep == TYPE_FMACD))

			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) - 3;

			    *cost = insn_default_latency (dep);

			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) + 1;

			    *cost = insn_default_latency (dep);

      gcc_unreachable ();
/* Adjust cost hook for FA726TE.  */
fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,

  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (dep_type == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)

      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
	  && get_attr_type (insn) != TYPE_BRANCH)

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
	  || get_attr_conds (insn) == CONDS_USE)
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */
arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			reg_class_t from, reg_class_t to)

  if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
      || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))

  else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
	   || (from != IWMMXT_REGS && to == IWMMXT_REGS))

  else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)

  if (from == HI_REGS || to == HI_REGS)
/* Implement TARGET_MEMORY_MOVE_COST.  */
arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
		      bool in ATTRIBUTE_UNUSED)

      if (GET_MODE_SIZE (mode) < 4)

	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
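
/* Editor's illustration (hedged, not part of GCC): the Thumb-1 formula
   above charges 2 * mode-size, doubled again outside LO_REGS.  The
   sub-word early return is elided in this fragment, so only the visible
   formula is modeled below.  */
#if 0	/* Standalone sketch; compile separately.  */
#include <stdio.h>

static int
thumb1_memory_move_cost (int mode_size, int is_lo_regs)
{
  return 2 * mode_size * (is_lo_regs ? 1 : 2);
}

int
main (void)
{
  printf ("SImode (4 bytes), LO_REGS: %d\n", thumb1_memory_move_cost (4, 1)); /* 8  */
  printf ("DImode (8 bytes), HI_REGS: %d\n", thumb1_memory_move_cost (8, 0)); /* 32 */
  return 0;
}
#endif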
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				int misalign ATTRIBUTE_UNUSED)

  switch (type_of_cost)

      return current_tune->vec_costs->scalar_stmt_cost;

      return current_tune->vec_costs->scalar_load_cost;

      return current_tune->vec_costs->scalar_store_cost;

      return current_tune->vec_costs->vec_stmt_cost;

      return current_tune->vec_costs->vec_align_load_cost;

      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
    case vector_gather_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
    case vector_scatter_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

      gcc_unreachable ();
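
/* Editor's illustration (hedged): the vec_construct case above prices an
   n-element vector build at n / 2 + 1, i.e. roughly one insn per element
   pair plus one.  */
#if 0	/* Standalone sketch; compile separately.  */
#include <stdio.h>

int
main (void)
{
  for (int elements = 2; elements <= 16; elements *= 2)
    printf ("%2d elements -> vec_construct cost %d\n",
	    elements, elements / 2 + 1);
  return 0;
}
#endif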
/* Implement targetm.vectorize.add_stmt_cost.  */
arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		   struct _stmt_vec_info *stmt_info, int misalign,
		   enum vect_cost_model_location where)

  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)

      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
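
/* Editor's illustration (hedged): the accumulation above is
   retval = count * stmt_cost, with count inflated by the arbitrary
   factor of 50 for statements in an inner loop.  */
#if 0	/* Standalone sketch; compile separately.  */
#include <stdio.h>

int
main (void)
{
  int count = 2, stmt_cost = 1;	/* e.g. two scalar statements of cost 1 */
  int in_inner_loop = 1;
  if (in_inner_loop)
    count *= 50;		/* the FIXME weighting above */
  printf ("retval = %d\n", count * stmt_cost);	/* 100 */
  return 0;
}
#endif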
/* Return true if and only if this insn can dual-issue only as older.  */
cortexa7_older_only (rtx_insn *insn)

  if (recog_memoized (insn) < 0)

  switch (get_attr_type (insn))

    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:

    case TYPE_ADCS_REG:

    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:

    case TYPE_FFARITHS:

    case TYPE_FFARITHD:

    case TYPE_F_STORES:

/* Return true if and only if this insn can dual-issue as younger.  */
cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)

  if (recog_memoized (insn) < 0)

	fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));

  switch (get_attr_type (insn))

    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:

    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
			int *n_readyp, int clock)

  int first_older_only = -1, first_younger = -1;

	     ";; sched_reorder for cycle %d with %d insns in ready list\n",

  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)

      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))

	  first_older_only = i;

	    fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));

      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)

	fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)

	fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");

  /* Move first_older_only insn before first_younger.  */
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
	     INSN_UID (ready[first_older_only]),
	     INSN_UID (ready[first_younger]));
  rtx_insn *first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)

      ready[i] = ready[i+1];

  ready[i] = first_older_only_insn;
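
/* Editor's illustration (hedged, not part of GCC): the reorder loop above,
   simulated on a plain array.  ready[] is issued from the highest index
   down; the older-only entry is moved up to first_younger's slot while the
   entries in between shift down one place, preserving their order.  The
   OLDER_ONLY/YOUNGER tags stand in for the two classifier functions.  */
#if 0	/* Standalone sketch; compile separately.  */
#include <stdio.h>

#define OLDER_ONLY 1
#define YOUNGER    2

int
main (void)
{
  int ready[5] = { YOUNGER, OLDER_ONLY, 0, YOUNGER, YOUNGER };
  int n = 5, first_older_only = -1, first_younger = -1, i;

  for (i = n - 1; i >= 0; i--)
    {
      if (ready[i] == OLDER_ONLY)
	{
	  first_older_only = i;
	  break;
	}
      else if (ready[i] == YOUNGER && first_younger == -1)
	first_younger = i;
    }

  if (first_older_only >= 0 && first_younger > first_older_only)
    {
      int moved = ready[first_older_only];
      for (i = first_older_only; i < first_younger; i++)
	ready[i] = ready[i + 1];
      ready[i] = moved;		/* older-only insn now issues first */
    }

  for (i = n - 1; i >= 0; i--)
    printf ("%d ", ready[i]);	/* issue order: 1 2 2 0 2 */
  printf ("\n");
  return 0;
}
#endif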
/* Implement TARGET_SCHED_REORDER.  */
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,

    case TARGET_CPU_cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);

      /* Do nothing for other cores.  */

  return arm_issue_rate ();
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */

      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)

  if (current_tune->sched_adjust_cost != NULL)

      if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))

  /* XXX Is this strictly true?  */
  if (dep_type == REG_DEP_ANTI
      || dep_type == REG_DEP_OUTPUT)

  /* Call insns don't incur a stall, even if they follow a load.  */

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))

      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */

      if ((GET_CODE (src_mem) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
arm_max_conditional_execute (void)

  return max_insns_skipped;

arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)

    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;

  return (optimize > 0) ? 2 : 0;

arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)

  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)

  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);
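
/* Editor's illustration (hedged, not part of GCC): the Cortex-M hook above
   returns 1 on a 32-bit target when optimizing for speed, otherwise it
   defers to the default cost.  The sketch turns the build-time TARGET_*
   macros into plain parameters (an assumption made purely for the demo)
   so the two functions can be exercised directly.  */
#if 0	/* Standalone sketch; compile separately.  */
#include <stdbool.h>
#include <stdio.h>

static int
default_branch_cost (bool target_32bit, bool thumb2, bool speed_p)
{
  if (target_32bit)
    return (thumb2 && !speed_p) ? 1 : 4;
  return 2;	/* assuming optimize > 0 */
}

static int
cortex_m_branch_cost (bool target_32bit, bool thumb2, bool speed_p)
{
  return (target_32bit && speed_p)
	 ? 1 : default_branch_cost (target_32bit, thumb2, speed_p);
}

int
main (void)
{
  printf ("Thumb-2 Cortex-M, speed: %d\n", cortex_m_branch_cost (true, true, true));  /* 1 */
  printf ("Thumb-2 Cortex-M, size : %d\n", cortex_m_branch_cost (true, true, false)); /* 1 */
  return 0;
}
#endif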
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)

  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);

static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

init_fp_table (void)

  r = REAL_VALUE_ATOF ("0", DFmode);

  fp_consts_inited = true;

/* Return TRUE if rtx X is a valid immediate FP constant.  */
arm_const_double_rtx (rtx x)

  const REAL_VALUE_TYPE *r;

  if (!fp_consts_inited)

  r = CONST_DOUBLE_REAL_VALUE (x);
  if (REAL_VALUE_MINUS_ZERO (*r))

  if (real_equal (r, &value_fp0))
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

     - A (most-significant) is the sign bit.
     - BCD are the exponent (encoded as r XOR 3).
     - EFGH are the mantissa (encoded as n - 16).  */
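
/* Editor's illustration (hedged, not part of GCC): a decoder for the
   ABCDEFGH encoding described above, derived by inverting
   vfp3_const_double_index below (which emits
   (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16)).  The effective
   power of two works out to 4 - (BCD ^ 3); the sanity value imm8 = 0x70
   decoding to 1.0 agrees with the hardware fconst encoding.  */
#if 0	/* Standalone sketch; compile separately (link with -lm).  */
#include <math.h>
#include <stdio.h>

static double
vfp3_decode_imm8 (unsigned imm8)
{
  int s = (imm8 >> 7) & 1;
  int r = ((imm8 >> 4) & 7) ^ 3;	/* the encoder's 'exponent' field */
  int n = (imm8 & 15) + 16;		/* mantissa, 16 <= n <= 31 */
  return (s ? -1.0 : 1.0) * ((double) n / 16.0) * ldexp (1.0, 4 - r);
}

int
main (void)
{
  printf ("0x70 -> %g\n", vfp3_decode_imm8 (0x70));	/* 1.0 */
  printf ("0x00 -> %g\n", vfp3_decode_imm8 (0x00));	/* 2.0 */
  return 0;
}
#endif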
/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
vfp3_const_double_index (rtx x)

  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))

  r = *CONST_DOUBLE_REAL_VALUE (x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
vfp3_const_double_rtx (rtx x)

  return vfp3_const_double_index (x) != -1;

/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
		      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than integers.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
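
/* Editor's illustration (hedged, not part of GCC): how the splat-and-test
   scheme in neon_valid_immediate below works for variant 0 of the table
   above (vmov.i32 with pattern 00000000 00000000 00000000 abcdefgh).  A
   V4SI duplicate of 0x5a is expanded to bytes (little-endian host assumed,
   purely for the demo) and checked with the same predicate the CHECK
   macro applies.  */
#if 0	/* Standalone sketch; compile separately.  */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  uint32_t elt = 0x5a;		/* each element of the V4SI constant */
  unsigned char bytes[16];
  for (int i = 0; i < 4; i++)
    memcpy (bytes + 4 * i, &elt, 4);	/* little-endian byte splat */

  int matches = 1;
  for (int i = 0; i < 16; i += 4)	/* STRIDE 4, variant 0 test */
    if (!(bytes[i] == bytes[0] && bytes[i + 1] == 0
	  && bytes[i + 2] == 0 && bytes[i + 3] == 0))
      matches = 0;
  printf ("vmov.i32 variant 0: %s\n", matches ? "yes" : "no");
  return 0;
}
#endif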
neon_valid_immediate (rtx op, machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)

#define CHECK(STRIDE, ELSIZE, CLASS, TEST) \

  for (i = 0; i < idx; i += (STRIDE)) \

	immtype = (CLASS); \
	elsize = (ELSIZE); \

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

      n_elts = CONST_VECTOR_NUNITS (op);

      if (mode == VOIDmode)

  innersize = GET_MODE_UNIT_SIZE (mode);

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)

      rtx el0 = CONST_VECTOR_ELT (op, 0);

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))

      /* FP16 vectors cannot be represented.  */
      if (GET_MODE_INNER (mode) == HFmode)

      /* All elements in the vector must be the same.  Note that 0.0 and -0.0
	 are distinct in this context.  */
      if (!const_vec_duplicate_p (op))

	*modconst = CONST_VECTOR_ELT (op, 0);

      if (el0 == CONST0_RTX (GET_MODE (el0)))

  /* The tricks done in the code below apply for little-endian vector layout.
     For big-endian vectors only allow vectors of the form { a, a, a..., a }.
     FIXME: Implement logic for big-endian vectors.  */
  if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)

      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el);

      for (unsigned int byte = 0; byte < innersize; byte++)

	  bytes[idx++] = (elpart & 0xff) ^ invmask;
	  elpart >>= BITS_PER_UNIT;

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		   && bytes[i + 2] == 0 && bytes[i + 3] == 0);

  CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		   && bytes[i + 2] == 0 && bytes[i + 3] == 0);

  CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		   && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

  CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		   && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

  CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

  CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

  CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		   && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

  CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		   && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

  CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		   && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

  CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		   && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

  CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

  CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

  CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		    && bytes[i + 2] == 0 && bytes[i + 3] == 0);

  CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		    && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

  CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
		    && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

  CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
		    && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

  CHECK (1, 8, 16, bytes[i] == bytes[0]);

  CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
		    && bytes[i] == bytes[(i + 8) % idx]);

    *elementwidth = elsize;

      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */

	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);

	  unsigned HOST_WIDE_INT imm = 0;

	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */
neon_immediate_valid_for_move (rtx op, machine_mode mode,
			       rtx *modconst, int *elementwidth)

  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

    *modconst = tmpconst;

    *elementwidth = tmpwidth;

/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for description of INVERSE.  */
neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)

  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)

    *modconst = tmpconst;

    *elementwidth = tmpwidth;

/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT is for determining left or right shift,
   because they have different limitations.  */
neon_immediate_valid_for_shift (rtx op, machine_mode mode,
				rtx *modconst, int *elementwidth,

  unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)

      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
	elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))

	gcc_unreachable ();

      if (i != 0 && elpart != last_elt)

  /* Shift less than element size.  */
  maxshift = innersize * 8;

      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)

      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)

    *elementwidth = innersize * 8;

    *modconst = CONST_VECTOR_ELT (op, 0);
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */
neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
			     int inverse, int quad)

  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);

    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     machine_mode mode, int quad,

  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);

    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);

    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))

  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);

  for (i = parts / 2; i >= 1; i /= 2)

      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
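
/* Editor's illustration (hedged, not part of GCC): a scalar simulation of
   the halving reduction above -- each step adds adjacent pairs, so log2(n)
   steps leave the full sum in element 0, while the other lanes hold
   duplicated partial results that are ignored, as the comment above
   notes.  */
#if 0	/* Standalone sketch; compile separately.  */
#include <stdio.h>

int
main (void)
{
  float v[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
  for (int parts = 4; parts > 1; parts /= 2)
    {
      float t[4];
      for (int i = 0; i < parts / 2; i++)
	t[i] = v[2 * i] + v[2 * i + 1];		/* pairwise sums */
      for (int i = parts / 2; i < parts; i++)
	t[i] = t[i - parts / 2];		/* duplicated, ignored */
      for (int i = 0; i < parts; i++)
	v[i] = t[i];
    }
  printf ("sum = %g\n", v[0]);	/* 10 */
  return 0;
}
#endif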
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */
neon_vdup_constant (rtx vals)

  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)

  if (!const_vec_duplicate_p (vals, &x))
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using ...).  */

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector ...  */

  x = copy_to_mode_reg (inner_mode, x);
  return gen_vec_duplicate (mode, x);

/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
neon_make_constant (rtx vals)

  machine_mode mode = GET_MODE (vals);

  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);

  if (GET_CODE (vals) == CONST_VECTOR)

  else if (GET_CODE (vals) == PARALLEL)

      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)

	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))

      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));

    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */

  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */

  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We cannot take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */

    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer.  */

/* Initialize vector TARGET to VALS.  */
neon_expand_vector_init (rtx target, rtx vals)

  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;

  for (i = 0; i < n_elts; ++i)

      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))

      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)

	  emit_move_insn (target, constant);

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)

      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */

      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));

	  emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));

	  emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));

	  emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));

	  emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));

	  emit_insn (gen_neon_vset_lanev2si (target, x, target, index));

	  emit_insn (gen_neon_vset_lanev4si (target, x, target, index));

	  emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));

	  emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));

	  emit_insn (gen_neon_vset_lanev2di (target, x, target, index));

	  gcc_unreachable ();

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
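
/* Editor's illustration (hedged, not part of GCC): the strategy order used
   by neon_expand_vector_init above, restated as a plain decision function
   over element descriptors.  The equality test here approximates the
   rtx_equal_p check in the real code; names and types are invented for
   the demo only.  */
#if 0	/* Standalone sketch; compile separately.  */
#include <stdbool.h>
#include <stdio.h>

enum strategy { CONST_VEC, DUP_REG, CONST_PLUS_SET_LANE, VIA_STACK };

static enum strategy
pick (int n, const bool *is_const, const int *val)
{
  int n_var = 0;
  bool all_same = true;
  for (int i = 0; i < n; i++)
    {
      if (!is_const[i])
	n_var++;
      if (val[i] != val[0] || is_const[i] != is_const[0])
	all_same = false;
    }
  if (n_var == 0)
    return CONST_VEC;		/* VMOV imm, VDUP, or literal-pool load */
  if (all_same)
    return DUP_REG;		/* splat one register */
  if (n_var == 1)
    return CONST_PLUS_SET_LANE;	/* load constant, overwrite one lane */
  return VIA_STACK;		/* build in memory, load whole vector */
}

int
main (void)
{
  bool c[4] = { true, true, false, true };
  int v[4] = { 1, 2, 0, 4 };
  printf ("strategy = %d\n", (int) pick (4, c, v)); /* CONST_PLUS_SET_LANE */
  return 0;
}
#endif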
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  EXP indicates the source location, which includes the
   inlining history for intrinsics.  */
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const_tree exp, const char *desc)

  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)

	error ("%K%s %wd out of range %wd - %wd",
	       exp, desc, lane, low, high - 1);

	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);

/* Bounds-check lanes.  */
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,

  bounds_check (operand, low, high, exp, "lane");

/* Bounds-check constants.  */
arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)

  bounds_check (operand, low, high, NULL_TREE, "constant");

neon_element_bits (machine_mode mode)

  return GET_MODE_UNIT_BITSIZE (mode);
12478 /* Predicates for `match_operand' and `match_operator'. */
12480 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12481 WB is true if full writeback address modes are allowed and is false
12482 if limited writeback address modes (POST_INC and PRE_DEC) are
12486 arm_coproc_mem_operand (rtx op
, bool wb
)
12490 /* Reject eliminable registers. */
12491 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12492 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12493 || reg_mentioned_p (arg_pointer_rtx
, op
)
12494 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12495 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12496 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12497 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12500 /* Constants are converted into offsets from labels. */
12504 ind
= XEXP (op
, 0);
12506 if (reload_completed
12507 && (GET_CODE (ind
) == LABEL_REF
12508 || (GET_CODE (ind
) == CONST
12509 && GET_CODE (XEXP (ind
, 0)) == PLUS
12510 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12511 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12514 /* Match: (mem (reg)). */
12516 return arm_address_register_rtx_p (ind
, 0);
12518 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12519 acceptable in any case (subject to verification by
12520 arm_address_register_rtx_p). We need WB to be true to accept
12521 PRE_INC and POST_DEC. */
12522 if (GET_CODE (ind
) == POST_INC
12523 || GET_CODE (ind
) == PRE_DEC
12525 && (GET_CODE (ind
) == PRE_INC
12526 || GET_CODE (ind
) == POST_DEC
)))
12527 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12530 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12531 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12532 && GET_CODE (XEXP (ind
, 1)) == PLUS
12533 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12534 ind
= XEXP (ind
, 1);
12539 if (GET_CODE (ind
) == PLUS
12540 && REG_P (XEXP (ind
, 0))
12541 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12542 && CONST_INT_P (XEXP (ind
, 1))
12543 && INTVAL (XEXP (ind
, 1)) > -1024
12544 && INTVAL (XEXP (ind
, 1)) < 1024
12545 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from. TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (strict && ! (reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn.  */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1)))
    return true;

  /* Match:
     (plus (reg)
           (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
         than what the instruction format permits.  We have no such constraint
         on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
          < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
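/* For instance, for TYPE == 2 the POST_MODIFY case above corresponds to the
   register post-indexed element/structure load:

        vld1.32 {d0}, [r0], r1
           <- (mem (post_modify (reg r0) (plus (reg r0) (reg r1))))  */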
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
                       || REGNO (x) == ARG_POINTER_REGNUM
                       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
                           && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
        return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
        return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
          || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
          || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
          && BYTES_BIG_ENDIAN
          && (AGGREGATE_TYPE_P (valtype)
              || TREE_CODE (valtype) == COMPLEX_TYPE
              || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (symbol_mentioned_p (XVECEXP (x, i, j)))
              return 1;
        }
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
        return 1;
    }

  return 0;
}
/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (label_mentioned_p (XVECEXP (x, i, j)))
              return 1;
        }
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
        return 1;
    }

  return 0;
}
/* Return TRUE if X contains any TLS symbol references.  */

bool
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
        return true;

      /* Fall through.  */
    default:
      return false;
    }
}
/* Must not copy any rtx that uses a pc-relative address.
   Also, disallow copying of load-exclusive instructions that
   may appear after splitting of compare-and-swap-style operations
   so as to prevent those loops from being transformed away from their
   canonical forms (see PR 69904).  */

static bool
arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
          && (XINT (x, 1) == UNSPEC_PIC_BASE
              || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
        return true;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx src = SET_SRC (set);
      if (GET_CODE (src) == ZERO_EXTEND)
        src = XEXP (src, 0);

      /* Catch the load-exclusive and load-acquire operations.  */
      if (GET_CODE (src) == UNSPEC_VOLATILE
          && (XINT (src, 1) == VUNSPEC_LL
              || XINT (src, 1) == VUNSPEC_LAX))
        return true;
    }
  return false;
}
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}
/* Match pair of min/max operators that can be implemented via usat/ssat.  */

bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
                        int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
        *mask = log;
      if (signed_sat)
        *signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
        *mask = log + 1;
      if (signed_sat)
        *signed_sat = true;

      return true;
    }

  return false;
}
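/* As an example, a clamp to [0, 255] (bounds 0 and 255) matches with
   *MASK == 8 and *SIGNED_SAT == false, i.e. "usat Rd, #8, Rm"; a clamp
   to [-128, 127] matches with *MASK == 8 and *SIGNED_SAT == true,
   i.e. "ssat Rd, #8, Rm".  */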
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
           && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
          || (GET_CODE (XEXP (b, 0)) == PLUS
              && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
        {
          reg0 = XEXP (XEXP (a, 0), 0);
          val0 = INTVAL (XEXP (XEXP (a, 0), 1));
        }
      else
        reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
        {
          reg1 = XEXP (XEXP (b, 0), 0);
          val1 = INTVAL (XEXP (XEXP (b, 0), 1));
        }
      else
        reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
         instructions to handle, since this would cause the
         arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
        return 0;

      /* Don't allow an eliminable register: register elimination can make
         the offset too large.  */
      if (arm_eliminable_register (reg0))
        return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
        {
          /* If the target has load delay slots, then there's no benefit
             to using an ldm instruction unless the offset is zero and
             we are optimizing for size.  */
          return (optimize_size && (REGNO (reg0) == REGNO (reg1))
                  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
                  && (val_diff == 4 || val_diff == -4));
        }

      return ((REGNO (reg0) == REGNO (reg1))
              && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
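/* For example, the references [r4, #8] and [r4, #12] are adjacent (same
   base register, offsets differing by 4), whereas [r4, #8] and [r5, #12],
   or [r4, #8] and [r4, #16], are not.  */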
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if the value is to be loaded into the PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
     where
     1.  If offset is 0, the first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for the kth register being loaded,
         REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
                     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
         popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
          || !REG_P (XEXP (SET_SRC (elt), 0))
          || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
          || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
          || INTVAL (XEXP (SET_SRC (elt), 1)) !=
             ((count - 1 - offset_adj) * reg_increment))
        return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is
     loaded, success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
        return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
        return false;

      if (load)
        {
          reg = SET_DEST (elt);
          mem = SET_SRC (elt);
        }
      else
        {
          reg = SET_SRC (elt);
          mem = SET_DEST (elt);
        }

      if (!REG_P (reg)
          || GET_MODE (reg) != mode
          || REGNO (reg) <= regno
          || (consecutive
              && (REGNO (reg) !=
                  (unsigned int) (first_regno + regs_per_val * (i - base))))
          /* Don't allow SP to be loaded unless it is also the base register.
             It guarantees that SP is reset correctly when an LDM instruction
             is interrupted.  Otherwise, we might end up with a corrupt
             stack.  */
          || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
          || !MEM_P (mem)
          || GET_MODE (mem) != mode
          || ((GET_CODE (XEXP (mem, 0)) != PLUS
               || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
               || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
               || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
                   offset + (i - base) * reg_increment))
              && (!REG_P (XEXP (mem, 0))
                  || offset + (i - base) * reg_increment != 0)))
        return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
        addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
        return false;

      /* For Thumb-1, the address register is always modified - either by
         write-back or by an explicit load.  If the pattern does not describe
         an update, then the address register must be in the list of loaded
         registers.  */
      if (TARGET_THUMB1)
        return update || addr_reg_in_reglist;
    }

  return true;
}
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
                                 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

        ldr     rd1, [rbase + offset]
        ldr     rd2, [rbase + offset + 4]

     to

        add     rd1, rbase, offset
        ldmia   rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

        NREGS           CYCLES
          1               3
          2               4
          3               5
          4               6

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

        NREGS           CYCLES
          1              1-3
          2              2-6
          3              3-9
          4              4-12

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
                      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
        if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
          {
            /* We must find exactly one offset that is higher than the
               previous one by 4.  */
            if (order[i] != order[i - 1])
              return false;
            order[i] = j;
          }
      if (order[i] == order[i - 1])
        return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
          && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
        return false;
    }
  return true;
}
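/* A worked example: for unsorted offsets {4, 12, 0, 8} the caller sets
   ORDER[0] = 2 (offset 0); the loop above then fills in ORDER = {2, 0, 3, 1},
   visiting offsets 0, 4, 8, 12.  Offsets such as {0, 4, 4, 8} (a duplicate)
   or {0, 4, 12} (a gap) cause a return of false.  */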
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
                        int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  rtx base_reg_rtx = NULL;
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
        operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
         looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
        return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
           || (GET_CODE (reg) == SUBREG
               && REG_P (reg = SUBREG_REG (reg))))
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
              && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
                  || (GET_CODE (reg) == SUBREG
                      && REG_P (reg = SUBREG_REG (reg))))
              && (CONST_INT_P (offset
                  = XEXP (XEXP (operands[nops + i], 0), 1)))))
        {
          if (i == 0)
            {
              base_reg = REGNO (reg);
              base_reg_rtx = reg;
              if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
                return 0;
            }
          else if (base_reg != (int) REGNO (reg))
            /* Not addressed from the same base register.  */
            return 0;

          unsorted_regs[i] = (REG_P (operands[i])
                              ? REGNO (operands[i])
                              : REGNO (SUBREG_REG (operands[i])));

          /* If it isn't an integer register, or if it overwrites the
             base register but isn't the last insn in the list, then
             we can't do this.  */
          if (unsorted_regs[i] < 0
              || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
              || unsorted_regs[i] > 14
              || (i != nops - 1 && unsorted_regs[i] == base_reg))
            return 0;

          /* Don't allow SP to be loaded unless it is also the base
             register.  It guarantees that SP is reset correctly when
             an LDM instruction is interrupted.  Otherwise, we might
             end up with a corrupt stack.  */
          if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
            return 0;

          unsorted_offsets[i] = INTVAL (offset);
          if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
            order[0] = i;
        }
      else
        /* Not a suitable memory address.  */
        return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
                             check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
        regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
           || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
                                        ldm_case == 5
                                        ? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
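/* To illustrate the LDM_CASE values: a lowest offset of 0 lets the insns
   become "ldmia rb, {...}" (case 1); a lowest offset of 4 maps to "ldmib"
   (case 2); a highest offset of 0 to "ldmda" (case 3); a highest offset of
   -4 to "ldmdb" (case 4).  Case 5 means the base register must first be
   adjusted by the lowest offset with a separate add insn.  */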
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
                         int *regs, rtx *reg_rtxs, int *saved_order, int *base,
                         HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
        operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
         looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
        return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
           || (GET_CODE (reg) == SUBREG
               && REG_P (reg = SUBREG_REG (reg))))
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
              && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
                  || (GET_CODE (reg) == SUBREG
                      && REG_P (reg = SUBREG_REG (reg))))
              && (CONST_INT_P (offset
                  = XEXP (XEXP (operands[nops + i], 0), 1)))))
        {
          unsorted_reg_rtxs[i] = (REG_P (operands[i])
                                  ? operands[i] : SUBREG_REG (operands[i]));
          unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

          if (i == 0)
            {
              base_reg = REGNO (reg);
              base_reg_rtx = reg;
              if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
                return 0;
            }
          else if (base_reg != (int) REGNO (reg))
            /* Not addressed from the same base register.  */
            return 0;

          /* If it isn't an integer register, then we can't do this.  */
          if (unsorted_regs[i] < 0
              || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
              /* The effects are unpredictable if the base register is
                 both updated and stored.  */
              || (base_writeback && unsorted_regs[i] == base_reg)
              || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
              || unsorted_regs[i] > 14)
            return 0;

          unsorted_offsets[i] = INTVAL (offset);
          if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
            order[0] = i;
        }
      else
        /* Not a suitable memory address.  */
        return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
                             check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
        {
          regs[i] = unsorted_regs[check_regs ? order[i] : i];
          if (reg_rtxs)
            reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
        }

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
                         HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
        emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
        emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
                             rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
        = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
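/* For illustration: with COUNT == 2, REGS == {4, 5} and WBACK_OFFSET == 8,
   and taking r1 as the base register, the PARALLEL built above has the
   shape (e.g. when the MEMS array was built by arm_gen_multiple_op):

        (parallel [(set (reg:SI 1) (plus:SI (reg:SI 1) (const_int 8)))
                   (set (reg:SI 4) (mem:SI (reg:SI 1)))
                   (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 1)
                                                    (const_int 4))))])

   which the load-multiple patterns can emit as "ldmia r1!, {r4, r5}".  */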
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
                          HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
        emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
        emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
                             rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
        = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */

static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
                     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
                                    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
                                     write_back ? 4 * count : 0);
}
rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
                       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
                              offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
                        rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
                              offsetp);
}
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */

bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
                                     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
        if (regs[i] > regs[j])
          std::swap (regs[i], regs[j]);

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
      gcc_assert (ldm_case == 1 || ldm_case == 5);
      write_back = TRUE;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
        base_reg_rtx = newbase;
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
                                      write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */

bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
                                      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
                                       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */

bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
                                      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
        if (regs[i] == regs[j])
          {
            rtx t = peep2_find_free_register (0, nops * 2,
                                              TARGET_THUMB1 ? "l" : "r",
                                              SImode, &allocated);
            if (t == NULL_RTX)
              return false;
            reg_rtxs[i] = t;
            regs[i] = REGNO (t);
          }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
        if (regs[j] > regs[reg_order[i - 1]]
            && (this_order == reg_order[i - 1]
                || regs[j] < regs[this_order]))
          this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
           || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
          && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
        return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
                                       write_back ? offset + i * 4 : 0));
  return true;
}
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */

static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
                                   HOST_WIDE_INT length,
                                   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = i;

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
                                            TRUE, srcbase, &srcoffset));
          src_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
                                                 - src_autoinc));
              mem = adjust_automodify_address (srcbase, SImode, addr,
                                               srcoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_loadsi (regs[j], mem));
            }
          srcoffset += block_size_bytes;
        }

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
                                             TRUE, dstbase, &dstoffset));
          dst_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
                                                 - dst_autoinc));
              mem = adjust_automodify_address (dstbase, SImode, addr,
                                               dstoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_storesi (mem, regs[j]));
            }
          dstoffset += block_size_bytes;
        }

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
                                        &srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, src,
                                srcoffset + j * UNITS_PER_WORD - src_autoinc);
          mem = adjust_automodify_address (srcbase, SImode, addr,
                                           srcoffset + j * UNITS_PER_WORD);
          if (src_aligned)
            emit_move_insn (regs[j], mem);
          else
            emit_insn (gen_unaligned_loadsi (regs[j], mem));
        }
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
                                         &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, dst,
                                dstoffset + j * UNITS_PER_WORD - dst_autoinc);
          mem = adjust_automodify_address (dstbase, SImode, addr,
                                           dstoffset + j * UNITS_PER_WORD);
          if (dst_aligned)
            emit_move_insn (mem, regs[j]);
          else
            emit_insn (gen_unaligned_storesi (mem, regs[j]));
        }
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
         byte, depending on interleave factor.  */
      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
          emit_insn (gen_unaligned_storehi (mem,
                       gen_lowpart (HImode, halfword_tmp)));
          halfword_tmp = NULL;
          dstoffset += 2;
        }

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
          byte_tmp = NULL;
          dstoffset++;
        }

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
                   gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
                      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
                               unsigned int interleave_factor,
                               HOST_WIDE_INT bytes_per_iter)
{
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
                                   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
                                     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
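/* For example, copying 37 bytes with BYTES_PER_ITER == 16 runs the loop
   body twice (32 bytes) and then calls arm_block_move_unaligned_straight
   for the 5 left-over bytes.  */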
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_movmemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
         size of code if optimizing for size.  We'll use ldm/stm if src_aligned
         or dst_aligned though: allow more interleaving in those cases since the
         resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
        arm_block_move_unaligned_loop (operands[0], operands[1], length,
                                       interleave_factor, bytes_per_iter);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length,
                                           interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
         subject to loop unrolling, which makes tuning this condition a little
         redundant.  */
      if (length > 32)
        arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_movmemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_movmemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  while (in_words_to_go >= 2)
    {
      if (in_words_to_go > 4)
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
                                          TRUE, srcbase, &srcoffset));
      else
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
                                          src, FALSE, srcbase,
                                          &srcoffset));

      if (out_words_to_go)
        {
          if (out_words_to_go > 4)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
                                               TRUE, dstbase, &dstoffset));
          else if (out_words_to_go != 1)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
                                               out_words_to_go, dst,
                                               (last_bytes == 0
                                                ? FALSE : TRUE),
                                               dstbase, &dstoffset));
          else
            {
              mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
              emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
              if (last_bytes != 0)
                {
                  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
                  dstoffset += 4;
                }
            }
        }

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);	/* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
                              GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode,
                                           plus_constant (Pmode, dst,
                                                          last_bytes - 1),
                                           dstoffset + last_bytes - 1);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

          if (--last_bytes)
            {
              tmp = gen_reg_rtx (SImode);
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
              part_bytes_reg = tmp;
            }
        }
    }
  else
    {
      if (last_bytes > 1)
        {
          mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
          last_bytes -= 2;
          if (last_bytes)
            {
              rtx tmp = gen_reg_rtx (SImode);
              emit_insn (gen_addsi3 (dst, dst, const2_rtx));
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
              part_bytes_reg = tmp;
              dstoffset += 2;
            }
        }

      if (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
        }
    }

  return 1;
}
/* Helper for gen_movmem_ldrd_strd.  Increase the address of memory rtx
   by mode size.  */
inline static rtx
next_consecutive_mem (rtx mem)
{
  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
14446 /* Copy using LDRD/STRD instructions whenever possible.
14447 Returns true upon success. */
bool
gen_movmem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_movmemqi (operands);

  /* If either src or dst is unaligned we'll be accessing it as pairs
     of unaligned SImode accesses.  Otherwise we can generate DImode
     ldrd/strd instructions.  */
  src = adjust_address (src, src_aligned ? DImode : SImode, 0);
  dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);

  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      rtx low_reg = NULL_RTX;
      rtx hi_reg = NULL_RTX;

      if (!src_aligned || !dst_aligned)
	{
	  low_reg = gen_lowpart (SImode, reg0);
	  hi_reg = gen_highpart_mode (SImode, DImode, reg0);
	}

      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	{
	  emit_insn (gen_unaligned_loadsi (low_reg, src));
	  src = next_consecutive_mem (src);
	  emit_insn (gen_unaligned_loadsi (hi_reg, src));
	}

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	{
	  emit_insn (gen_unaligned_storesi (dst, low_reg));
	  dst = next_consecutive_mem (dst);
	  emit_insn (gen_unaligned_storesi (dst, hi_reg));
	}

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);

  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);

      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  if (len == 0)
    return true;

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);

      if (src_aligned)
	emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
	emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
	emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
      else
	emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      if (len == 2)
	return true;
    }

  dst = adjust_address (dst, QImode, 0);
  src = adjust_address (src, QImode, 0);
  reg0 = gen_reg_rtx (QImode);
  emit_move_insn (reg0, src);
  emit_move_insn (dst, reg0);
  return true;
}
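
/* A worked example of the decomposition above (illustrative only; the
   register numbers are hypothetical and depend on register allocation).
   For an aligned 15-byte copy, the doubleword loop and the word,
   halfword and byte tails emit roughly:

	ldrd	r4, r5, [r1]	@ 8 bytes via DImode ldrd/strd
	strd	r4, r5, [r0]
	ldr	r4, [r1, #8]	@ 4 bytes via SImode
	str	r4, [r0, #8]
	ldrh	r4, [r1, #12]	@ 2 bytes via HImode
	strh	r4, [r0, #12]
	ldrb	r4, [r1, #14]	@ final byte via QImode
	strb	r4, [r0, #14]  */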
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */
machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
	  != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    std::swap (cond1, cond2);

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DEQmode;

      switch (cond2)
	{
	case EQ: return CC_DEQmode;
	case LE: return CC_DLEmode;
	case LEU: return CC_DLEUmode;
	case GE: return CC_DGEmode;
	case GEU: return CC_DGEUmode;
	default: gcc_unreachable ();
	}

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTmode;

      switch (cond2)
	{
	case LT:
	  return CC_DLTmode;
	case LE:
	  return CC_DLEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTmode;

      switch (cond2)
	{
	case GT:
	  return CC_DGTmode;
	case GE:
	  return CC_DGEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTUmode;

      switch (cond2)
	{
	case LTU:
	  return CC_DLTUmode;
	case LEU:
	  return CC_DLEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTUmode;

      switch (cond2)
	{
	case GTU:
	  return CC_DGTUmode;
	case GEU:
	  return CC_DGEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
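
/* An illustrative example of dominance (hypothetical conditions): for
   (x == y) || (x < y), neither EQ nor LT dominates the other, so the
   pair is rejected and CCmode is returned.  For (x < y) || (x <= y),
   LE dominates LT (whenever LT holds, LE holds too), so the whole test
   collapses to a single CC_DLEmode comparison.  */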
machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
	  || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
	  || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
	  || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == ROTATERT
	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      switch (op)
	{
	case EQ:
	case NE:
	  /* A DImode comparison against zero can be implemented by
	     or'ing the two halves together.  */
	  if (y == const0_rtx)
	    return CC_Zmode;

	  /* We can do an equality test in three Thumb instructions.  */
	  if (!TARGET_32BIT)
	    return CC_Zmode;

	  /* FALLTHROUGH */

	case LTU:
	case LEU:
	case GTU:
	case GEU:
	  /* DImode unsigned comparisons can be implemented by cmp +
	     cmpeq without a scratch register.  Not worth doing in
	     Thumb-2.  */
	  if (TARGET_ARM)
	    return CC_CZmode;

	  /* FALLTHROUGH */

	case LT:
	case LE:
	case GT:
	case GE:
	  /* DImode signed and unsigned comparisons can be implemented
	     by cmp + sbcs with a scratch register, but that does not
	     set the Z flag - we must reverse GT/LE/GTU/LEU.  */
	  gcc_assert (op != EQ && op != NE);
	  return CC_NCVmode;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
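
/* An illustrative example of the CC_SWPmode case above (hypothetical
   RTL): canonicalization puts a shifted operand first, so a test that
   started life as (lt (reg r0) (ashift (reg r1) (const_int 2))) reaches
   us as a compare whose first operand is the shift.  CC_SWPmode records
   that the output routines must emit the compare with the operands
   swapped back, e.g. as "cmp r0, r1, lsl #2", and use the swapped
   condition accordingly.  */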
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
	 then compare against zero.  Not used for ARM mode; there
	 CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
	{
	  gcc_assert (!reload_completed);
	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
	  y = const0_rtx;
	}

      /* A scratch register is required.  */
      if (reload_completed)
	gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
	scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
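
/* A sketch of the Thumb DImode equality path above (illustrative):
   comparing two 64-bit values x and y for equality under CC_Zmode first
   computes t = x ^ y with expand_binop, then compares t against zero,
   so only the Z flag is needed.  The compare is wrapped in a PARALLEL
   with a CLOBBER of an SImode scratch so that the splitter has a spare
   register to OR the two halves together.  In ARM state this path is
   skipped because the CC_CZmode cmp/cmpeq sequence is cheaper.  */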
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62554: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (operands[0]));
	  emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
				gen_rtx_SUBREG (SImode, ref, 0)));
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT
				(SImode,
				 gen_rtx_SUBREG (SImode, operands[0], 0),
				 GEN_INT (8)),
				scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT (SImode, scratch,
						GEN_INT (8)),
				gen_rtx_SUBREG (SImode, operands[0], 0)));
}
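
/* Worked example of the hi/lo split above (illustrative numbers):
   for offset = 5000, lo = 5000 & 0xfff = 904 and hi = 4096, so we emit
   "add rB, rBase, #4096" once and then do the byte accesses at offsets
   904 and 905; 904 + 4096 == 5000, as the assert requires.  For
   offset = -5000, lo = -((5000) & 0xfff) = -904 and hi = -4096.  */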
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62254: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (outval) || SUBREG_P (outval));

	  if (REG_P (outval))
	    {
	      emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				    gen_rtx_SUBREG (SImode, outval, 0)));
	    }
	  else /* SUBREG_P (outval)  */
	    {
	      if (GET_MODE (SUBREG_REG (outval)) == SImode)
		emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				      SUBREG_REG (outval)));
	      else
		/* FIXME: Handle other cases ?  */
		gcc_unreachable ();
	    }
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
	{
	  /* Updating base_plus might destroy outval, see if we can
	     swap the scratch and base_plus.  */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    std::swap (scratch, base_plus);
	  else
	    {
	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

	      /* Be conservative and copy OUTVAL into the scratch now,
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
	      /* XXX Might this clobber base?  I can't see how it can,
		 since scratch is known to overlap with OUTVAL, and
		 must be wider than a word.  */
	      emit_insn (gen_movhi (scratch_hi, outval));
	      outval = scratch_hi;
	    }
	}

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	    {
	      /* Updating base_plus might destroy outval, see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		std::swap (scratch, base_plus);
	      else
		{
		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now,
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		     outval.  */
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;
		}
	    }

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, scratch)));
    }
}
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}
/* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
   byte of a stack argument has useful data.  For legacy APCS ABIs we use
   the default.  For AAPCS based ABIs small aggregate types are placed
   in the lowest memory address.  */

static pad_direction
arm_function_arg_padding (machine_mode mode, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return default_function_arg_padding (mode, type);

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return PAD_DOWNWARD;

  return PAD_UPWARD;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
arm_pad_reg_upward (machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */
      if (type)
	{
	  if ((AGGREGATE_TYPE_P (type)
	       || TREE_CODE (type) == COMPLEX_TYPE
	       || FIXED_POINT_TYPE_P (type))
	      && int_size_in_bytes (type) <= 4)
	    return true;
	}
      else
	{
	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	      && GET_MODE_SIZE (mode) <= 4)
	    return true;
	}
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;

  if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
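
/* Illustrative examples, assuming the 255/1020 bounds above: offset 254
   is accepted in ARM state but rejected in Thumb-2 (not a multiple of
   4); offset 1020 is accepted in Thumb-2 but rejected in ARM state
   (greater than 255); offset 1024 is rejected in both.  */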
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  Pattern
   guarantees that both memory accesses use the same base register, the
   offsets are constants within the range, and the gap between the offsets
   is 4.  If reload is complete, then check that registers are legal.  WBACK
   indicates whether the address is updated.  LOAD indicates whether the
   memory access is a load or a store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
		       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
	  || (t == SP_REGNUM)
	  || (t == PC_REGNUM)
	  || (t2 == SP_REGNUM)
	  || (t2 == PC_REGNUM)
	  || (!load && (n == PC_REGNUM))
	  || (load && (t == t2))
	  /* Triggers Cortex-M3 LDRD errata.  */
	  || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
	  || (t2 == PC_REGNUM)
	  || (t % 2 != 0)   /* First destination register is not even.  */
	  || (t2 != t + 1)
	  /* PC can be used as base register (for offset addressing only),
	     but it is deprecated.  */
	  || (n == PC_REGNUM)))
    return false;

  return true;
}
/* Return true if a 64-bit access with alignment ALIGN and with a
   constant offset OFFSET from the base pointer is permitted on this
   target.  */
static bool
align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
{
  return (unaligned_access
	  ? (align >= BITS_PER_WORD && (offset & 3) == 0)
	  : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
}
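
/* Illustrative example: with unaligned access enabled, a word-aligned
   access (ALIGN = 32 bits) at a multiple-of-4 offset is enough for
   LDRD/STRD; without it, the access must be doubleword aligned
   (ALIGN >= 64 bits) and the offset a multiple of 8.  So ALIGN = 32,
   OFFSET = 4 passes only when unaligned_access is set.  */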
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE,
   OFFSET and ALIGN accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (GET_CODE (mem) == SUBREG)
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;
  *align = MEM_ALIGN (mem);

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
				 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
			bool const_store, bool commute)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset, align[2];
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  int i, gap;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
				 &align[i]))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	{
	  tmp = SUBREG_REG (operands[i]);
	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
	  operands[i] = tmp;
	}
    }

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
	mov r0, 0
	str r0, [r2]
	mov r0, 1
	str r0, [r2, #4]
     can be transformed into
	mov r1, 0
	mov r0, 1
	strd r1, r0, [r2]
     in Thumb mode assuming that r1 is free.
     For ARM mode do the same but only if the starting register
     can be made to be even.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))
    {
      if (TARGET_THUMB2)
	{
	  CLEAR_HARD_REG_SET (regset);
	  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* Use the new register in the first load to ensure that
	     if the original input register is not dead after peephole,
	     then it will have the correct constant value.  */
	  operands[0] = tmp;
	}
      else if (TARGET_ARM)
	{
	  int regno = REGNO (operands[0]);
	  if (!peep2_reg_dead_p (4, operands[0]))
	    {
	      /* When the input register is even and is not dead after the
		 pattern, it has to hold the second constant but we cannot
		 form a legal STRD in ARM mode with this register as the second
		 register.  */
	      if (regno % 2 == 0)
		return false;

	      /* Is regno-1 free? */
	      SET_HARD_REG_SET (regset);
	      CLEAR_HARD_REG_BIT(regset, regno - 1);
	      tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	      if (tmp == NULL_RTX)
		return false;

	      operands[0] = tmp;
	    }
	  else
	    {
	      /* Find a DImode register.  */
	      CLEAR_HARD_REG_SET (regset);
	      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	      if (tmp != NULL_RTX)
		{
		  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
		  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
		}
	      else
		{
		  /* Can we use the input register to form a DI register?  */
		  SET_HARD_REG_SET (regset);
		  CLEAR_HARD_REG_BIT(regset,
				     regno % 2 == 0 ? regno + 1 : regno - 1);
		  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
		  if (tmp == NULL_RTX)
		    return false;
		  operands[regno % 2 == 1 ? 0 : 1] = tmp;
		}
	    }

	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
	}
    }

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
    {
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);
      std::swap (align[0], align[1]);
      if (const_store)
	std::swap (operands[4], operands[5]);
    }
  else
    {
      gap = offsets[1] - offsets[0];
      offset = offsets[0];
    }

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != 4)
    return false;

  if (!align_ok_ldrd_strd (align[0], offset))
    return false;

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
			     false, load))
    return true;

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */
  if (TARGET_THUMB2)
    return false;

  if (load && commute)
    {
      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				 false, load))
	return true;
    }

  if (const_store)
    {
      /* If input registers are dead after this pattern, they can be
	 reordered or replaced by other registers that are free in the
	 current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
	  || !peep2_reg_dead_p (4, operands[1]))
	return false;

      /* Try to reorder the input registers.  */
      /* For example, the code
	   mov r0, 0
	   mov r1, 1
	   str r1, [r2]
	   str r0, [r2, #4]
	 can be transformed into
	   mov r1, 0
	   mov r0, 1
	   strd r0, r1, [r2]  */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
				 false, false))
	{
	  std::swap (operands[0], operands[1]);
	  return true;
	}

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));

      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
      if (tmp == NULL_RTX)
	return false;

      /* DREG must be an even-numbered register in DImode.
	 Split it into SI registers.  */
      operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
      operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
      gcc_assert (operands[0] != NULL_RTX);
      gcc_assert (operands[1] != NULL_RTX);
      gcc_assert (REGNO (operands[0]) % 2 == 0);
      gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

      return (operands_ok_ldrd_strd (operands[0], operands[1],
				     base, offset, false, load));
    }

  return false;
}
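
/* An illustrative peephole input/output pair for the load case
   (hypothetical registers):

	ldr	r0, [r2]
	ldr	r1, [r2, #4]

   becomes

	ldrd	r0, r1, [r2]

   provided the checks above pass (same base register, offset gap of 4,
   and a register pairing that is legal for the current architecture).  */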
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX,
		     INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fputc (',', f);
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
      aligning the pools to the start of cache lines; this alignment
      would need to be taken into account when calculating addressability
      of a pool.  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */
struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *            next;
  rtx_insn *        insn;
  HOST_WIDE_INT     address;
  rtx *             loc;
  machine_mode      mode;
  int               fix_size;
  rtx               value;
  Mnode *           minipool;
  HOST_WIDE_INT     forwards;
  HOST_WIDE_INT     backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
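
/* For example, an HImode fixup (2 bytes) still occupies 4 bytes in the
   pool, while a DFmode fixup occupies its full 8 bytes.  */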
static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx_code_label	*minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix * 		minipool_fix_head;
Mfix * 		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif

static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~HOST_WIDE_INT_1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
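
/* Illustrative example: a TBB dispatch table with 5 one-byte entries
   has size 5, rounded up to 6 for halfword alignment; a TBH table with
   5 two-byte entries is 10 bytes and needs no rounding.  */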
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */

static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     can not be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT  min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT  min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
/* Output the literal table */
static void
dump_minipool (rtx_insn *scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file,
		       ";;  Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  rtx val = copy_rtx (mp->value);

	  switch (GET_MODE_SIZE (mp->mode))
	    {
#ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_16
	    case 16:
	      scan = emit_insn_after (gen_consttable_16 (val), scan);
	      break;
#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx_insn *insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx_insn *next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx_jump_table_data *tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
	 code that pushes minipool fixes.  */
      if (LABEL_P (from))
	count += get_label_padding (from);
      else
	count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
		   machine_mode mode, rtx value)
{
  gcc_assert (!arm_disable_literal_pool);
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   the value in memory.  */
int
arm_max_const_double_inline_cost ()
{
  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   generate it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}
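
/* Worked example (illustrative): for the 64-bit constant
   0x000000ff000000ff, both halves are 0x000000ff, which is a valid ARM
   immediate, so each half costs one insn and the total returned is 2.  */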
/* Cost of loading a SImode constant.  */
static inline int
arm_const_inline_cost (enum rtx_code code, rtx val)
{
  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
			   NULL_RTX, NULL_RTX, 1, 0);
}
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
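
/* Illustrative example: 0xff000000ff000000 has high and low parts both
   equal to 0xff000000, a valid rotated 8-bit immediate, so the function
   returns true; 0x0012345600123456 fails because 0x00123456 is not
   encodable as a single immediate.  */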
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (op_alt[opno].memory_ok)
	{
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	    }
	  else if (MEM_P (op)
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
		  rtx cop = avoid_constant_pool_reference (op);

		  /* Casting the address of something to a mode narrower
		     than a word can cause avoid_constant_pool_reference()
		     to return the pool reference itself.  That's no good to
		     us here.  Lets just hope that we can use the
		     constant pool value directly.  */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));

		  push_minipool_fix (insn, address,
				     recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}
	    }
	}
    }

  return;
}
/* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
   and unions in the context of ARMv8-M Security Extensions.  It is used as a
   helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
   functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to either one
   or four masks, depending on whether it is being computed for a
   'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
   respectively.  The tree for the type of the argument or a field within an
   argument is passed in ARG_TYPE, the current register this argument or field
   starts in is kept in the pointer REGNO and updated accordingly, the bit this
   argument or field starts at is passed in STARTING_BIT and the last used bit
   is kept in LAST_USED_BIT which is also updated accordingly.  */

static unsigned HOST_WIDE_INT
comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
			       uint32_t * padding_bits_to_clear,
			       unsigned starting_bit, int * last_used_bit)
{
  unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;

  if (TREE_CODE (arg_type) == RECORD_TYPE)
    {
      unsigned current_bit = starting_bit;
      tree field;
      long int offset, size;

      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  /* The offset within a structure is always an offset from
	     the start of that structure.  Make sure we take that into the
	     calculation of the register based offset that we use here.  */
	  offset = starting_bit;
	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
	  offset %= 32;

	  /* This is the actual size of the field, for bitfields this is the
	     bitfield width and not the container size.  */
	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);

	  if (*last_used_bit != offset)
	    {
	      if (offset < *last_used_bit)
		{
		  /* This field's offset is before the 'last_used_bit', that
		     means this field goes on the next register.  So we need to
		     pad the rest of the current register and increase the
		     register number.  */
		  uint32_t mask;
		  mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
		  mask++;

		  padding_bits_to_clear[*regno] |= mask;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      else
		{
		  /* Otherwise we pad the bits between the last field's end and
		     the start of the new field.  */
		  uint32_t mask;

		  mask = ((uint32_t)-1) >> (32 - offset);
		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
		  padding_bits_to_clear[*regno] |= mask;
		}
	      current_bit = offset;
	    }

	  /* Calculate further padding bits for inner structs/unions too.  */
	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
	    {
	      *last_used_bit = current_bit;
	      not_to_clear_reg_mask
		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
						  padding_bits_to_clear, offset,
						  last_used_bit);
	    }
	  else
	    {
	      /* Update 'current_bit' with this field's size.  If the
		 'current_bit' lies in a subsequent register, update 'regno' and
		 reset 'current_bit' to point to the current bit in that new
		 register.  */
	      current_bit += size;
	      while (current_bit >= 32)
		{
		  current_bit -= 32;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      *last_used_bit = current_bit;
	    }

	  field = TREE_CHAIN (field);
	}
      not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
    }
  else if (TREE_CODE (arg_type) == UNION_TYPE)
    {
      tree field, field_t;
      int i, regno_t, field_size;
      int max_reg = -1;
      int max_bit = -1;
      uint32_t mask;
      uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
	= {-1, -1, -1, -1};

      /* To compute the padding bits in a union we only consider bits as
	 padding bits if they are always either a padding bit or fall outside a
	 field's size for all fields in the union.  */
      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
	    = {0U, 0U, 0U, 0U};
	  int last_used_bit_t = *last_used_bit;
	  regno_t = *regno;
	  field_t = TREE_TYPE (field);

	  /* If the field's type is either a record or a union make sure to
	     compute their padding bits too.  */
	  if (RECORD_OR_UNION_TYPE_P (field_t))
	    not_to_clear_reg_mask
	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
						&padding_bits_to_clear_t[0],
						starting_bit, &last_used_bit_t);
	  else
	    {
	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
	      regno_t = (field_size / 32) + *regno;
	      last_used_bit_t = (starting_bit + field_size) % 32;
	    }

	  for (i = *regno; i < regno_t; i++)
	    {
	      /* For all but the last register used by this field only keep the
		 padding bits that were padding bits in this field.  */
	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
	    }

	  /* For the last register, keep all padding bits that were padding
	     bits in this field and any padding bits that are still valid
	     as padding bits but fall outside of this field's size.  */
	  mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
	  padding_bits_to_clear_res[regno_t]
	    &= padding_bits_to_clear_t[regno_t] | mask;

	  /* Update the maximum size of the fields in terms of registers used
	     ('max_reg') and the 'last_used_bit' in said register.  */
	  if (max_reg < regno_t)
	    {
	      max_reg = regno_t;
	      max_bit = last_used_bit_t;
	    }
	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
	    max_bit = last_used_bit_t;

	  field = TREE_CHAIN (field);
	}

      /* Update the current padding_bits_to_clear using the intersection of the
	 padding bits of all the fields.  */
      for (i = *regno; i < max_reg; i++)
	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];

      /* Do not keep trailing padding bits, we do not know yet whether this
	 is the end of the argument.  */
      mask = ((uint32_t) 1 << max_bit) - 1;
      padding_bits_to_clear[max_reg]
	|= padding_bits_to_clear_res[max_reg] & mask;

      *regno = max_reg;
      *last_used_bit = max_bit;
    }
  else
    /* This function should only be used for structs and unions.  */
    gcc_unreachable ();

  return not_to_clear_reg_mask;
}
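
/* Worked example (illustrative, not part of the original comments): if a
   field ends at bit 8 of a register (*last_used_bit == 8) and the next
   field starts in a new register, the padding mask computed above is
   0xffffffff - (1 << 8) + 1 == 0xffffff00, i.e. bits 8..31 of the current
   register are recorded as padding that must be cleared.  */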
/* In the context of ARMv8-M Security Extensions, this function is used for
   both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute
   what registers are used when returning or passing arguments, which is then
   returned as a mask.  It will also compute a mask to indicate padding/unused
   bits for each of these registers, and passes this through the
   PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
   ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
   the starting register used to pass this argument or return value is passed
   in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
   for struct and union types.  */

static unsigned HOST_WIDE_INT
compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
			   uint32_t * padding_bits_to_clear)
{
  int last_used_bit = 0;
  unsigned HOST_WIDE_INT not_to_clear_mask;

  if (RECORD_OR_UNION_TYPE_P (arg_type))
    {
      not_to_clear_mask
	= comp_not_to_clear_mask_str_un (arg_type, &regno,
					 padding_bits_to_clear, 0,
					 &last_used_bit);

      /* If the 'last_used_bit' is not zero, that means we are still using a
	 part of the last 'regno'.  In such cases we must clear the trailing
	 bits.  Otherwise we are not using regno and we should mark it as to
	 clear.  */
      if (last_used_bit != 0)
	padding_bits_to_clear[regno]
	  |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
      else
	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
    }
  else
    {
      not_to_clear_mask = 0;
      /* We are not dealing with structs nor unions.  So these arguments may be
	 passed in floating point registers too.  In some cases a BLKmode is
	 used when returning or passing arguments in multiple VFP registers.  */
      if (GET_MODE (arg_rtx) == BLKmode)
	{
	  int i, arg_regs;
	  rtx reg;

	  /* This should really only occur when dealing with the hard-float
	     ABI.  */
	  gcc_assert (TARGET_HARD_FLOAT_ABI);

	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
	    {
	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
	      gcc_assert (REG_P (reg));

	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);

	      /* If we are dealing with DF mode, make sure we don't
		 clear either of the registers it addresses.  */
	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
	      if (arg_regs > 1)
		{
		  unsigned HOST_WIDE_INT mask;
		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
		  not_to_clear_mask |= mask;
		}
	    }
	}
      else
	{
	  /* Otherwise we can rely on the MODE to determine how many registers
	     are being used by this argument.  */
	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	  if (arg_regs > 1)
	    {
	      unsigned HOST_WIDE_INT
	      mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	      not_to_clear_mask |= mask;
	    }
	}
    }

  return not_to_clear_mask;
}
/* Clear registers secret before doing a cmse_nonsecure_call or returning from
   a cmse_nonsecure_entry function.  TO_CLEAR_BITMAP indicates which registers
   are to be fully cleared, using the value in register CLEARING_REG if more
   efficient.  The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
   the bits that need to be cleared in caller-saved core registers, with
   SCRATCH_REG used as a scratch register for that clearing.

   NOTE: one of the three following assertions must hold:
   - SCRATCH_REG is a low register
   - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
     in TO_CLEAR_BITMAP)
   - CLEARING_REG is a low register.  */

static void
cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
		      int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
{
  bool saved_clearing = false;
  rtx saved_clearing_reg = NULL_RTX;
  int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;

  gcc_assert (arm_arch_cmse);

  if (!bitmap_empty_p (to_clear_bitmap))
    {
      minregno = bitmap_first_set_bit (to_clear_bitmap);
      maxregno = bitmap_last_set_bit (to_clear_bitmap);
    }
  clearing_regno = REGNO (clearing_reg);

  /* Clear padding bits.  */
  gcc_assert (padding_bits_len <= NUM_ARG_REGS);
  for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
    {
      uint32_t mask;
      rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);

      if (padding_bits_to_clear[i] == 0)
	continue;

      /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
	 CLEARING_REG as scratch.  */
      if (TARGET_THUMB1
	  && REGNO (scratch_reg) > LAST_LO_REGNUM)
	{
	  /* clearing_reg is not to be cleared, copy its value into scratch_reg
	     such that we can use clearing_reg to clear the unused bits in the
	     arguments.  */
	  if ((clearing_regno > maxregno
	       || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
	      && !saved_clearing)
	    {
	      gcc_assert (clearing_regno <= LAST_LO_REGNUM);
	      emit_move_insn (scratch_reg, clearing_reg);
	      saved_clearing = true;
	      saved_clearing_reg = scratch_reg;
	    }
	  scratch_reg = clearing_reg;
	}

      /* Fill the lower half of the negated padding_bits_to_clear[i].  */
      mask = (~padding_bits_to_clear[i]) & 0xFFFF;
      emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));

      /* Fill the top half of the negated padding_bits_to_clear[i].  */
      mask = (~padding_bits_to_clear[i]) >> 16;
      rtx16 = gen_int_mode (16, SImode);
      dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
      if (mask)
	emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));

      emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
    }
  if (saved_clearing)
    emit_move_insn (clearing_reg, saved_clearing_reg);

  /* Clear full registers.  */

  /* If not marked for clearing, clearing_reg already does not contain
     any secret.  */
  if (clearing_regno <= maxregno
      && bitmap_bit_p (to_clear_bitmap, clearing_regno))
    {
      emit_move_insn (clearing_reg, const0_rtx);
      emit_use (clearing_reg);
      bitmap_clear_bit (to_clear_bitmap, clearing_regno);
    }

  for (regno = minregno; regno <= maxregno; regno++)
    {
      if (!bitmap_bit_p (to_clear_bitmap, regno))
	continue;

      if (IS_VFP_REGNUM (regno))
	{
	  /* If regno is an even vfp register and its successor is also to
	     be cleared, use vmov.  */
	  if (TARGET_VFP_DOUBLE
	      && VFP_REGNO_OK_FOR_DOUBLE (regno)
	      && bitmap_bit_p (to_clear_bitmap, regno + 1))
	    {
	      emit_move_insn (gen_rtx_REG (DFmode, regno),
			      CONST1_RTX (DFmode));
	      emit_use (gen_rtx_REG (DFmode, regno));
	      regno++;
	    }
	  else
	    {
	      emit_move_insn (gen_rtx_REG (SFmode, regno),
			      CONST1_RTX (SFmode));
	      emit_use (gen_rtx_REG (SFmode, regno));
	    }
	}
      else
	{
	  emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
	  emit_use (gen_rtx_REG (SImode, regno));
	}
    }
}
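
/* Illustrative note (not part of the original comments): for
   padding_bits_to_clear[i] == 0xff000000 the loop above loads the
   complement 0x00ffffff into the scratch register in two halves (a move of
   the low 16 bits followed by a zero_extract store to the top 16 bits) and
   then ANDs the argument register with it, clearing exactly the padding
   bits.  */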
/* Clears caller saved registers not used to pass arguments before a
   cmse_nonsecure_call.  Saving, clearing and restoring of callee saved
   registers is done in the __gnu_cmse_nonsecure_call libcall.
   See libgcc/config/arm/cmse_nonsecure_call.S.  */

static void
cmse_nonsecure_call_clear_caller_saved (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  unsigned address_regnum, regno, maxregno =
	    TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
	  auto_sbitmap to_clear_bitmap (maxregno + 1);
	  rtx_insn *seq;
	  rtx pat, call, unspec, clearing_reg, ip_reg, shift;
	  rtx address;
	  CUMULATIVE_ARGS args_so_far_v;
	  cumulative_args_t args_so_far;
	  tree arg_type, fntype;
	  bool first_param = true;
	  function_args_iterator args_iter;
	  uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};

	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  if (!CALL_P (insn))
	    continue;

	  pat = PATTERN (insn);
	  gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
	  call = XVECEXP (pat, 0, 0);

	  /* Get the real call RTX if the insn sets a value, ie. returns.  */
	  if (GET_CODE (call) == SET)
	    call = SET_SRC (call);

	  /* Check if it is a cmse_nonsecure_call.  */
	  unspec = XEXP (call, 0);
	  if (GET_CODE (unspec) != UNSPEC
	      || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
	    continue;

	  /* Determine the caller-saved registers we need to clear.  */
	  bitmap_clear (to_clear_bitmap);
	  bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);

	  /* Only look at the caller-saved floating point registers in case of
	     -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
	     lazy store and loads which clear both caller- and callee-saved
	     registers.  */
	  if (TARGET_HARD_FLOAT_ABI)
	    {
	      auto_sbitmap float_bitmap (maxregno + 1);

	      bitmap_clear (float_bitmap);
	      bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
				D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
	      bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
	    }

	  /* Make sure the register used to hold the function address is not
	     cleared.  */
	  address = RTVEC_ELT (XVEC (unspec, 0), 0);
	  gcc_assert (MEM_P (address));
	  gcc_assert (REG_P (XEXP (address, 0)));
	  address_regnum = REGNO (XEXP (address, 0));
	  if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
	    bitmap_clear_bit (to_clear_bitmap, address_regnum);

	  /* Set basic block of call insn so that df rescan is performed on
	     insns inserted here.  */
	  set_block_for_insn (insn, bb);
	  df_set_flags (DF_DEFER_INSN_RESCAN);
	  start_sequence ();

	  /* Make sure the scheduler doesn't schedule other insns beyond
	     here.  */
	  emit_insn (gen_blockage ());

	  /* Walk through all arguments and clear registers appropriately.  */
	  fntype = TREE_TYPE (MEM_EXPR (address));
	  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
				    NULL_TREE);
	  args_so_far = pack_cumulative_args (&args_so_far_v);
	  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
	    {
	      rtx arg_rtx;
	      uint64_t to_clear_args_mask;
	      machine_mode arg_mode = TYPE_MODE (arg_type);

	      if (VOID_TYPE_P (arg_type))
		continue;

	      if (!first_param)
		arm_function_arg_advance (args_so_far, arg_mode, arg_type,
					  true);

	      arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
					  true);
	      gcc_assert (REG_P (arg_rtx));
	      to_clear_args_mask
		= compute_not_to_clear_mask (arg_type, arg_rtx,
					     REGNO (arg_rtx),
					     &padding_bits_to_clear[0]);
	      if (to_clear_args_mask)
		{
		  for (regno = R0_REGNUM; regno <= maxregno; regno++)
		    if (to_clear_args_mask & (1ULL << regno))
		      bitmap_clear_bit (to_clear_bitmap, regno);
		}

	      first_param = false;
	    }

	  /* We use right shift and left shift to clear the LSB of the address
	     we jump to instead of using bic, to avoid having to use an extra
	     register on Thumb-1.  */
	  clearing_reg = XEXP (address, 0);
	  shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (clearing_reg, shift));
	  shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (clearing_reg, shift));
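
	  /* Illustrative note (not part of the original comments): the two
	     shifts above produce, in effect,
		lsrs	rN, rN, #1
		lsls	rN, rN, #1
	     which clears bit 0 of the call address without needing the extra
	     register that a BIC with an immediate would require on
	     Thumb-1.  */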
	  /* Clear caller-saved registers that leak before doing a non-secure
	     call.  */
	  ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
	  cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
				NUM_ARG_REGS, ip_reg, clearing_reg);

	  seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);
	}
    }
}
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */

static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx cmp, op0, op1, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
	insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
	continue;

      /* Get the register with which we are comparing.  */
      cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
      op0 = XEXP (cmp, 0);
      op1 = XEXP (cmp, 1);

      /* Check that comparison is against ZERO.  */
      if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
	continue;

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
	   (!insn_clobbered
	    && prev != BB_HEAD (bb)
	    && (NOTE_P (prev)
		|| DEBUG_INSN_P (prev)
		|| ((set = single_set (prev)) != NULL
		    && get_attr_conds (prev) == CONDS_NOCOND)));
	   prev = PREV_INSN (prev))
	{
	  if (reg_set_p (op0, prev))
	    insn_clobbered = true;
	}

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)
	continue;

      if (!set)
	continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
	  || !low_register_operand (src, SImode))
	continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
	 in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
	{
	  dest = copy_rtx (dest);
	  src = copy_rtx (src);
	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
	  PATTERN (prev) = gen_rtx_SET (dest, src);
	  INSN_CODE (prev) = -1;
	  /* Set test register in INSN to dest.  */
	  XEXP (cmp, 0) = copy_rtx (dest);
	  INSN_CODE (insn) = -1;
	}
    }
}
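
/* Illustrative example (not part of the original comments): given

	movs	r2, r1
	...
	cmp	r2, #0
	beq	.L1

   the move is rewritten into "subs r2, r1, #0", which sets the condition
   codes itself, so the compare against zero can be omitted when the
   conditional branch is finally output.  */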
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */

static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  enum Convert_Action {SKIP, CONV, SWAP_CONV};

  FOR_EACH_BB_FN (bb, cfun)
    {
      if ((current_tune->disparage_flag_setting_t16_encodings
	   == tune_params::DISPARAGE_FLAGS_ALL)
	  && optimize_bb_for_speed_p (bb))
	continue;

      rtx_insn *insn;
      Convert_Action action = SKIP;
      Convert_Action action_for_partial_flag_setting
	= ((current_tune->disparage_flag_setting_t16_encodings
	    != tune_params::DISPARAGE_FLAGS_NEITHER)
	   && optimize_bb_for_speed_p (bb))
	  ? SKIP : CONV;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (NONJUMP_INSN_P (insn)
	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
	      && GET_CODE (PATTERN (insn)) == SET)
	    {
	      action = SKIP;
	      rtx pat = PATTERN (insn);
	      rtx dst = XEXP (pat, 0);
	      rtx src = XEXP (pat, 1);
	      rtx op0 = NULL_RTX, op1 = NULL_RTX;

	      if (UNARY_P (src) || BINARY_P (src))
		op0 = XEXP (src, 0);

	      if (BINARY_P (src))
		op1 = XEXP (src, 1);

	      if (low_register_operand (dst, SImode))
		{
		  switch (GET_CODE (src))
		    {
		    case PLUS:
		      /* Adding two registers and storing the result
			 in the first source is already a 16-bit
			 operation.  */
		      if (rtx_equal_p (dst, op0)
			  && register_operand (op1, SImode))
			break;

		      if (low_register_operand (op0, SImode))
			{
			  /* ADDS <Rd>,<Rn>,<Rm>  */
			  if (low_register_operand (op1, SImode))
			    action = CONV;
			  /* ADDS <Rdn>,#<imm8>  */
			  /* SUBS <Rdn>,#<imm8>  */
			  else if (rtx_equal_p (dst, op0)
				   && CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -255, 255))
			    action = CONV;
			  /* ADDS <Rd>,<Rn>,#<imm3>  */
			  /* SUBS <Rd>,<Rn>,#<imm3>  */
			  else if (CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -7, 7))
			    action = CONV;
			}
		      /* ADCS <Rd>, <Rn>  */
		      else if (GET_CODE (XEXP (src, 0)) == PLUS
			       && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
			       && low_register_operand (XEXP (XEXP (src, 0), 1),
							SImode)
			       && COMPARISON_P (op1)
			       && cc_register (XEXP (op1, 0), VOIDmode)
			       && maybe_get_arm_condition_code (op1) == ARM_CS
			       && XEXP (op1, 1) == const0_rtx)
			action = CONV;
		      break;

		    case MINUS:
		      /* RSBS <Rd>,<Rn>,#0
			 Not handled here: see NEG below.  */
		      /* SUBS <Rd>,<Rn>,#<imm3>
			 SUBS <Rdn>,#<imm8>
			 Not handled here: see PLUS above.  */
		      /* SUBS <Rd>,<Rn>,<Rm>  */
		      if (low_register_operand (op0, SImode)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      break;

		    case MULT:
		      /* MULS <Rdm>,<Rn>,<Rdm>
			 As an exception to the rule, this is only used
			 when optimizing for size since MULS is slow on all
			 known implementations.  We do not even want to use
			 MULS in cold code, if optimizing for speed, so we
			 test the global flag here.  */
		      if (!optimize_size)
			break;
		      /* Fall through.  */
		    case AND:
		    case IOR:
		    case XOR:
		      /* ANDS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      else if (rtx_equal_p (dst, op1)
			       && low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting == SKIP
				 ? SKIP : SWAP_CONV;
		      break;

		    case ASHIFTRT:
		    case ASHIFT:
		    case LSHIFTRT:
		      /* ASRS <Rdn>,<Rm> */
		      /* LSRS <Rdn>,<Rm> */
		      /* LSLS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      /* ASRS <Rd>,<Rm>,#<imm5> */
		      /* LSRS <Rd>,<Rm>,#<imm5> */
		      /* LSLS <Rd>,<Rm>,#<imm5> */
		      else if (low_register_operand (op0, SImode)
			       && CONST_INT_P (op1)
			       && IN_RANGE (INTVAL (op1), 0, 31))
			action = action_for_partial_flag_setting;
		      break;

		    case ROTATERT:
		      /* RORS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NOT:
		      /* MVNS <Rd>,<Rm>  */
		      if (low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NEG:
		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
		      if (low_register_operand (op0, SImode))
			action = CONV;
		      break;

		    case CONST_INT:
		      /* MOVS <Rd>,#<imm8>  */
		      if (CONST_INT_P (src)
			  && IN_RANGE (INTVAL (src), 0, 255))
			action = action_for_partial_flag_setting;
		      break;

		    case REG:
		      /* MOVS and MOV<c> with registers have different
			 encodings, so are not relevant here.  */
		      break;

		    default:
		      break;
		    }
		}

	      if (action != SKIP)
		{
		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
		  rtvec vec;

		  if (action == SWAP_CONV)
		    {
		      src = copy_rtx (src);
		      XEXP (src, 0) = op1;
		      XEXP (src, 1) = op0;
		      pat = gen_rtx_SET (dst, src);
		      vec = gen_rtvec (2, pat, clobber);
		    }
		  else /* action == CONV */
		    vec = gen_rtvec (2, pat, clobber);

		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
		  INSN_CODE (insn) = -1;
		}
	    }

	  if (NONDEBUG_INSN_P (insn))
	    df_simulate_one_insn_backwards (bb, insn, &live);
	}
    }

  CLEAR_REG_SET (&live);
}
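
/* Illustrative example (not part of the original comments): when the
   condition codes are dead after an insn, a 32-bit "add r0, r0, r1" can be
   replaced by the flag-setting 16-bit encoding "adds r0, r0, r1"; the
   CLOBBER of CC_REGNUM added above is what licenses the shorter form.  */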
/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx_insn *insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (use_cmse)
    cmse_nonsecure_call_clear_caller_saved ();

  if (TARGET_THUMB1)
    thumb1_reorg ();
  else if (TARGET_THUMB2)
    thumb2_reorg ();

  /* Ensure all insns that must be split have been split at this point.
     Otherwise, the pool placement code below may compute incorrect
     insn lengths.  Note that when optimizing, all insns have already
     been split at this point.  */
  if (!optimize)
    split_all_insns_noflow ();

  /* Make sure we do not attempt to create a literal pool even though it should
     no longer be necessary to create any.  */
  if (arm_disable_literal_pool)
    return;

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (NOTE_P (insn));
  minipool_pad = 0;

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx_jump_table_data *table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if (tablejump_p (insn, NULL, &table))
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
      else if (LABEL_P (insn))
	/* Add the worst-case padding due to alignment.  We don't add
	   the _current_ padding because the minipool insertions
	   themselves might change it.  */
	address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix * last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (BARRIER_P (ftmp->insn))
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is first fix that we can't fit into this pool and
	     there no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (!BARRIER_P (ftmp->insn)
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (!BARRIER_P (this_fix->insn))
	  {
	    rtx addr
	      = plus_constant (Pmode,
			       gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  cfun->machine->after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
/* Routines to output assembly language.  */

/* Return string representation of passed in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (real_equal (r, &value_fp0));
  return "0";
}

/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
			 bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);
  bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());

  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
	gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  /* Can't use POP if returning from an interrupt.  */
  if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
    sprintf (pattern, "pop%s\t{", conditional);
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
	 It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
	sprintf (pattern, "ldmfd%s\t", conditional);
      else if (update)
	sprintf (pattern, "ldmia%s\t", conditional);
      else
	sprintf (pattern, "ldm%s\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
	strcat (pattern, "!, {");
      else
	strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
	  reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
	      reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (interrupt_p && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
/* Output the assembly for a store multiple.  */

const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
		   ? XEXP (operands[0], 0)
		   : XEXP (XEXP (operands[0], 0), 0);
  bool push_p = REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    p += sprintf (&pattern[p], ", d%d", base + i);

  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}

/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(Pmode, stack_pointer_rtx,
					 - (count * 8)))
		    ),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (gen_frame_mem (DFmode,
					plus_constant (Pmode,
						       stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
/* Returns true if -mcmse has been passed and the function pointed to by 'addr'
   has the cmse_nonsecure_call attribute and returns false otherwise.  */

bool
detect_cmse_nonsecure_call (tree addr)
{
  if (!addr)
    return FALSE;

  tree fntype = TREE_TYPE (addr);
  if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
				    TYPE_ATTRIBUTES (fntype)))
    return TRUE;
  else
    return FALSE;
}


/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
	 linker.  We need to add an IP clobber to allow setting
	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
	 is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}

/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}

void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	{
	  emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					       GEN_INT (16)),
			 GEN_INT ((val >> 16) & 0x0000ffff));
	  rtx_insn *insn = get_last_insn ();
	  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
	}
      return;
    }

  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
  rtx_insn *insn = get_last_insn ();
  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
}
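
/* Illustrative example (not part of the original comments): for
   (set r0 (const_int 0x12345678)) arm_emit_movpair emits the usual
   movw/movt pair,

	movw	r0, #0x5678
	movt	r0, #0x1234

   the second insn writing only the top halfword via the zero_extract.  */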
/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
const char *
output_move_double (rtx *operands, bool emit, int *count)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];
  if (count)
    *count = 1;

  /* The only case when this might happen is when
     you are looking at the length of a DImode instruction
     that has an invalid constant in it.  */
  if (code0 == REG && code1 != MEM)
    {
      gcc_assert (!emit);
      *count = 2;
      return "";
    }

  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (TARGET_LDRD
		  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
		output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
	      else
		output_asm_insn ("ldmia%?\t%m1, %M0", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
	      else
		output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
	      else
		output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  /* Autoicrement addressing modes should never have overlapping
	     base and destination registers, and overlapping index registers
	     are already prohibited, so this doesn't need to worry about
	     fix_cm3_ldrd.  */
	  otherops[0] = operands[0];
	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
	    {
	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
		{
		  /* Registers overlap so split out the increment.  */
		  if (emit)
		    {
		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
		      output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  /* Use a single insn if we can.
		     FIXME: IWMMXT allows offsets larger than ldrd can
		     handle, fix these up with a pair of ldr.  */
		  if (TARGET_THUMB2
		      || !CONST_INT_P (otherops[2])
		      || (INTVAL (otherops[2]) > -256
			  && INTVAL (otherops[2]) < 256))
		    {
		      if (emit)
			output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
		    }
		  else
		    {
		      if (emit)
			{
			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
			}
		      if (count)
			*count = 2;
		    }
		}
	    }
	  else
	    {
	      /* Use a single insn if we can.
		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
		 fix these up with a pair of ldr.  */
	      if (TARGET_THUMB2
		  || !CONST_INT_P (otherops[2])
		  || (INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256))
		{
		  if (emit)
		    output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  break;

	case LABEL_REF:
	case CONST:
	  /* We might be able to use ldrd %0, %1 here.  However the range is
	     different to ldr/adr, and it is broken on some ARMv7-M
	     implementations.  */
	  /* Use the second register of the pair to avoid problematic
	     conditions in the range of an ldr instruction.  */
	  otherops[1] = operands[1];
	  if (emit)
	    output_asm_insn ("adr%?\t%0, %1", otherops);
	  operands[1] = otherops[0];
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldrd%?\t%0, [%1]", operands);
	      else
		output_asm_insn ("ldmia%?\t%1, %M0", operands);
	    }

	  if (count)
	    *count = 2;
	  break;

	  /* ??? This needs checking for thumb2.  */
	default:
	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
	    {
	      otherops[0] = operands[0];
	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);

	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
		{
		  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
		    {
		      switch ((int) INTVAL (otherops[2]))
			{
			case -8:
			  if (emit)
			    output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
			  return "";
			case -4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldmda%?\t%1, %M0", otherops);
			  return "";
			case 4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldmib%?\t%1, %M0", otherops);
			  return "";
			}
		    }
		  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
		  operands[1] = otherops[0];
		  if (TARGET_LDRD
		      && (REG_P (otherops[2])
			  || TARGET_THUMB2
			  || (CONST_INT_P (otherops[2])
			      && INTVAL (otherops[2]) > -256
			      && INTVAL (otherops[2]) < 256)))
		    {
		      if (reg_overlap_mentioned_p (operands[0],
						   otherops[2]))
			{
			  /* Swap base and index registers over to
			     avoid a conflict.  */
			  std::swap (otherops[1], otherops[2]);
			}
		      /* If both registers conflict, it will usually
			 have been fixed by a splitter.  */
		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
			{
			  if (emit)
			    {
			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
			      output_asm_insn ("ldrd%?\t%0, [%1]", operands);
			    }
			  if (count)
			    *count = 2;
			}
		      else
			{
			  otherops[0] = operands[0];
			  if (emit)
			    output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
			}
		      return "";
		    }

		  if (CONST_INT_P (otherops[2]))
		    {
		      if (emit)
			{
			  if (!(const_ok_for_arm (INTVAL (otherops[2]))))
			    output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
			  else
			    output_asm_insn ("add%?\t%0, %1, %2", otherops);
			}
		    }
		  else if (emit)
		    output_asm_insn ("add%?\t%0, %1, %2", otherops);
		}
	      else if (emit)
		output_asm_insn ("sub%?\t%0, %1, %2", otherops);

	      if (count)
		*count = 2;

	      if (TARGET_LDRD)
		return "ldrd%?\t%0, [%1]";

	      return "ldmia%?\t%1, %M0";
	    }
	  else
	    {
	      otherops[1] = adjust_address (operands[1], SImode, 4);
	      /* Take care of overlapping base/data reg.  */
	      if (reg_mentioned_p (operands[0], operands[1]))
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	}
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
		  || (TARGET_ARM && TARGET_LDRD));

      /* For TARGET_ARM the first source register of an STRD
	 must be even.  This is usually the case for double-word
	 values but user assembly constraints can force an odd
	 starting register.  */
      bool allow_strd = TARGET_LDRD
			&& !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (allow_strd)
		output_asm_insn ("strd%?\t%1, [%m0]", operands);
	      else
		output_asm_insn ("stm%?\t%m0, %M1", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (allow_strd);
	  if (emit)
	    output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (allow_strd)
		output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
	      else
		output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (allow_strd)
		output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
	      else
		output_asm_insn ("stm%?\t%m0!, %M1", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (allow_strd);
	  if (emit)
	    output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  otherops[0] = operands[1];
	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

	  /* IWMMXT allows offsets larger than strd can handle,
	     fix these up with a pair of str.  */
	  if (!TARGET_THUMB2
	      && CONST_INT_P (otherops[2])
	      && (INTVAL(otherops[2]) <= -256
		  || INTVAL(otherops[2]) >= 256))
	    {
	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
	    {
	      if (emit)
		output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
	    }
	  else
	    {
	      if (emit)
		output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
	    }
	  break;

	case PLUS:
	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
	  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
	    {
	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
		{
		case -8:
		  if (emit)
		    output_asm_insn ("stmdb%?\t%m0, %M1", operands);
		  return "";

		case -4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stmda%?\t%m0, %M1", operands);
		  return "";

		case 4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stmib%?\t%m0, %M1", operands);
		  return "";
		}
	    }
	  if (allow_strd
	      && (REG_P (otherops[2])
		  || TARGET_THUMB2
		  || (CONST_INT_P (otherops[2])
		      && INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256)))
	    {
	      otherops[0] = operands[1];
	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
	      if (emit)
		output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
	      return "";
	    }
	  /* Fall through */

	default:
	  otherops[0] = adjust_address (operands[0], SImode, 4);
	  otherops[1] = operands[1];
	  if (emit)
	    {
	      output_asm_insn ("str%?\t%1, %0", operands);
	      output_asm_insn ("str%?\t%H1, %0", otherops);
	    }
	  if (count)
	    *count = 2;
	}
    }

  return "";
}
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */

      if (MEM_P (operands[1]))
	{
	  switch (GET_CODE (XEXP (operands[1], 0)))
	    {
	    case REG:
	      output_asm_insn ("ldmia%?\t%m1, %M0", operands);
	      break;

	    case LABEL_REF:
	    case CONST:
	      output_asm_insn ("adr%?\t%0, %1", operands);
	      output_asm_insn ("ldmia%?\t%0, %M0", operands);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else
	{
	  rtx ops[2];
	  int dest, src, i;

	  gcc_assert (REG_P (operands[1]));

	  dest = REGNO (operands[0]);
	  src = REGNO (operands[1]);

	  /* This seems pretty dumb, but hopefully GCC won't try to do it
	     very often.  */
	  if (dest < src)
	    for (i = 0; i < 4; i++)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	  else
	    for (i = 3; i >= 0; i--)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	}
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  output_asm_insn ("stm%?\t%m0, %M1", operands);
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return "";
}
/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int sp = (!TARGET_VFP_FP16INST
	    || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
	      || mode == SFmode
	      || mode == DFmode
	      || mode == HImode
	      || mode == SImode
	      || mode == DImode
	      || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "v%sr%%?.%s\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
	   load ? "ld" : "st",
	   dp ? "64" : sp ? "32" : "16",
	   dp ? "P" : "",
	   integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
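
/* Illustrative example (not part of the original comments): a DFmode load
   from a plain register address uses the default template above and prints
   roughly as "vldr.64 d0, [r0]", while an SFmode store with a POST_INC
   address prints as "vstmia.32 r0!, {s0}".  */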
/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM, as required by the EABI.
   GCC RTL defines element ordering based on in-memory order.
   This can be different from the architectural ordering of elements
   within a NEON register.  The intrinsics defined in arm_neon.h use the
   NEON register element ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */

const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = REG_NREGS (reg) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
	      || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
	      || VALID_NEON_QREG_MODE (mode)
	      || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4)
	{
	  templ = "v%smia%%?\t%%0!, %%h1";
	  ops[0] = XEXP (addr, 0);
	}
      else
	{
	  templ = "v%s1.64\t%%h1, %%A0";
	  ops[0] = mem;
	}
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
	 pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case REG:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 1)
	{
	  if (nregs > 4)
	    templ = "v%smia%%?\t%%m0, %%h1";
	  else
	    templ = "v%s1.64\t%%h1, %%A0";

	  ops[0] = mem;
	  ops[1] = reg;
	  break;
	}
      /* Fall through.  */
    case LABEL_REF:
    case PLUS:
      {
	int i;
	int overlap = -1;
	for (i = 0; i < nregs; i++)
	  {
	    /* We're only using DImode here because it's a convenient size.  */
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
	    ops[1] = adjust_address (mem, DImode, 8 * i);
	    if (reg_overlap_mentioned_p (ops[0], mem))
	      {
		gcc_assert (overlap == -1);
		overlap = i;
	      }
	    else
	      {
		sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
		output_asm_insn (buff, ops);
	      }
	  }
	if (overlap != -1)
	  {
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
	    sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
	    output_asm_insn (buff, ops);
	  }

	return "";
      }

    default:
      gcc_unreachable ();
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx_insn *insn)
{
  rtx reg, mem, addr;
  int load;
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case E_EImode:
	case E_OImode:
	  return 8;
	case E_CImode:
	  return 12;
	case E_XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = REG_NREGS (reg) / 2;
      return insns * 4;
    }
  else
    return 4;
}

/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx_insn *insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
	  && REG_P (XEXP (addr, 0))
	  && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}

/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
	{
	  if (n & (3 << i))
	    {
	      operands[immed_op] = GEN_INT (n & (255 << i));
	      output_asm_insn (instr, operands);
	      instr = instr2;
	      i += 6;
	    }
	}
    }

  return "";
}
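
/* Worked example (illustrative, not part of the original comments): for
   n == 0x10F00F the loop above emits three instructions,

	add	r0, r1, #15
	add	r0, r0, #61440		@ 0xF000
	add	r0, r0, #1048576	@ 0x100000

   each immediate being an 8-bit value rotated by an even amount, which is
   exactly what an ARM data processing immediate can encode.  */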
19055 /* Return the name of a shifter operation. */
19056 static const char *
19057 arm_shift_nmem(enum rtx_code code
)
19062 return ARM_LSL_NAME
;
19078 /* Return the appropriate ARM instruction for the operation code.
19079 The returned result should not be overwritten. OP is the rtx of the
19080 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19083 arithmetic_instr (rtx op
, int shift_first_arg
)
19085 switch (GET_CODE (op
))
19091 return shift_first_arg
? "rsb" : "sub";
19106 return arm_shift_nmem(GET_CODE(op
));
19109 gcc_unreachable ();
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift amount otherwise.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem (code);
      if (CONST_INT_P (XEXP (op, 1)))
        *amountp = INTVAL (XEXP (op, 1));
      else if (REG_P (XEXP (op, 1)))
        {
          *amountp = -1;
          return mnem;
        }
      else
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
         power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = exact_log2 (*amountp);
      gcc_assert (IN_RANGE (*amountp, 0, 31));
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
        mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}
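/* Example (a sketch): for (mult:SI (reg:SI r1) (const_int 8)) this returns
   ARM_LSL_NAME with *amountp == 3, so the multiply prints as "lsl #3";
   for (ashift:SI (reg:SI r1) (reg:SI r2)) it returns "lsl" with
   *amountp == -1, meaning the shift amount comes from a register.  */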
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
        {
          fputs ("\"\n\t.ascii\t\"", stream);
          len_so_far = 0;
        }

      if (ISPRINT (c))
        {
          if (c == '\\' || c == '\"')
            {
              putc ('\\', stream);
              len_so_far++;
            }
          putc (c, stream);
          len_so_far++;
        }
      else
        {
          fprintf (stream, "\\%03o", c);
          len_so_far += 4;
        }
    }

  fputs ("\"\n", stream);
}
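/* For instance (illustrative only): the bytes 'a', '"', '\n' would be
   emitted as
        .ascii  "a\"\012"
   with a fresh .ascii directive started whenever a chunk reaches
   MAX_ASCII_LEN characters.  */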
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_regs[reg] \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
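/* So, for example, on a Thumb-1 target compiled with -Os, a high register
   such as r8 tests true here even though call_used_regs marks it as
   caller saved for register-allocation purposes.  */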
/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_core_reg_mask ().  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
         even call clobbered ones.  If this is a leaf function
         we can just examine the registers used by the RTL, but
         otherwise we have to assume that whatever function is
         called might clobber anything, and so we have to save
         all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
        /* FIQ handlers have registers r8 - r12 banked, so
           we only need to check r0 - r7, Normal ISRs only
           bank r14 and r15, so we must check up to r12.
           r13 is the stack pointer which is always preserved,
           so we do not need to consider it here.  */
        max_reg = 7;
      else
        max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
        if (df_regs_ever_live_p (reg)
            || (! crtl->is_leaf && call_used_regs[reg]))
          save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (flag_pic
          && !TARGET_SINGLE_PIC_BASE
          && arm_pic_register != INVALID_REGNUM
          && crtl->uses_pic_offset_table)
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE(func_type))
    {
      /* For noreturn functions we historically omitted register saves
         altogether.  However this really messes up debugging.  As a
         compromise save just the frame pointers.  Combined with the link
         register saved elsewhere this should be sufficient to get
         a backtrace.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
         which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
        if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
          save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
         don't stack it even though it may be live.  */
      if (flag_pic
          && !TARGET_SINGLE_PIC_BASE
          && arm_pic_register != INVALID_REGNUM
          && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
              || crtl->uses_pic_offset_table))
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
        save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
        {
          reg = EH_RETURN_DATA_REGNO (i);
          if (reg == INVALID_REGNUM)
            break;
          save_reg_mask |= 1 << reg;
        }
    }

  return save_reg_mask;
}
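/* As an illustration (hypothetical numbers): a normal function that uses
   r4 and r6 and needs a frame pointer would get back a mask of
   (1 << 4) | (1 << 6) | (1 << HARD_FRAME_POINTER_REGNUM), before the
   LR/PC handling done by the caller of this function.  */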
/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}
/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* Once the value is updated from the init value of -1, do not
     re-compute.  */
  if (cfun->machine->static_chain_stack_bytes != -1)
    return cfun->machine->static_chain_stack_bytes;

  /* See the defining assertion in arm_expand_prologue.  */
  if (IS_NESTED (arm_current_func_type ())
      && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
          || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
               || flag_stack_clash_protection)
              && !df_regs_ever_live_p (LR_REGNUM)))
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.
   This is used by arm_compute_frame_layout, which may add extra registers.  */

static unsigned long
arm_compute_save_core_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
          && optimize_size
          && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
          && !crtl->tail_call_emit
          && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
           + ARM_NUM_INTS (crtl->args.pretend_args_size +
                           arm_compute_static_chain_stack_bytes())
           ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
         onto the stack is odd.  We need to ensure that the stack
         is 64-bit aligned before we start to save iWMMXt registers,
         and also before we start to create locals.  (A local variable
         might be a double or long long which we will load/store using
         an iWMMXt instruction).  Therefore we need to push another
         ARM register, so that the stack will be 64-bit aligned.  We
         try to avoid using the arg registers (r0 -r3) as they might be
         used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
        if ((save_reg_mask & (1 << reg)) == 0)
          break;

      if (reg <= 12)
        save_reg_mask |= (1 << reg);
      else
        {
          cfun->machine->sibcall_blocked = 1;
          save_reg_mask |= (1 << 3);
        }
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
        save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_core_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg ++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  /* Handle the frame pointer as a special case.  */
  if (frame_pointer_needed)
    mask |= 1 << HARD_FRAME_POINTER_REGNUM;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
         we will use.  If the register is live then we will
         have to push it.  Use LAST_LO_REGNUM as our fallback
         choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
         not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
        reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
        mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
         determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
        if (mask & (1 << reg))
          break;

      if (reg > LAST_LO_REGNUM)
        {
          /* Make sure we have a register available for stack decrement.  */
          mask |= 1 << LAST_LO_REGNUM;
        }
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
           regno < LAST_VFP_REGNUM;
           regno += 2)
        {
          if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
              && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
            {
              if (count > 0)
                {
                  /* Workaround ARM10 VFPr1 bug.  */
                  if (count == 2 && !arm_arch6)
                    count++;
                  saved += count * 8;
                }
              count = 0;
            }
          else
            count++;
        }
      if (count > 0)
        {
          if (count == 2 && !arm_arch6)
            count++;
          saved += count * 8;
        }
    }
  return saved;
}
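/* Example (figures are illustrative): with d8..d11 live and call saved
   this returns 4 * 8 == 32 bytes; the ARM10 VFPr1 workaround above pads
   a block of exactly two D registers to three, i.e. 24 bytes rather
   than 16, on pre-v6 cores.  */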
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.

   Note: do not forget to update length attribute of corresponding insn pattern
   when changing assembly output (eg. length attribute of
   thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
   register clearing sequences).  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
                           bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
         found a tail call, then we have to trust that the called
         function won't return.  */
      if (really_return)
        {
          rtx ops[2];

          /* Otherwise, trap an attempted return by aborting.  */
          ops[0] = operand;
          ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
                                       : "abort");
          assemble_external_libcall (ops[1]);
          output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
        }

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
         (e.g. interworking) then we can load the return address
         directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
          && !IS_CMSE_ENTRY (func_type)
          && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
        return_reg = reg_names[PC_REGNUM];
      else
        return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
        {
          /* There are three possible reasons for the IP register
             being saved.  1) a stack frame was created, in which case
             IP contains the old stack pointer, or 2) an ISR routine
             corrupted it, or 3) it was saved to align the stack on
             iWMMXt.  In case 1, restore IP into SP, otherwise just
             restore IP.  */
          if (frame_pointer_needed)
            {
              live_regs_mask &= ~ (1 << IP_REGNUM);
              live_regs_mask |=   (1 << SP_REGNUM);
            }
          else
            gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
        }

      /* On some ARM architectures it is faster to use LDR rather than
         LDM to load a single register.  On other architectures, the
         cost is the same.  In 26 bit mode, or for exception handlers,
         we have to use LDM to load the PC so that the CPSR is also
         restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
        if (live_regs_mask == (1U << reg))
          break;

      if (reg <= LAST_ARM_REGNUM
          && (reg != LR_REGNUM
              || ! really_return
              || ! IS_INTERRUPT (func_type)))
        {
          sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
                   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
        }
      else
        {
          char *p;
          int first = 1;

          /* Generate the load multiple instruction to restore the
             registers.  Note we can get here, even if
             frame_pointer_needed is true, but only if sp already
             points to the base of the saved core registers.  */
          if (live_regs_mask & (1 << SP_REGNUM))
            {
              unsigned HOST_WIDE_INT stack_adjust;

              stack_adjust = offsets->outgoing_args - offsets->saved_regs;
              gcc_assert (stack_adjust == 0 || stack_adjust == 4);

              if (stack_adjust && arm_arch5t && TARGET_ARM)
                sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
              else
                {
                  /* If we can't use ldmib (SA110 bug),
                     then try to pop r3 instead.  */
                  if (stack_adjust)
                    live_regs_mask |= 1 << 3;

                  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
                }
            }
          /* For interrupt returns we have to use an LDM rather than
             a POP so that we can use the exception return variant.  */
          else if (IS_INTERRUPT (func_type))
            sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
          else
            sprintf (instr, "pop%s\t{", conditional);

          p = instr + strlen (instr);

          for (reg = 0; reg <= SP_REGNUM; reg++)
            if (live_regs_mask & (1 << reg))
              {
                int l = strlen (reg_names[reg]);

                if (first)
                  first = 0;
                else
                  {
                    memcpy (p, ", ", 2);
                    p += 2;
                  }

                memcpy (p, "%|", 2);
                memcpy (p + 2, reg_names[reg], l);
                p += l + 2;
              }

          if (live_regs_mask & (1 << LR_REGNUM))
            {
              sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
              /* If returning from an interrupt, restore the CPSR.  */
              if (IS_INTERRUPT (func_type))
                strcat (p, "^");
            }
          else
            strcpy (p, "}");
        }

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
         perform the actual function return.  */
      if (really_return
          && func_type != ARM_FT_INTERWORKED
          && (live_regs_mask & (1 << LR_REGNUM)) != 0)
        {
          /* The return has already been handled
             by loading the LR into the PC.  */
          return "";
        }
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
        {
        case ARM_FT_ISR:
        case ARM_FT_FIQ:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
          break;

        case ARM_FT_INTERWORKED:
          gcc_assert (arm_arch5t || arm_arch4t);
          sprintf (instr, "bx%s\t%%|lr", conditional);
          break;

        case ARM_FT_EXCEPTION:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
          break;

        default:
          if (IS_CMSE_ENTRY (func_type))
            {
              /* Check if we have to clear the 'GE bits' which is only used if
                 parallel add and subtraction instructions are available.  */
              if (TARGET_INT_SIMD)
                snprintf (instr, sizeof (instr),
                          "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
              else
                snprintf (instr, sizeof (instr),
                          "msr%s\tAPSR_nzcvq, %%|lr", conditional);

              output_asm_insn (instr, & operand);
              if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
                {
                  /* Clear the cumulative exception-status bits (0-4,7) and the
                     condition code bits (28-31) of the FPSCR.  We need to
                     remember to clear the first scratch register used (IP) and
                     save and restore the second (r4).  */
                  snprintf (instr, sizeof (instr), "push\t{%%|r4}");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
                  output_asm_insn (instr, & operand);
                  snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
                  output_asm_insn (instr, & operand);
                }
              snprintf (instr, sizeof (instr), "bxns\t%%|lr");
            }
          /* Use bx if it's available.  */
          else if (arm_arch5t || arm_arch4t)
            sprintf (instr, "bx%s\t%%|lr", conditional);
          else
            sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
          break;
        }

      output_asm_insn (instr, & operand);
    }

  return "";
}
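/* Typical output (sketches, not taken from specific testcases): a normal
   ARM-mode function that saved {r4, r5, lr} gets "pop\t{r4, r5, pc}";
   an interworked return is "bx\tlr"; and an IRQ handler that pushed
   nothing returns with "subs\tpc, lr, #4" so that the CPSR is restored
   along with the PC.  */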
/* Output in FILE asm statements needed to declare the NAME of the function
   defined by its DECL node.  */

void
arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
{
  size_t cmse_name_len;
  char *cmse_name = 0;
  char cmse_prefix[] = "__acle_se_";

  /* When compiling with ARMv8-M Security Extensions enabled, we should print an
     extra function label for each function with the 'cmse_nonsecure_entry'
     attribute.  This extra function label should be prepended with
     '__acle_se_', telling the linker that it needs to create secure gateway
     veneers for this function.  */
  if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
                                    DECL_ATTRIBUTES (decl)))
    {
      cmse_name_len = sizeof (cmse_prefix) + strlen (name);
      cmse_name = XALLOCAVEC (char, cmse_name_len);
      snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
      targetm.asm_out.globalize_label (file, cmse_name);

      ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
      ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
    }

  ARM_DECLARE_FUNCTION_NAME (file, name, decl);
  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
  ASM_OUTPUT_LABEL (file, name);

  if (cmse_name)
    ASM_OUTPUT_LABEL (file, cmse_name);

  ARM_OUTPUT_FN_UNWIND (file, TRUE);
}
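/* For a function "foo" carrying the cmse_nonsecure_entry attribute this
   emits, roughly (illustrative):
        .global __acle_se_foo
        .type   __acle_se_foo, %function
        .type   foo, %function
   foo:
   __acle_se_foo:
   so the linker can build a secure gateway veneer for it.  */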
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
         .ascii "arm_poke_function_name", 0
         .align
     t1
         .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
         mov     ip, sp
         stmfd   sp!, {fp, ip, lr, pc}
         sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f)
{
  unsigned long func_type;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  if (IS_CMSE_ENTRY (func_type))
    asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");

  asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
               (HOST_WIDE_INT) crtl->args.size,
               crtl->args.pretend_args_size,
               (HOST_WIDE_INT) get_frame_size ());

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
               frame_pointer_needed,
               cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
         of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
        {
          rtx label = cfun->machine->call_via[regno];

          if (label != 0)
            {
              switch_to_section (function_section (current_function_decl));
              targetm.asm_out.internal_label (asm_out_file, "L",
                                              CODE_LABEL_NUMBER (label));
              asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
            }
        }

      /* ??? Probably not safe to set this here, since it assumes that a
         function will be emitted as assembly immediately after we generate
         RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
                  || (cfun->machine->return_used_this_function != 0)
                  || offsets->saved_regs == offsets->outgoing_args
                  || frame_pointer_needed);
    }
}
/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs;
  int i;
  int regno;
  rtx tmp;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  i = 0;

  /* If there's an odd number of registers to push.  Start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
        mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
                                                     stack_pointer_rtx));
      else
        mem = gen_frame_mem (Pmode,
                             gen_rtx_PRE_MODIFY
                             (Pmode, stack_pointer_rtx,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             -4 * num_regs)));

      tmp = gen_rtx_SET (mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
        rtx reg1, reg2, mem1, mem2;
        rtx tmp0, tmp1, tmp2;
        int regno2;

        /* Find the register to pair with this one.  */
        for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
             regno2++)
          ;

        reg1 = gen_rtx_REG (SImode, regno);
        reg2 = gen_rtx_REG (SImode, regno2);

        if (i == 0)
          {
            rtx insn;

            /* The first store also allocates space for all the saved
               registers in one stack adjustment.  */
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * num_regs));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * (num_regs - 1)));
            tmp0 = gen_rtx_SET (stack_pointer_rtx,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               -4 * num_regs));
            tmp1 = gen_rtx_SET (mem1, reg1);
            tmp2 = gen_rtx_SET (mem2, reg2);
            RTX_FRAME_RELATED_P (tmp0) = 1;
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
            XVECEXP (par, 0, 0) = tmp0;
            XVECEXP (par, 0, 1) = tmp1;
            XVECEXP (par, 0, 2) = tmp2;
            insn = emit_insn (par);
            RTX_FRAME_RELATED_P (insn) = 1;
            add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          }
        else
          {
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * i));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * (i + 1)));
            tmp1 = gen_rtx_SET (mem1, reg1);
            tmp2 = gen_rtx_SET (mem2, reg2);
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            XVECEXP (par, 0, 0) = tmp1;
            XVECEXP (par, 0, 1) = tmp2;
            emit_insn (par);
          }

        /* Create unwind information.  This is an approximation.  */
        tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * i)),
                            reg1);
        tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * (i + 1))),
                            reg2);

        RTX_FRAME_RELATED_P (tmp1) = 1;
        RTX_FRAME_RELATED_P (tmp2) = 1;
        XVECEXP (dwarf, 0, i + 1) = tmp1;
        XVECEXP (dwarf, 0, i + 2) = tmp2;
        i += 2;
        regno = regno2 + 1;
      }
    else
      regno++;

  return;
}
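/* E.g. (a sketch): pushing {r4, r5, r6} (an odd count) first emits
   "str r4, [sp, #-12]!" to allocate all the space and keep the stack
   dword aligned, then "strd r5, r6, [sp, #4]" for the remaining pair.  */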
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more freedom
   for scheduling and can be turned into an STM by peephole optimizations.  */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: A more efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2 == 0)
            && (saved_regs_mask & (1 << (j + 1))))
          {
            /* Current register and previous register form register pair for
               which STRD can be generated.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (DImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset + 4));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 8;
            j += 2;
          }
        else
          {
            /* Emit a single word store.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (SImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant(Pmode,
                                               stack_pointer_rtx,
                                               offset));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 4;
            j += 1;
          }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        num_regs++;
      if (dwarf_regs_mask & (1 << i))
        num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
           (set (mem:BLK (pre_modify:SI (reg:SI sp)
                                        (const_int:SI <num>)))
                (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI XX))
           (use (reg:SI YY))
           ...
        ])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
           ...
        ])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, 0)
            = gen_rtx_SET (gen_frame_mem
                           (BLKmode,
                            gen_rtx_PRE_MODIFY (Pmode,
                                                stack_pointer_rtx,
                                                plus_constant
                                                (Pmode, stack_pointer_rtx,
                                                 -4 * num_regs))
                            ),
                           gen_rtx_UNSPEC (BLKmode,
                                           gen_rtvec (1, reg),
                                           UNSPEC_PUSH_MULT));

          if (dwarf_regs_mask & (1 << i))
            {
              tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
                                 reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          break;
        }
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

          if (dwarf_regs_mask & (1 << i))
            {
              tmp
                = gen_rtx_SET (gen_frame_mem
                               (SImode,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               4 * j)),
                               reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          j++;
        }
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
                          rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
         num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
        reg = gen_rtx_REG (SImode, i);
        if ((num_regs == 1) && emit_update && !return_in_pc)
          {
            /* Emit single load with writeback.  */
            tmp = gen_frame_mem (SImode,
                                 gen_rtx_POST_INC (Pmode,
                                                   stack_pointer_rtx));
            tmp = emit_insn (gen_rtx_SET (reg, tmp));
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
            return;
          }

        tmp = gen_rtx_SET (reg,
                           gen_frame_mem
                           (SImode,
                            plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
        RTX_FRAME_RELATED_P (tmp) = 1;
        XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

        /* We need to maintain a sequence for DWARF info too.  As dwarf info
           should not have PC, skip PC.  */
        if (i != PC_REGNUM)
          dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
                                 stack_pointer_rtx, stack_pointer_rtx);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
        first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
                         gen_frame_mem
                         (DFmode,
                          plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
                                 base_reg, base_reg);
}
/* Generate and emit a pattern that will be recognized as an LDRD pattern.  If
   an even number of registers is being popped, multiple LDRD patterns are
   created for all register pairs.  If an odd number of registers is popped,
   the last register is loaded by using an LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
        /* Create RTX for memory load.  */
        reg = gen_rtx_REG (SImode, j);
        tmp = gen_rtx_SET (reg,
                           gen_frame_mem (SImode,
                               plus_constant (Pmode,
                                              stack_pointer_rtx, 4 * i)));
        RTX_FRAME_RELATED_P (tmp) = 1;

        if (i % 2 == 0)
          {
            /* When saved-register index (i) is even, the RTX to be emitted is
               yet to be created.  Hence create it first.  The LDRD pattern we
               are generating is :
                 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
                   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
               where target registers need not be consecutive.  */
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            dwarf = NULL_RTX;
          }

        /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
           added as 0th element and if i is odd, reg_i is added as 1st element
           of LDRD pattern shown above.  */
        XVECEXP (par, 0, (i % 2)) = tmp;
        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        if ((i % 2) == 1)
          {
            /* When saved-register index (i) is odd, RTXs for both the registers
               to be loaded are generated in above given LDRD pattern, and the
               pattern can be emitted now.  */
            par = emit_insn (par);
            REG_NOTES (par) = dwarf;
            RTX_FRAME_RELATED_P (par) = 1;
          }

        i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
     then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
                                 stack_pointer_rtx, stack_pointer_rtx);

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
         register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
                          gen_rtx_POST_INC (SImode,
                                            stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
        {
          /* If return_in_pc, j must be PC_REGNUM.  */
          gcc_assert (j == PC_REGNUM);
          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
          XVECEXP (par, 0, 0) = ret_rtx;
          XVECEXP (par, 0, 1) = tmp;
          par = emit_jump_insn (par);
        }
      else
        {
          par = emit_insn (tmp);
          REG_NOTES (par) = dwarf;
          arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                       stack_pointer_rtx, stack_pointer_rtx);
        }
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
         pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }

  return;
}
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
   peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2) == 0
            && (saved_regs_mask & (1 << (j + 1)))
            && (j + 1) != PC_REGNUM)
          {
            /* Current register and next register form register pair for which
               LDRD can be generated.  PC is always the last register popped, and
               we handle it separately.  */
            if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */

            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j),
                                    NULL_RTX);
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j + 1),
                                    dwarf);

            REG_NOTES (tmp) = dwarf;

            offset += 8;
            j += 2;
          }
        else if (j != PC_REGNUM)
          {
            /* Emit a single word load.  */
            if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
                                              gen_rtx_REG (SImode, j),
                                              NULL_RTX);

            offset += 4;
            j += 1;
          }
        else /* j == PC_REGNUM */
          j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
                                   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
                         gen_frame_mem (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
                              gen_rtx_REG (SImode, PC_REGNUM),
                              NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }
}
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
         && (!crtl->is_leaf
             || thumb_far_jump_used_p ()
             || df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know if r3 will be available because
   we do have an indirect tailcall happening in this
   particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
        rtx_insn *call = BB_END (e->src);
        if (!CALL_P (call))
          call = prev_nonnote_nondebug_insn (call);
        gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
        if (find_regno_fusage (call, USE, 3)
            || is_indirect_tailcall_p (call))
          return true;
      }
  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_core_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
/* Return cached stack offsets.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;

  offsets = &cfun->machine->stack_offsets;

  return offsets;
}
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static void
arm_compute_frame_layout (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* Initially this is the size of the local variables.  It will be translated
     into an offset once we have determined the size of preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
         preserve that condition at any subroutine call.  We also require the
         soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
        {
          /* Check for the call-saved iWMMXt registers.  */
          for (regno = FIRST_IWMMXT_REGNUM;
               regno <= LAST_IWMMXT_REGNUM;
               regno++)
            if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
              saved += 8;
        }

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
          && TARGET_HARD_FLOAT)
        saved += arm_get_vfp_saved_size ();
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
        saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (crtl->is_leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
         block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
         when there is a stack frame as the alignment will be rolled into
         the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
        {
          int reg = -1;

          /* Register r3 is caller-saved.  Normally it does not need to be
             saved on entry by the prologue.  However if we choose to save
             it for padding then we may confuse the compiler into thinking
             a prologue sequence is required when in fact it is not.  This
             will occur when shrink-wrapping if r3 is used as a scratch
             register and there are no other callee-saved writes.

             This situation can be avoided when other callee-saved registers
             are available and r3 is not mandatory if we choose a callee-saved
             register for padding.  */
          bool prefer_callee_reg_p = false;

          /* If it is safe to use r3, then do so.  This sometimes
             generates better code on Thumb-2 by avoiding the need to
             use 32-bit push/pop instructions.  */
          if (! any_sibcall_could_use_r3 ()
              && arm_size_return_regs () <= 12
              && (offsets->saved_regs_mask & (1 << 3)) == 0
              && (TARGET_THUMB2
                  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
            {
              reg = 3;
              if (!TARGET_THUMB2)
                prefer_callee_reg_p = true;
            }
          if (reg == -1
              || prefer_callee_reg_p)
            {
              for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
                {
                  /* Avoid fixed registers; they may be changed at
                     arbitrary times so it's unsafe to restore them
                     during the epilogue.  */
                  if (!fixed_regs[i]
                      && (offsets->saved_regs_mask & (1 << i)) == 0)
                    {
                      reg = i;
                      break;
                    }
                }
            }

          if (reg != -1)
            {
              offsets->saved_regs += 4;
              offsets->saved_regs_mask |= (1 << reg);
            }
        }
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
                            + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
        offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }
}

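/* For orientation, a rough sketch of the layout described by the offsets
   computed above (a sketch only; the exact layout depends on the target
   options and on which registers end up being saved):

     high address -> incoming / pretend (vararg) arguments
                     <- saved_args
                     static chain slot, core register save area
                     <- saved_regs
                     caller interworking slot (if any)
                     <- soft_frame
                     local variables (frame_size bytes)
                     <- locals_base
                     outgoing argument area
                     <- outgoing_args
     low address  -> stack pointer after the prologue

   Offsets grow in the direction of stack growth, as the next function's
   comment states.  */
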
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case FRAME_POINTER_REGNUM:
          /* This is the reverse of the soft frame pointer
             to hard frame pointer elimination below.  */
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* This is only non-zero in the case where the static chain register
             is stored above the frame.  */
          return offsets->frame - offsets->saved_args - 4;

        case STACK_POINTER_REGNUM:
          /* If nothing has been pushed on the stack at all
             then this will return -4.  This *is* correct!  */
          return offsets->outgoing_args - (offsets->saved_args + 4);

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* The hard frame pointer points to the top entry in the
             stack frame.  The soft frame pointer to the bottom entry
             in the stack frame.  If there is no stack frame at all,
             then they are identical.  */

          return offsets->frame - offsets->soft_frame;

        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
         In theory you could eliminate from the hard frame
         pointer to the stack pointer, but this will never
         happen, since if a stack frame is not needed the
         hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}

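/* A worked example (illustrative numbers only, assuming no static chain
   and a zero-size caller interworking slot): with 8 bytes of pretend
   arguments, two core registers saved and no locals or outgoing args,
   arm_compute_frame_layout gives saved_args = 8, saved_regs = 16,
   soft_frame = 16 and outgoing_args = 16, so eliminating
   ARG_POINTER_REGNUM into STACK_POINTER_REGNUM above yields
   16 - (8 + 4) = 4.  */
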
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */

bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
          (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
          (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
          (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
           true);
}

/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
        insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
        insn = gen_rtx_MEM (V2SImode, insn);
        insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
        RTX_FRAME_RELATED_P (insn) = 1;
        saved_size += 8;
      }

  if (TARGET_HARD_FLOAT)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
        {
          if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
              && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
            {
              if (start_reg != reg)
                saved_size += vfp_emit_fstmd (start_reg,
                                              (reg - start_reg) / 2);
              start_reg = reg + 2;
            }
        }
      if (start_reg != reg)
        saved_size += vfp_emit_fstmd (start_reg,
                                      (reg - start_reg) / 2);
    }
  return saved_size;
}

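/* For example, if d8..d11 are the only live call-saved VFP registers, the
   loop above coalesces them into a single store-multiple covering the
   whole block, roughly (illustrative assembly only):

        vstmdb  sp!, {d8-d11}

   increasing saved_size by 8 bytes per D register stored.  */
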
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
         expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
        {
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx,
                                        hard_frame_pointer_rtx));
        }
      else
        {
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        hard_frame_pointer_rtx,
                                        stack_pointer_rtx));
        }
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}

struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use as a 2nd scratch register on
   function entry after the registers are saved in the prologue.  This register
   must be released by means of release_scratch_register_on_entry.  IP is not
   considered since it is always used as the 1st scratch register if available.

   REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
                               unsigned long live_regs)
{
  int regno = -1;

  sr->saved = false;

  if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
    regno = LR_REGNUM;
  else
    {
      unsigned int i;

      for (i = 4; i < 11; i++)
        if (regno1 != i && (live_regs & (1 << i)) != 0)
          {
            regno = i;
            break;
          }

      if (regno < 0)
        {
          /* If IP is used as the 1st scratch register for a nested function,
             then either r3 wasn't available or is used to preserve IP.  */
          if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
            regno1 = 3;
          regno = (regno1 == 3 ? 2 : 3);
          sr->saved
            = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
                               regno);
        }
    }

  sr->reg = gen_rtx_REG (SImode, regno);
  if (sr->saved)
    {
      rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
      rtx x = gen_rtx_SET (stack_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, -4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}

/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
      rtx x = gen_rtx_SET (stack_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, 4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}

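/* Typical use of the pair above (a sketch):

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr, IP_REGNUM, live_regs);
     ... use sr.reg as a temporary ...
     release_scratch_register_on_entry (&sr);

   If the chosen register is live on entry, sr.saved is true and the two
   calls bracket its use with a push/pop, keeping the unwind information
   consistent via the REG_FRAME_RELATED_EXPR notes attached above.  */
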
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif

/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  REGNO1
   is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */

static void
arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
                            unsigned int regno1, unsigned long live_regs)
{
  rtx reg1 = gen_rtx_REG (Pmode, regno1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
    }

  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, rem;

      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (reg1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
         it exceeds SIZE.  If only two probes are needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        {
          emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
          emit_stack_probe (reg1);
        }

      rem = size - (i - PROBE_INTERVAL);
      if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
        {
          emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
          emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
        }
      else
        emit_stack_probe (plus_constant (Pmode, reg1, -rem));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr, regno1, live_regs);

      emit_move_insn (reg1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (sr.reg, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));


      /* Step 3: the loop

         do
           {
             TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
             probe at TEST_ADDR
           }
         while (TEST_ADDR != LAST_ADDR)

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        {
          HOST_WIDE_INT rem = size - rounded_size;

          if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
            {
              emit_set_insn (sr.reg,
                             plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
              emit_stack_probe (plus_constant (Pmode, sr.reg,
                                               PROBE_INTERVAL - rem));
            }
          else
            emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
        }

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}

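/* As an illustration, with PROBE_INTERVAL == 4096, FIRST == 4096 and
   SIZE == 10000, the unrolled branch above emits something like
   (illustrative assembly only; the scratch register varies):

        mov     ip, #8192               @ first + PROBE_INTERVAL
        sub     ip, sp, ip
        str     r0, [ip]                @ probe at sp - 8192
        sub     ip, ip, #4096
        str     r0, [ip]                @ probe at sp - 12288
        str     r0, [ip, #-1808]        @ final probe at sp - (4096 + 10000)

   The exact sequence depends on whether the residual offset fits in the
   immediate field of the store.  */
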
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub\t%0, %0, %1", xops);

  /* Probe at TEST_ADDR.  */
  output_asm_insn ("str\tr0, [%0, #0]", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Branch.  */
  fputs ("\tbne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}

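/* The emitted loop therefore looks like (illustrative assembly only,
   with whichever registers operands 0 and 1 happen to be):

   .LPSRL0:
        sub     r4, r4, #4096           @ TEST_ADDR -= PROBE_INTERVAL
        str     r0, [r4, #0]            @ probe at TEST_ADDR
        cmp     r4, r5                  @ stop when TEST_ADDR == LAST_ADDR
        bne     .LPSRL0
 */
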
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  bool clobber_ip;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
        current_function_static_stack_size = 0;
      return;
    }

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

          mov r0, sp
          bic r1, r0, #7
          mov sp, r1
          <save and restore r0 in normal prologue/epilogue>
          mov sp, r0
          bx lr

         The unwinder doesn't need to know about the stack realignment.
         Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, R0_REGNUM);
      r1 = gen_rtx_REG (SImode, R1_REGNUM);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
         has entered a different function.  That said, the unwind info is
         correct, individually, before and after this instruction because
         we've described the save of SP, which will override the default
         handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* Let's compute the static_chain_stack_bytes required and store it.  Right
     now the value must be -1 as stored by arm_init_machine_status ().  */
  cfun->machine->static_chain_stack_bytes
    = arm_compute_static_chain_stack_bytes ();

  /* The static chain register is the same as the IP register.  If it is
     clobbered when creating the frame, we need to save and restore it.  */
  clobber_ip = IS_NESTED (func_type)
               && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
                   || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
                        || flag_stack_clash_protection)
                       && !df_regs_ever_live_p (LR_REGNUM)
                       && arm_r3_live_at_start_p ()));

  /* Find somewhere to store IP whilst the frame is being created.
     We try the following places in order:

       1. The last argument register r3 if it is available.
       2. A slot on the stack above the frame if there are no
          arguments to push onto the stack.
       3. Register r3 again, after pushing the argument registers
          onto the stack, if this is a varargs function.
       4. The last slot on the stack created for the arguments to
          push, if this isn't a varargs function.

     Note - we only need to tell the dwarf2 backend about the SP
     adjustment in the second variant; the static chain register
     doesn't need to be unwound, as it doesn't contain a value
     inherited from the caller.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p ())
        insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
      else if (args_to_push == 0)
        {
          rtx addr, dwarf;

          gcc_assert(arm_compute_static_chain_stack_bytes() == 4);

          saved_regs += 4;

          addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
          insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
          fp_offset = 4;

          /* Just tell the dwarf backend that we adjusted SP.  */
          dwarf = gen_rtx_SET (stack_pointer_rtx,
                               plus_constant (Pmode, stack_pointer_rtx,
                                              -fp_offset));
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
        }
      else
        {
          /* Store the args on the stack.  */
          if (cfun->machine->uses_anonymous_args)
            {
              insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
                                          (0xf0 >> (args_to_push / 4)) & 0xf);
              emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
              saved_pretend_args = 1;
            }
          else
            {
              rtx addr, dwarf;

              if (args_to_push == 4)
                addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
              else
                addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          -args_to_push));

              insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

              /* Just tell the dwarf backend that we adjusted SP.  */
              dwarf = gen_rtx_SET (stack_pointer_rtx,
                                   plus_constant (Pmode, stack_pointer_rtx,
                                                  -args_to_push));
              add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
            }

          RTX_FRAME_RELATED_P (insn) = 1;
          fp_offset = args_to_push;
          args_to_push = 0;
        }
    }

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
        {
          /* Interrupt functions must not corrupt any registers.
             Creating a frame pointer however, corrupts the IP
             register, so we must push it first.  */
          emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

          /* Do not set RTX_FRAME_RELATED_P on this insn.
             The dwarf stack unwinding code only wants to see one
             stack decrement per function, and this is not it.  If
             this instruction is labeled as being part of the frame
             creation sequence then dwarf2out_frame_debug_expr will
             die when it encounters the assignment of IP to FP
             later on, since the use of SP here establishes SP as
             the CFA register and not IP.

             Anyway this instruction is not really part of the stack
             frame creation although it is part of the prologue.  */
        }

      insn = emit_set_insn (ip_rtx,
                            plus_constant (Pmode, stack_pointer_rtx,
                                           fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
        insn = emit_multi_reg_push
          ((0xf0 >> (args_to_push / 4)) & 0xf,
           (0xf0 >> (args_to_push / 4)) & 0xf);
      else
        insn = emit_insn
          (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating the extra push
     of IP (needed when a frame is needed and the frame layout is APCS),
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
          && saved_regs == offsets->saved_regs - offsets->saved_args)
        {
          /* If no coprocessor registers are being pushed and we don't have
             to worry about a frame pointer then push extra registers to
             create the stack frame.  This is done in a way that does not
             alter the frame layout, so is independent of the epilogue.  */
          int n;
          int frame;
          n = 0;
          while (n < 8 && (live_regs_mask & (1 << n)) == 0)
            n++;
          frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
          if (frame && n * 4 >= frame)
            {
              n = frame / 4;
              live_regs_mask |= (1 << n) - 1;
              saved_regs += frame;
            }
        }

      if (TARGET_LDRD
          && current_tune->prefer_ldrd_strd
          && !optimize_function_for_size_p (cfun))
        {
          gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
          if (TARGET_THUMB2)
            thumb2_emit_strd_push (live_regs_mask);
          else if (TARGET_ARM
                   && !TARGET_APCS_FRAME
                   && !IS_INTERRUPT (func_type))
            arm_emit_strd_push (live_regs_mask);
          else
            {
              insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }
      else
        {
          insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
        {
          insn = GEN_INT (-(4 + args_to_push + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      else
        {
          insn = GEN_INT (saved_regs - (4 + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If this isn't an interrupt service routine and we have a frame, then do
     stack checking.  We use IP as the first scratch register, except for the
     non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
  if (!IS_INTERRUPT (func_type)
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
          || flag_stack_clash_protection))
    {
      unsigned int regno;

      if (!IS_NESTED (func_type) || clobber_ip)
        regno = IP_REGNUM;
      else if (df_regs_ever_live_p (LR_REGNUM))
        regno = LR_REGNUM;
      else
        regno = 3;

      if (crtl->is_leaf && !cfun->calls_alloca)
        {
          if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
            arm_emit_probe_stack_range (get_stack_check_protect (),
                                        size - get_stack_check_protect (),
                                        regno, live_regs_mask);
        }
      else if (size > 0)
        arm_emit_probe_stack_range (get_stack_check_protect (), size,
                                    regno, live_regs_mask);
    }

  /* Recover the static chain register.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p () || saved_pretend_args)
        insn = gen_rtx_REG (SImode, 3);
      else
        {
          insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
          insn = gen_frame_mem (SImode, insn);
        }
      emit_set_insn (ip_rtx, insn);
      emit_insn (gen_force_register_use (ip_rtx));
    }

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
         need to get tricky.  */
      rtx_insn *last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
                        - offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                    amount));
      do
        {
          last = last ? NEXT_INSN (last) : get_insns ();
          RTX_FRAME_RELATED_P (last) = 1;
        }
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
         will prevent the scheduler from moving stores to the frame
         before the stack adjustment.  */
      if (frame_pointer_needed)
        emit_insn (gen_stack_tie (stack_pointer_rtx,
                                  hard_frame_pointer_rtx));
    }


  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
        mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}

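/* Putting it together, a typical Thumb-2 prologue for a small non-leaf
   function with a frame might look like (a sketch only; the real sequence
   depends on the ABI variant, the live register set and the frame size):

        push    {r4, r5, lr}            @ emit_multi_reg_push
        sub     sp, sp, #24             @ allocate locals + outgoing args

   with RTX_FRAME_RELATED_P set on both insns so that dwarf2out can
   describe the CFA adjustments.  */
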
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }
      if (current_insn_predicate != NULL)
        {
          output_operand_lossage
            ("predicated instruction in conditional sequence");
          return;
        }

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}

/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
   Letters previously used, but now deprecated/obsolete: sVWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   If CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '.':
      /* The current condition code for a condition code setting instruction.
         Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      fputc('s', stream);
      arm_print_condition (stream);
      return;

    case '!':
      /* If the instruction is conditionally executed then print
         the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2);
      if (current_insn_predicate)
        arm_print_condition (stream);
      else
        fputc('s', stream);
      break;

    /* %# is a "break" sequence. It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
        REAL_VALUE_TYPE r;

        r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
        fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
          break;

        case SYMBOL_REF:
          output_addr_const (stream, x);
          break;

        case CONST:
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
            {
              output_addr_const (stream, x);
              break;
            }
          /* Fall through.  */

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      return;

    /* An integer that we want to print in HEX.  */
    case 'x':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
          break;

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      return;

    case 'B':
      if (CONST_INT_P (x))
        {
          HOST_WIDE_INT val;

          val = ARM_SIGN_EXTEND (~INTVAL (x));
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
        }
      else
        {
          putc ('~', stream);
          output_addr_const (stream, x);
        }
      return;

    case 'b':
      /* Print the log2 of a CONST_INT.  */
      {
        HOST_WIDE_INT val;

        if (!CONST_INT_P (x)
            || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        else
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
      }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
        HOST_WIDE_INT val;
        const char *shift;

        shift = shift_op (x, &val);

        if (shift)
          {
            fprintf (stream, ", %s ", shift);
            if (val == -1)
              arm_print_operand (stream, XEXP (x, 1), 0);
            else
              fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
          }
      }
      return;

      /* An explanation of the 'Q', 'R' and 'H' register operands:

         In a pair of registers containing a DI or DF value the 'Q'
         operand returns the register number of the register containing
         the least significant part of the value.  The 'R' operand returns
         the register number of the register containing the most
         significant part of the value.

         The 'H' operand returns the higher of the two register numbers.
         On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
         same as the 'Q' operand, since the most significant part of the
         value is held in the lower number register.  The reverse is true
         on systems where WORDS_BIG_ENDIAN is false.

         The purpose of these operands is to distinguish between cases
         where the endian-ness of the values is important (for example
         when they are added together), and cases where the endian-ness
         is irrelevant, but the order of register operations is important.
         For example when loading a value from memory into a register
         pair, the endian-ness does not matter.  Provided that the value
         from the lower memory address is put into the lower numbered
         register, and the value from the higher address is put into the
         higher numbered register, the load will work regardless of whether
         the value being loaded is big-wordian or little-wordian.  The
         order of the two register loads can matter however, if the address
         of the memory location is actually held in one of the registers
         being overwritten by the load.

         The 'Q' and 'R' constraints are also available for 64-bit
         constants.  */
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
        {
          rtx part = gen_lowpart (SImode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
        {
          machine_mode mode = GET_MODE (x);
          rtx part;

          if (mode == VOIDmode)
            mode = DImode;
          part = gen_highpart_mode (SImode, mode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
                   REG_P (XEXP (x, 0))
                   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
                   REGNO (x),
                   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
        int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
        int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
        if (numregs == 1)
          asm_fprintf (stream, "{d%d}", regno);
        else
          asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
        return;

      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[get_arm_condition_code (x)],
             stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
         want to do that.  */
      if (x == const_true_rtx)
        {
          output_operand_lossage ("instruction never executed");
          return;
        }
      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
                                 (get_arm_condition_code (x))],
             stream);
      return;

    case 's':
    case 'V':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
          || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
          || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
        /* Bad value for wCG register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      else
        fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

    /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
          || INTVAL (x) < 0
          || INTVAL (x) >= 16)
        /* Bad value for wC register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      else
        {
          static const char * wc_reg_names [16] =
            {
              "wCID",  "wCon",  "wCSSF", "wCASF",
              "wC4",   "wC5",   "wC6",   "wC7",
              "wCGR0", "wCGR1", "wCGR2", "wCGR3",
              "wC12",  "wC13",  "wC14",  "wC15"
            };

          fputs (wc_reg_names [INTVAL (x)], stream);
        }
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
        machine_mode mode = GET_MODE (x);
        int is_quad = (code == 'q');
        int regno;

        if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (!REG_P (x)
            || !IS_VFP_REGNUM (REGNO (x)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
            || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
                 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if ((GET_MODE_SIZE (mode) != 16
             && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!NEON_REGNO_OK_FOR_QUAD (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (GET_MODE_SIZE (mode) == 16)
          fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
                                  + (code == 'f' ? 1 : 0));
        else
          fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
                                  + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
        int index = vfp3_const_double_index (x);
        gcc_assert (index != -1);
        fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
        rtx addr;
        bool postinc = FALSE;
        rtx postinc_reg = NULL;
        unsigned align, memsize, align_bits;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        if (GET_CODE (addr) == POST_INC)
          {
            postinc = 1;
            addr = XEXP (addr, 0);
          }
        if (GET_CODE (addr) == POST_MODIFY)
          {
            postinc_reg = XEXP( XEXP (addr, 1), 1);
            addr = XEXP (addr, 0);
          }
        asm_fprintf (stream, "[%r", REGNO (addr));

        /* We know the alignment of this access, so we can emit a hint in the
           instruction (for some alignments) as an aid to the memory subsystem
           of the target.  */
        align = MEM_ALIGN (x) >> 3;
        memsize = MEM_SIZE (x);

        /* Only certain alignment specifiers are supported by the hardware.  */
        if (memsize == 32 && (align % 32) == 0)
          align_bits = 256;
        else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
          align_bits = 128;
        else if (memsize >= 8 && (align % 8) == 0)
          align_bits = 64;
        else
          align_bits = 0;

        if (align_bits != 0)
          asm_fprintf (stream, ":%d", align_bits);

        asm_fprintf (stream, "]");

        if (postinc)
          fputs("!", stream);
        if (postinc_reg)
          asm_fprintf (stream, ", %r", REGNO (postinc_reg));
      }
      return;

    case 'C':
      {
        rtx addr;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        gcc_assert (REG_P (addr));
        asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element index.  */
    case 'y':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      {
        int result;

        gcc_assert (CONST_DOUBLE_P (x));
        result = vfp3_const_double_for_fract_bits (x);
        if (result == 0)
          result = vfp3_const_double_for_bits (x);
        fprintf (stream, "#%d", result);
      }
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
        {
          output_operand_lossage ("missing operand");
          return;
        }

      switch (GET_CODE (x))
        {
        case REG:
          asm_fprintf (stream, "%r", REGNO (x));
          break;

        case MEM:
          output_address (GET_MODE (x), XEXP (x, 0));
          break;

        case CONST_DOUBLE:
          {
            char fpstr[20];
            real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
                             sizeof (fpstr), 0, 1);
            fprintf (stream, "#%s", fpstr);
          }
          break;

        default:
          gcc_assert (GET_CODE (x) != NEG);
          fputc ('#', stream);
          if (GET_CODE (x) == HIGH)
            {
              fputs (":lower16:", stream);
              x = XEXP (x, 0);
            }

          output_addr_const (stream, x);
          break;
        }
    }
}

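/* For reference, these codes appear in insn output templates in arm.md and
   related machine descriptions; e.g. a template such as (illustrative only)

        "add%?\t%0, %1, %2"

   uses the '?' punctuation code above to emit the current condition, while
   "%Q0" and "%R0" would select the least and most significant words of a
   64-bit operand 0.  */
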
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
        asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
        {
          rtx base = XEXP (x, 0);
          rtx index = XEXP (x, 1);
          HOST_WIDE_INT offset = 0;
          if (!REG_P (base)
              || (REG_P (index) && REGNO (index) == SP_REGNUM))
            {
              /* Ensure that BASE is a register (one of them must be).
                 Also ensure the SP is not used as an index register.  */
              std::swap (base, index);
            }
          switch (GET_CODE (index))
            {
            case CONST_INT:
              offset = INTVAL (index);
              if (is_minus)
                offset = -offset;
              asm_fprintf (stream, "[%r, #%wd]",
                           REGNO (base), offset);
              break;

            case REG:
              asm_fprintf (stream, "[%r, %s%r]",
                           REGNO (base), is_minus ? "-" : "",
                           REGNO (index));
              break;

            case MULT:
            case ASHIFTRT:
            case LSHIFTRT:
            case ASHIFT:
            case ROTATERT:
              {
                asm_fprintf (stream, "[%r, %s%r",
                             REGNO (base), is_minus ? "-" : "",
                             REGNO (XEXP (index, 0)));
                arm_print_operand (stream, index, 'S');
                fputs ("]", stream);
                break;
              }

            default:
              gcc_unreachable ();
            }
        }
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
               || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
        {
          gcc_assert (REG_P (XEXP (x, 0)));

          if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
            asm_fprintf (stream, "[%r, #%s%d]!",
                         REGNO (XEXP (x, 0)),
                         GET_CODE (x) == PRE_DEC ? "-" : "",
                         GET_MODE_SIZE (mode));
          else
            asm_fprintf (stream, "[%r], #%s%d",
                         REGNO (XEXP (x, 0)),
                         GET_CODE (x) == POST_DEC ? "-" : "",
                         GET_MODE_SIZE (mode));
        }
      else if (GET_CODE (x) == PRE_MODIFY)
        {
          asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
            asm_fprintf (stream, "#%wd]!",
                         INTVAL (XEXP (XEXP (x, 1), 1)));
          else
            asm_fprintf (stream, "%r]!",
                         REGNO (XEXP (XEXP (x, 1), 1)));
        }
      else if (GET_CODE (x) == POST_MODIFY)
        {
          asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
            asm_fprintf (stream, "#%wd",
                         INTVAL (XEXP (XEXP (x, 1), 1)));
          else
            asm_fprintf (stream, "%r",
                         REGNO (XEXP (XEXP (x, 1), 1)));
        }
      else output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
        asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
        asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
        {
          gcc_assert (REG_P (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (x, 1)))
            asm_fprintf (stream, "[%r, #%wd]",
                         REGNO (XEXP (x, 0)),
                         INTVAL (XEXP (x, 1)));
          else
            asm_fprintf (stream, "[%r, %r]",
                         REGNO (XEXP (x, 0)),
                         REGNO (XEXP (x, 1)));
        }
      else
        output_addr_const (stream, x);
    }
}

/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
          || code == '(' || code == ')' || code == '#'
          || (TARGET_32BIT && (code == '?'))
          || (TARGET_THUMB2 && (code == '!'))
          || (TARGET_THUMB && (code == '_')));
}

/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
         .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
          (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
        {
          /* See legitimize_pic_address for an explanation of the
             TARGET_VXWORKS_RTP check.  */
          /* References to weak symbols cannot be resolved locally:
             they may be overridden by a non-weak definition at link
             time.  */
          if (!arm_pic_data_is_text_relative
              || (GET_CODE (x) == SYMBOL_REF
                  && (!SYMBOL_REF_LOCAL_P (x)
                      || (SYMBOL_REF_DECL (x)
                          ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
            fputs ("(GOT)", asm_out_file);
          else
            fputs ("(GOTOFF)", asm_out_file);
        }
      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_UNIT_SIZE (mode);

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            assemble_integer
              (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
          }
      else
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            assemble_real
              (*CONST_DOUBLE_REAL_VALUE (elt),
               as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
               i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
          }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}

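/* E.g. when assembling the address of a global into the constant table for
   -fPIC code, the word branch above might print (illustrative output only):

        .word   some_symbol(GOT)

   whereas a local, non-weak symbol would get the cheaper (GOTOFF) form.  */
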
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
               is_ctor ? ".init_array" : ".fini_array",
               priority);
      s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}

/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}

/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The states of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */

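/* As an example of what the fsm achieves on ARM (a sketch):

        cmp     r0, #0
        beq     .L1
        add     r1, r1, #1
        sub     r2, r2, #1
   .L1:

   becomes

        cmp     r0, #0
        addne   r1, r1, #1
        subne   r2, r2, #1

   with the branch and the label deleted.  */
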
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */
static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
                           XEXP (comparison, 1));

  switch (mode)
    {
    case E_CC_DNEmode: code = ARM_NE; goto dominance;
    case E_CC_DEQmode: code = ARM_EQ; goto dominance;
    case E_CC_DGEmode: code = ARM_GE; goto dominance;
    case E_CC_DGTmode: code = ARM_GT; goto dominance;
    case E_CC_DLEmode: code = ARM_LE; goto dominance;
    case E_CC_DLTmode: code = ARM_LT; goto dominance;
    case E_CC_DGEUmode: code = ARM_CS; goto dominance;
    case E_CC_DGTUmode: code = ARM_HI; goto dominance;
    case E_CC_DLEUmode: code = ARM_LS; goto dominance;
    case E_CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
        return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
        return code;
      return ARM_NV;

    case E_CC_NOOVmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_PL;
        case LT: return ARM_MI;
        default: return ARM_NV;
        }

    case E_CC_Zmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        default: return ARM_NV;
        }

    case E_CC_Nmode:
      switch (comp_code)
        {
        case NE: return ARM_MI;
        case EQ: return ARM_PL;
        default: return ARM_NV;
        }

    case E_CCFPEmode:
    case E_CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
        {
        case GE: return ARM_GE;
        case GT: return ARM_GT;
        case LE: return ARM_LS;
        case LT: return ARM_MI;
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case ORDERED: return ARM_VC;
        case UNORDERED: return ARM_VS;
        case UNLT: return ARM_LT;
        case UNLE: return ARM_LE;
        case UNGT: return ARM_HI;
        case UNGE: return ARM_PL;
        /* UNEQ and LTGT do not have a representation.  */
        case UNEQ: /* Fall through.  */
        case LTGT: /* Fall through.  */
        default: return ARM_NV;
        }

    case E_CC_SWPmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_LE;
        case GT: return ARM_LT;
        case LE: return ARM_GE;
        case LT: return ARM_GT;
        case GEU: return ARM_LS;
        case GTU: return ARM_CC;
        case LEU: return ARM_CS;
        case LTU: return ARM_HI;
        default: return ARM_NV;
        }

    case E_CC_Cmode:
      switch (comp_code)
        {
        case LTU: return ARM_CS;
        case GEU: return ARM_CC;
        case NE: return ARM_CS;
        case EQ: return ARM_CC;
        default: return ARM_NV;
        }

    case E_CC_CZmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GEU: return ARM_CS;
        case GTU: return ARM_HI;
        case LEU: return ARM_LS;
        case LTU: return ARM_CC;
        default: return ARM_NV;
        }

    case E_CC_NCVmode:
      switch (comp_code)
        {
        case GE: return ARM_GE;
        case LT: return ARM_LT;
        case GEU: return ARM_CS;
        case LTU: return ARM_CC;
        default: return ARM_NV;
        }

    case E_CC_Vmode:
      switch (comp_code)
        {
        case NE: return ARM_VS;
        case EQ: return ARM_VC;
        default: return ARM_NV;
        }

    case E_CCmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_GE;
        case GT: return ARM_GT;
        case LE: return ARM_LE;
        case LT: return ARM_LT;
        case GEU: return ARM_CS;
        case GTU: return ARM_HI;
        case LEU: return ARM_LS;
        case LTU: return ARM_CC;
        default: return ARM_NV;
        }

    default: gcc_unreachable ();
    }
}

/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}

/* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
   code registers when not targetting Thumb1.  The VFP condition register
   only exists when generating hard-float code.  */
static bool
arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  if (!TARGET_32BIT)
    return false;

  *p1 = CC_REGNUM;
  *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
  return true;
}

/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
         barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
        break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
          || GET_CODE (body) == CLOBBER)
        continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
        break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
        break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
        arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
        break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
        break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}

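/* For example, two adjacent COND_EXEC insns with complementary conditions
   can be merged into a single IT block (a sketch):

        ite     eq
        moveq   r0, #1
        movne   r0, #0

   Here arm_condexec_mask records the then/else pattern (bit set where the
   insn's condition matches arm_current_cc) and arm_condexec_count the
   number of insns still to be output.  */
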
void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (ANY_RETURN_P (body))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    start_insn = next_nonnote_insn (start_insn);
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	      return_code = GET_CODE (body);
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;
      rtx label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && BARRIER_P (this_insn))
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == return_code
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == return_code)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}

      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (!BARRIER_P (this_insn)
				  && !LABEL_P (this_insn)));
		}
	      if (!this_insn)
		{
		  /* Oh dear!  We ran off the end... give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.c assumes that it remains intact
	 across this call).  */
      extract_constrain_insn_cached (insn);
    }
}
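
/* Worked example of the state machine above (added commentary, not from
   the original sources): given ARM code of the form

       cmp	r0, #0
       bne	.L1
       mov	r1, #5
   .L1:

   the conditional branch skips a single CONDS_NOCOND insn and lands on
   its own target label, so the scan succeeds: the branch is suppressed,
   the skipped insn is emitted as "moveq r1, #5" (the branch condition NE
   inverted, because the insn sits on the "else" path), and .L1 is
   recorded in arm_target_label so that state 4 knows where the
   conditionalized region ends.  */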
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf (stream, "i%s\t%s\n\t", buff,
		   arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
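
/* For instance (illustrative, continuing the example above): with
   arm_current_cc == ARM_EQ, arm_condexec_mask == 0b011 and
   arm_condexec_masklen == 3, the loop builds buff = "tte" and the
   function prints "itte eq" in front of the first conditional insn.
   Bit 0 of the mask is always set, so the printed mnemonic always
   starts with "it".  */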
/* Implement TARGET_HARD_REGNO_NREGS.  On the ARM core regs are
   UNITS_PER_WORD bytes wide.  */
static unsigned int
arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (TARGET_32BIT
      && regno > PC_REGNUM
      && regno != FRAME_POINTER_REGNUM
      && regno != ARG_POINTER_REGNUM
      && !IS_VFP_REGNUM (regno))
    return 1;

  return ARM_NUM_REGS (mode);
}
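
/* Example (illustrative): ARM_NUM_REGS (DImode) is 2, so a DImode value
   occupies two core registers, and a DFmode value in the VFP bank
   likewise counts as two 32-bit S registers; the special registers
   above PC_REGNUM (e.g. CC_REGNUM) always report a single register.  */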
/* Implement TARGET_HARD_REGNO_MODE_OK.  */
static bool
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_HARD_FLOAT
		&& regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return false;

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      if (mode == HFmode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      /* VFP registers can hold HImode values.  */
      if (mode == HImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	       || (VALID_NEON_QREG_MODE (mode)
		   && NEON_REGNO_OK_FOR_QUAD (regno))
	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return false;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  Do not allow very large Neon structure
     opaque modes in general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
	return false;

      if (TARGET_THUMB2)
	return true;

      return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return false;
}
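
/* For example (illustrative): in ARM state with LDRD available, DImode
   is rejected in an odd-numbered core register ((regno & 1) != 0), so
   doubleword values always sit in an even/odd pair that ldrd/strd can
   address, whereas in Thumb-2 any core register may start the value.  */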
/* Implement TARGET_MODES_TIEABLE_P.  */
static bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode1)
	  || VALID_NEON_QREG_MODE (mode1)
	  || VALID_NEON_STRUCT_MODE (mode1))
      && (VALID_NEON_DREG_MODE (mode2)
	  || VALID_NEON_QREG_MODE (mode2)
	  || VALID_NEON_STRUCT_MODE (mode2)))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */
enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
	  && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1)))
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
	  break;
	}
    }

  if (value == 0)
    {
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
/* Implement TARGET_PROMOTED_TYPE.  */
static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t)
      && TYPE_PRECISION (t) == 16
      && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */
static bool
arm_scalar_mode_supported_p (scalar_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Set the value of FLT_EVAL_METHOD.
   ISO/IEC TS 18661-3 defines two values that we'd like to make use of:

	0: evaluate all operations and constants, whose semantic type has at
	   most the range and precision of type float, to the range and
	   precision of float; evaluate all other operations and constants to
	   the range and precision of the semantic type;

	N, where _FloatN is a supported interchange floating type:
	   evaluate all operations and constants, whose semantic type has at
	   most the range and precision of _FloatN type, to the range and
	   precision of the _FloatN type; evaluate all other operations and
	   constants to the range and precision of the semantic type.

   If we have the ARMv8.2-A extensions then we support _Float16 in native
   precision, so we should set this to 16.  Otherwise, we support the type,
   but want to evaluate expressions in float precision, so set this to
   0.  */

static enum flt_eval_method
arm_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
      case EXCESS_PRECISION_TYPE_FAST:
      case EXCESS_PRECISION_TYPE_STANDARD:
	/* We can calculate either in 16-bit range and precision or
	   32-bit range and precision.  Make that decision based on whether
	   we have native support for the ARMv8.2-A 16-bit floating-point
	   instructions or not.  */
	return (TARGET_VFP_FP16INST
		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
		: FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
      case EXCESS_PRECISION_TYPE_IMPLICIT:
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      default:
	gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
/* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
   _Float16 if we are using anything other than ieee format for 16-bit
   floating point.  Otherwise, punt to the default implementation.  */
static opt_scalar_float_mode
arm_floatn_mode (int n, bool extended)
{
  if (!extended && n == 16)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
	return HFmode;
      return opt_scalar_float_mode ();
    }

  return default_floatn_mode (n, extended);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
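
/* Illustrative example (added commentary): for a four-component copy
   d1..d4 := d0..d3 the destination overlaps the source with
   REGNO (operands[0]) greater than REGNO (operands[1]); emitting the
   moves in forward order would overwrite d1..d3 before they are read,
   so the second loop above orders them d4 := d3, d3 := d2, d2 := d1,
   d1 := d0.  */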
/* Split operands into moves from op[1] + op[2] into op[0].  */
void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = REG_NREGS (operands[1]);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (destlo, operands[1]);
      rtx y = gen_rtx_SET (desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */
static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
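
/* Illustrative shape of the emitted RTL (added commentary): for a
   prologue "push {r4, r7, lr}", MASK has bits 4, 7 and 14 set and the
   insn is

       (parallel
	 [(set (mem:BLK (pre_modify (reg sp)
				    (plus (reg sp) (const_int -12))))
	       (unspec:BLK [(reg r4)] UNSPEC_PUSH_MULT))
	  (use (reg r7))
	  (use (reg lr))])

   while the REG_FRAME_RELATED_EXPR note carries the SP adjustment and,
   when REAL_REGS is non-zero, one (set (mem ...) (reg ...)) per saved
   register for the unwinder.  */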
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
	  || IS_CMSE_ENTRY (arm_current_func_type ()))
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.

   Note: do not forget to update length attribute of corresponding insn pattern
   when changing assembly output (eg. length attribute of epilogue_insns when
   updating Armv8-M Baseline Security Extensions register clearing
   sequences).  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      if (IS_CMSE_ENTRY (arm_current_func_type ()))
	{
	  asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
		       reg_containing_return_addr);
	  asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
	}
      else
	asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return
	   && !IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* We can deduce the registers used from the function's
	 return value.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */
      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* This is for the cases where LR is not being used to contain the return
	 address.  It may therefore contain information that we might not want
	 to leak, hence it must be cleared.  The value in R0 will never be a
	 secret at this point, so it is safe to use it, see the clearing code
	 in 'cmse_nonsecure_entry_clear_before_return'.  */
      if (reg_containing_return_addr != LR_REGNUM)
	asm_fprintf (f, "\tmov\tlr, r0\n");

      asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
      asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
    }
  else
    asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
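
/* Worked example (illustrative, added commentary): in an interworking
   void function whose return address has been left on the stack
   (reg_containing_return_addr == -1), r0-r2 are corruptible; the
   matching loop settles on r0 and the function emits

       pop	{r0}
       bx	r0

   i.e. the return address is popped into the lowest available register
   and control returns through it.  */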
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx_insn *insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
	{
	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    CC_STATUS_INIT;
	}
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
	{
	  rtx set = single_set (insn);
	  cfun->machine->thumb1_cc_insn = insn;
	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
	  cfun->machine->thumb1_cc_op1 = const0_rtx;
	  cfun->machine->thumb1_cc_mode = CC_NOOVmode;
	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
	    {
	      rtx src1 = XEXP (SET_SRC (set), 1);
	      if (src1 == const0_rtx)
		cfun->machine->thumb1_cc_mode = CCmode;
	    }
	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
	    {
	      /* Record the src register operand instead of dest because
		 cprop_hardreg pass propagates src.  */
	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
	    }
	}
      else if (conds != CONDS_NOCOND)
	cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error ("Unexpected thumb1 far jump");
}
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx_insn *insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
	far_jump = true;

      func_size += get_attr_length (insn);
    }

  /* Attribute far_jump will always be true for thumb1 before
     shorten_branch pass.  So checking far_jump attribute before
     shorten_branch isn't very useful.

     The following heuristic tries to estimate more accurately whether a far
     jump may finally be used.  The heuristic is very conservative as there is
     no chance to roll back the decision of not using a far jump.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
  if (far_jump)
    {
      if ((func_size * 3) >= 2048)
	{
	  /* Record the fact that we have decided that
	     the function does use far jumps.  */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}
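
/* Example of the heuristic above (illustrative): a function containing a
   potential far jump whose insns total 700 bytes gives 700 * 3 >= 2048,
   so far_jump_used is latched and LR will be pushed; at 600 bytes
   (600 * 3 < 2048) every branch is assumed to stay within the -2048 to
   2046 short-branch range and no LR save is forced.  */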
/* Return nonzero if FUNC must be entered in ARM mode.  */
static bool
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return false;
#endif
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
	  || TARGET_BACKTRACE
	  || (live_regs_mask & 1 << LR_REGNUM) == 0
	  || TARGET_INTERWORK
	  || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
	 && (for_prologue || call_used_regs[reg_base + n_free]))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
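
/* Example (illustrative): with a 512-byte frame and one free low
   register, the code above returns (512 - 508) / 4 == 1, so the
   prologue pushes one extra register and the remaining 508-byte
   adjustment fits the immediate field of a single "sub sp, #508".  */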
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* We can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
	 returning.  */
      if (size <= 12)
	mask |= 1 << 3;
      if (size <= 8)
	mask |= 1 << 2;

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  machine->static_chain_stack_bytes = -1;
  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Check that FUNC is called with a different mode.  */
bool
arm_change_mode_p (tree func)
{
  if (TREE_CODE (func) != FUNCTION_DECL)
    return false;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  int flags = callee_opts->x_target_flags;

  return (TARGET_THUMB_P (flags) != TARGET_THUMB);
}
/* Like arm_compute_initial_elimination offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */
HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
/* Generate the function's prologue.  */

void
thumb1_expand_prologue (void)
{
  rtx_insn *insn;

  HOST_WIDE_INT amount;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;
  bool lr_needs_saving;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);

  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
	{
	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
	  unsigned long mask;

	  mask = 1ul << (LAST_ARG_REGNUM + 1);
	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

	  insn = thumb1_emit_multi_reg_push (mask, 0);
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, x));
	}
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
	 The code looks like this:

	 0   .align 2
	 0   func:
	 0     sub   SP, #16         Reserve space for 4 registers.
	 2     push  {R7}            Push low registers.
	 4     add   R7, SP, #20     Get the stack pointer before the push.
	 6     str   R7, [SP, #8]    Store the stack pointer
				     (before reserving the space).
	 8     mov   R7, PC          Get hold of the start of this code + 12.
	10     str   R7, [SP, #16]   Store it.
	12     mov   R7, FP          Get hold of the current frame pointer.
	14     str   R7, [SP, #4]    Store it.
	16     mov   R7, LR          Get hold of the current return address.
	18     str   R7, [SP, #12]   Store it.
	20     add   R7, SP, #16     Point at the start of the
				     backtrace structure.
	22     mov   FP, R7          Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
	{
	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  lr_needs_saving = false;

	  offset = bit_count (l_mask) * UNITS_PER_WORD;
	}

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
	 to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
	 prevent the scheduler from doing anything weird.  Failing that
	 we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
	{
	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}
      else
	{
	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization: If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && lr_needs_saving))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
      lr_needs_saving = false;
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
						 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      /* Here we need to mask out registers used for passing arguments
	 even if they can be pushed.  This is to avoid using them to stash
	 the high registers; such stashing could clobber arguments that are
	 still needed.  */
      pushable_regs = l_mask & (~arg_regs_mask);
      if (lr_needs_saving)
	pushable_regs &= ~(1 << LR_REGNUM);

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;
	  unsigned long push_mask = 0;

	  for (regno = LR_REGNUM; regno >= 0; regno--)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  emit_move_insn (gen_rtx_REG (SImode, regno),
				  gen_rtx_REG (SImode, next_hi_reg));

		  high_regs_pushed--;
		  real_regs_mask |= (1 << next_hi_reg);
		  push_mask |= (1 << regno);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg--)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    break;
		}
	    }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (lr_needs_saving)
	    {
	      push_mask |= 1 << LR_REGNUM;
	      real_regs_mask |= 1 << LR_REGNUM;
	      lr_needs_saving = false;
	    }

	  insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
		    stack_pointer_rtx);

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
  if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
       || flag_stack_clash_protection)
      && size)
    sorry ("-fstack-check=specific for Thumb-1");

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
					GEN_INT (- amount)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  rtx reg, dwarf;

	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load into a register.  (Also the
	     ARM debugger really likes to see only one stack decrement per
	     function).  So instead we look for a scratch register into which
	     we can load the decrement, and then we subtract this from the
	     stack pointer.  Unfortunately on the thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
	    if (live_regs_mask & (1 << regno))
	      break;

	  gcc_assert (regno <= LAST_LO_REGNUM);

	  reg = gen_rtx_REG (SImode, regno);

	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));

	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, reg));

	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -amount));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
/* Clear caller saved registers not used to pass return values and leaked
   condition flags before exiting a cmse_nonsecure_entry function.  */

void
cmse_nonsecure_entry_clear_before_return (void)
{
  int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
  uint32_t padding_bits_to_clear = 0;
  auto_sbitmap to_clear_bitmap (maxregno + 1);
  rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
  tree result_type;

  bitmap_clear (to_clear_bitmap);
  bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
  bitmap_set_bit (to_clear_bitmap, IP_REGNUM);

  /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
     registers.  */
  if (TARGET_HARD_FLOAT)
    {
      int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;

      bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);

      /* Make sure we don't clear the two scratch registers used to clear the
	 relevant FPSCR bits in output_return_instruction.  */
      emit_use (gen_rtx_REG (SImode, IP_REGNUM));
      bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
      emit_use (gen_rtx_REG (SImode, 4));
      bitmap_clear_bit (to_clear_bitmap, 4);
    }

  /* If the user has defined registers to be caller saved, these are no longer
     restored by the function before returning and must thus be cleared for
     security purposes.  */
  for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
    {
      /* We do not touch registers that can be used to pass arguments as per
	 the AAPCS, since these should never be made callee-saved by user
	 options.  */
      if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
	continue;
      if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
	continue;
      if (call_used_regs[regno])
	bitmap_set_bit (to_clear_bitmap, regno);
    }

  /* Make sure we do not clear the registers used to return the result in.  */
  result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
  if (!VOID_TYPE_P (result_type))
    {
      uint64_t to_clear_return_mask;
      result_rtl = arm_function_value (result_type, current_function_decl, 0);

      /* No need to check that we return in registers, because we don't
	 support returning on stack yet.  */
      gcc_assert (REG_P (result_rtl));
      to_clear_return_mask
	= compute_not_to_clear_mask (result_type, result_rtl, 0,
				     &padding_bits_to_clear);
      if (to_clear_return_mask)
	{
	  gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
	  for (regno = R0_REGNUM; regno <= maxregno; regno++)
	    if (to_clear_return_mask & (1ULL << regno))
	      bitmap_clear_bit (to_clear_bitmap, regno);
	}
    }

  if (padding_bits_to_clear != 0)
    {
      int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
      auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);

      /* Padding_bits_to_clear is not 0 so we know we are dealing with
	 returning a composite type, which only uses r0.  Let's make sure that
	 r1-r3 is cleared too.  */
      bitmap_clear (to_clear_arg_regs_bitmap);
      bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
      gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
    }

  /* Clear full registers that leak before returning.  */
  clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
  r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
  cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
			clearing_reg);
}
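
/* As a rough sketch (not an exact listing), a cmse_nonsecure_entry
   function returning an int in r0 ends with the leaking argument
   registers and the condition flags being scrubbed with the clearing
   register chosen above, e.g.

	mov	r1, lr
	mov	r2, lr
	mov	r3, lr
	msr	APSR_nzcvq, lr

   leaving r0 intact because it carries the return value.  */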
/* Generate pattern *pop_multiple_with_stack_update_and_return if single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is if single register is to be
   returned, or multiple register return.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
	 functions or adapt code to handle according to ACLE.  This path should
	 not be reachable for cmse_nonsecure_entry functions though we prefer
	 to assert it for now to ensure that future code changes do not silently
	 change this behavior.  */
      gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
      if (num_regs == 1)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
	  rtx addr = gen_rtx_MEM (SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
	  RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
	  emit_jump_insn (par);
	}
      else
	{
	  saved_regs_mask &= ~ (1 << LR_REGNUM);
	  saved_regs_mask |=   (1 << PC_REGNUM);
	  arm_emit_multi_reg_pop (saved_regs_mask);
	}
    }
  else
    {
      if (IS_CMSE_ENTRY (arm_current_func_type ()))
	cmse_nonsecure_entry_clear_before_return ();
      emit_jump_insn (simple_return_rtx);
    }
}
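
/* For example, a Thumb-2 function that saved {r4, r5, lr} and reaches
   the !simple_return path above returns with the single instruction

	pop	{r4, r5, pc}

   instead of popping lr and then branching to it.  */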
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
	  /* r3 is always free in the epilogue.  */
	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    cmse_nonsecure_entry_clear_before_return ();
}
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers saved and how far away the floats are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
	num_regs++;
	floats_from_frame += 4;
      }

  if (TARGET_HARD_FLOAT)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
	{
	  rtx_insn *insn;
	  floats_from_frame += saved_size;
	  insn = emit_insn (gen_addsi3 (ip_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (-floats_from_frame)));
	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
				       ip_rtx, hard_frame_pointer_rtx);
	}

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    if (start_reg != i)
	      arm_emit_vfp_multi_reg_pop (start_reg,
					  (i - start_reg) / 2,
					  gen_rtx_REG (SImode,
						       IP_REGNUM));
	    start_reg = i + 2;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (start_reg != i)
	arm_emit_vfp_multi_reg_pop (start_reg,
				    (i - start_reg) / 2,
				    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
	 it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx_insn *insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
	if (df_regs_ever_live_p (i) && !call_used_regs[i])
	  {
	    rtx addr = gen_frame_mem (V2SImode,
				      plus_constant (Pmode, hard_frame_pointer_rtx,
						     - lrm_count * 4));
	    insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					       gen_rtx_REG (V2SImode, i),
					       NULL_RTX);
	    lrm_count += 2;
	  }
    }

  /* saved_regs_mask should contain IP which contains old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent registers,
     we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx_insn *insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    hard_frame_pointer_rtx,
				    GEN_INT (- 4 * num_regs)));
      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
				   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
	 IP onto the stack, so restore it now.  */
      rtx_insn *insn;
      rtx addr = gen_rtx_MEM (SImode,
			      gen_rtx_POST_INC (SImode,
						stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					 gen_rtx_REG (SImode, IP_REGNUM),
					 NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogues.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
	emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      rtx_insn *insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
	{
	  /* In ARM mode, frame pointer points to first saved register.
	     Restore stack pointer to last saved register.  */
	  amount = offsets->frame - offsets->saved_regs;

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (insn, amount,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);

	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
      else
	{
	  /* In Thumb-2 mode, the frame pointer points to the last saved
	     register.  */
	  amount = offsets->locals_base - offsets->saved_regs;
	  if (amount)
	    {
	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					    hard_frame_pointer_rtx,
					    GEN_INT (amount)));
	      arm_add_cfa_adjust_cfa_note (insn, amount,
					   hard_frame_pointer_rtx,
					   hard_frame_pointer_rtx);
	    }

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
				       hard_frame_pointer_rtx));
	  arm_add_cfa_adjust_cfa_note (insn, 0,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
	 last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
	{
	  rtx_insn *tmp;
	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (tmp, amount,
				       stack_pointer_rtx, stack_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }

  if (TARGET_HARD_FLOAT)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
	 any groupings made in the prologue and generate matching
	 vldm operations.  The need to match groups is because,
	 unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    /* Restore the regs discovered so far (from reg+2 to
	       end_reg).  */
	    if (end_reg > i + 2)
	      arm_emit_vfp_multi_reg_pop (i + 2,
					  (end_reg - (i + 2)) / 2,
					  stack_pointer_rtx);
	    end_reg = i;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (end_reg > i + 2)
	arm_emit_vfp_multi_reg_pop (i + 2,
				    (end_reg - (i + 2)) / 2,
				    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (df_regs_ever_live_p (i) && !call_used_regs[i])
	{
	  rtx_insn *insn;
	  rtx addr = gen_rtx_MEM (V2SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	  REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					     gen_rtx_REG (V2SImode, i),
					     NULL_RTX);
	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}

  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
	  && !IS_CMSE_ENTRY (func_type)
	  && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !crtl->calls_eh_return)
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  return_in_pc = true;
	}

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
	{
	  for (i = 0; i <= LAST_ARM_REGNUM; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx addr = gen_rtx_MEM (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx));
		set_mem_alias_set (addr, get_frame_alias_set ());

		if (i == PC_REGNUM)
		  {
		    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
		    XVECEXP (insn, 0, 0) = ret_rtx;
		    XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
							addr);
		    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
		    insn = emit_jump_insn (insn);
		  }
		else
		  {
		    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
						 addr));
		    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
						       gen_rtx_REG (SImode, i),
						       NULL_RTX);
		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		  }
	      }
	}
      else
	{
	  if (TARGET_LDRD
	      && current_tune->prefer_ldrd_strd
	      && !optimize_function_for_size_p (cfun))
	    {
	      if (TARGET_THUMB2)
		thumb2_emit_ldrd_pop (saved_regs_mask);
	      else if (TARGET_ARM && !IS_INTERRUPT (func_type))
		arm_emit_ldrd_pop (saved_regs_mask);
	      else
		arm_emit_multi_reg_pop (saved_regs_mask);
	    }
	  else
	    arm_emit_multi_reg_pop (saved_regs_mask);
	}

      if (return_in_pc)
	return;
    }

  amount
    = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
  if (amount)
    {
      int i, j;
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp
	= emit_insn (gen_addsi3 (stack_pointer_rtx,
				 stack_pointer_rtx,
				 GEN_INT (amount)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
	{
	  /* Restore pretend args.  Refer arm_expand_prologue on how to save
	     pretend_args in stack.  */
	  int num_regs = crtl->args.pretend_args_size / 4;
	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
	  for (j = 0, i = 0; j < num_regs; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx reg = gen_rtx_REG (SImode, i);
		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
		j++;
	      }
	  REG_NOTES (tmp) = dwarf;
	}
      arm_add_cfa_adjust_cfa_note (tmp, amount,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* CMSE_ENTRY always returns.  */
      gcc_assert (really_return);
      cmse_nonsecure_entry_clear_before_return ();
    }

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
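
/* Taken together, a typical ARM-mode epilogue produced by the code above
   for a function that saved {r4, r5, lr} without a frame pointer is,
   roughly:

	add	sp, sp, #<locals>	@ deallocate the local frame
	pop	{r4, r5, pc}		@ restore and return

   This is only a sketch; the exact sequence depends on the offsets
   computed by arm_get_frame_offsets and on the pop strategy chosen.  */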
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char *name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
	      == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
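
/* The interworking prologue emitted above therefore reads roughly:

	orr	r12, pc, #1
	bx	r12
	.code	16
	.globl	.real_start_of<name>
	.thumb_func
   .real_start_of<name>:

   (illustrative; the label is STUB_NAME plus the user label prefix and
   the function's own name).  */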
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
	{
	  int reg_offset = REGNO (offset);
	  int reg_base = REGNO (base);
	  int reg_dest = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
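
/* E.g. for a doubleword load from [r3 + r4] into r0/r1 (the reg + reg
   case above), the output is along the lines of

	add	r1, r3, r4
	ldr	r0, [r1, #0]
	ldr	r1, [r1, #4]

   so the address held in the high destination register is consumed by
   the final load, which is the one that overwrites it.  */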
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
	std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
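
/* A call through, say, r5 thus becomes "bl .L<n>", where .L<n> is a stub
   that arm_file_end later emits as

   .L<n>:
	bx	r5

   one stub per register (and per section when using
   -ffunction-sections).  */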
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
								offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
								offset)),
			    reg));
    }
}
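
/* E.g. a 23-byte copy is expanded above as one 12-byte and one 8-byte
   block move (which post-increment the pointers), then a 2-byte movhi
   and a final 1-byte movqi at offsets 0 and 2 from the updated
   pointers.  */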
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (Eg gas prior to 2.20).  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
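
/* For example, arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 1)
   prints, under -fverbose-asm:

	.eabi_attribute 26, 1	@ Tag_ABI_enum_size

   and just the numeric form otherwise.  */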
/* This function is used to print CPU tuning information as comment
   in assembler file.  Pointers are not printed for now.  */

static void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
	       current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.l1_cache_size:\t%d\n",
	       current_tune->prefetch.l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.l1_cache_line_size:\t%d\n",
	       current_tune->prefetch.l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_constant_pool:\t%d\n",
	       (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
	       current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
	       current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
	       current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
	       current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_ldrd_strd:\t%d\n",
	       (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "logical_op_non_short_circuit:\t[%d,%d]\n",
	       (int) current_tune->logical_op_non_short_circuit_thumb,
	       (int) current_tune->logical_op_non_short_circuit_arm);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_neon_for_64bits:\t%d\n",
	       (int) current_tune->prefer_neon_for_64bits);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "disparage_flag_setting_t16_encodings:\t%d\n",
	       (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "string_ops_prefer_neon:\t%d\n",
	       (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "max_insns_inline_memset:\t%d\n",
	       current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
	       current_tune->fusible_ops);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
	       (int) current_tune->sched_autopref);
}
/* Print .arch and .arch_extension directives corresponding to the
   current architecture configuration.  */
static void
arm_print_asm_arch_directives ()
{
  const arch_option *arch
    = arm_parse_arch_option_name (all_architectures, "-march",
				  arm_active_target.arch_name);
  auto_sbitmap opt_bits (isa_num_bits);

  gcc_assert (arch);

  asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
  arm_last_printed_arch_string = arm_active_target.arch_name;
  if (!arch->common.extensions)
    return;

  for (const struct cpu_arch_extension *opt = arch->common.extensions;
       opt->name != NULL;
       opt++)
    {
      if (!opt->remove)
	{
	  arm_initialize_isa (opt_bits, opt->isa_bits);

	  /* If every feature bit of this option is set in the target
	     ISA specification, print out the option name.  However,
	     don't print anything if all the bits are part of the
	     FPU specification.  */
	  if (bitmap_subset_p (opt_bits, arm_active_target.isa)
	      && !bitmap_subset_p (opt_bits, isa_all_fpubits))
	    asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
	}
    }
}
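
/* For instance, -march=armv7-a+idiv might come out as

	.arch armv7-a
	.arch_extension idiv

   (illustrative; what is printed depends on the target ISA bits and on
   which bits are already implied by the FPU specification).  */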
static void
arm_file_start (void)
{
  int val;

  if (TARGET_BPABI)
    {
      /* We don't have a specified CPU.  Use the architecture to
	 generate the tags.

	 Note: it might be better to do this unconditionally, then the
	 assembler would not need to know about all new CPU names as
	 they are added.  */
      if (!arm_active_target.core_name)
	{
	  /* armv7ve doesn't support any extensions.  */
	  if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
	    {
	      /* Keep backward compatability for assemblers
		 which don't support armv7ve.  */
	      asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
	      arm_last_printed_arch_string = "armv7ve";
	    }
	  else
	    arm_print_asm_arch_directives ();
	}
      else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
	{
	  asm_fprintf (asm_out_file, "\t.arch %s\n",
		       arm_active_target.core_name + 8);
	  arm_last_printed_arch_string = arm_active_target.core_name + 8;
	}
      else
	{
	  const char* truncated_name
	    = arm_rewrite_selected_cpu (arm_active_target.core_name);
	  asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
	}

      if (print_tune_info)
	arm_print_tune_info ();

      if (! TARGET_SOFT_FLOAT)
	{
	  if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
	    arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

	  if (TARGET_HARD_FLOAT_ABI)
	    arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
	}

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
	{
	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
	}
      if (flag_signaling_nans)
	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
			       flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
			       flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
			       unaligned_access);

      if (arm_fp16_format)
	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
				 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		     HOST_WIDE_INT, tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
		    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	{
	  fputs ("\tldr\tr12, ", file);
	}
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	{
	  /* Thumb1 unified syntax requires s suffix in instruction name when
	     one of the operands is immediate.  */
	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
		       mi_op, this_regno, this_regno,
		       mi_delta);
	}
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
	     pipeline offset is four rather than eight.  Adjust the offset
	     accordingly.  */
	  tem = plus_constant (GET_MODE (tem), tem,
			       TARGET_THUMB1_ONLY ? -3 : -7);
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
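
/* As a sketch, a non-PIC Thumb-1-only thunk with a small delta looks
   something like:

	push	{r3}
	ldr	r3, .LTHUMBFUNCn
	mov	r12, r3
	adds	r0, r0, #8	@ adjust the this pointer
	pop	{r3}
	bx	r12
   .LTHUMBFUNCn:
	.word	<target>

   The exact shape varies with flag_pic, the size of the delta, and
   whether "this" lives in r0 or r1.  */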
/* MI thunk handling for TARGET_32BIT.  */

static void
arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		       HOST_WIDE_INT vcall_offset, tree function)
{
  /* On ARM, this_regno is R0 or R1 depending on
     whether the function returns an aggregate or not.  */
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
				       function)
		    ? R1_REGNUM : R0_REGNUM);

  rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
  rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Add DELTA to THIS_RTX.  */
  if (delta != 0)
    arm_split_constant (PLUS, Pmode, NULL_RTX,
			delta, this_rtx, this_rtx, false);

  /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
  if (vcall_offset != 0)
    {
      /* Load *THIS_RTX.  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
      /* Compute *THIS_RTX + VCALL_OFFSET.  */
      arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
			  false);
      /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
      emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  rtx funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending this is a post-reload pass.  */
  reload_completed = 0;
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  if (TARGET_32BIT)
    arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
  else
    arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
}
int
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case E_V2SImode: pattern = "%08x"; break;
    case E_V4HImode: pattern = "%04x"; break;
    case E_V8QImode: pattern = "%02x"; break;
    default:	     gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
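
/* E.g. a V4HImode constant {1, 2, 3, 4} comes out as the single literal
   0x0004000300020001: elements are printed from the highest index down
   using the "%04x" pattern selected above.  */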
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  long bits;

  bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands[1])
      || GET_CODE (sum = XEXP (operands[1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    machine_mode mode,
			    tree type,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if (nregs & 1)
	{
	  int res = arm_needs_doubleword_align (mode, type);
	  if (res < 0 && warn_psabi)
	    inform (input_location, "parameter passing for argument of "
		    "type %qT changed in GCC 7.1", type);
	  else if (res > 0)
	    nregs++;
	}
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}
/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return ARM_DEFAULT_SHORT_ENUMS;
}


/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}


/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr, mem;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
}
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr, mem;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  if (TARGET_THUMB1)
	    limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode || mode == V8HFmode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  return false;
}
/* Implements target hook array_mode_supported_p.  */
static bool
arm_array_mode_supported_p (machine_mode mode,
			    unsigned HOST_WIDE_INT nelems)
{
  /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
     for now, as the lane-swapping logic needs to be extended in the expanders.
     See PR target/82518.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static machine_mode
arm_preferred_simd_mode (scalar_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case E_SFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case E_SImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case E_HImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case E_QImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case E_DImode:
	if (!TARGET_NEON_VECTORIZE_DOUBLE)
	  return V2DImode;
	break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case E_SImode:
	return V2SImode;
      case E_HImode:
	return V4HImode;
      case E_QImode:
	return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  return DWARF_FRAME_REGISTERS;
}
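
/* E.g. s1 (FIRST_VFP_REGNUM + 1) maps to DWARF register 65 in the legacy
   single-precision range, while d16, which has no single-precision
   alias, maps to 272 in the 256+ range.  */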
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
	if (TARGET_BIG_END)
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
	  }
	else
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
	  }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
	parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before the first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  unsigned padfirst = 0, padlast = 0;
  rtx e;

  e = XVECEXP (p, 0, 0);
  gcc_assert (GET_CODE (e) == SET);

  /* First insn will adjust the stack pointer.  */
  gcc_assert (GET_CODE (e) == SET
              && REG_P (SET_DEST (e))
              && REGNO (SET_DEST (e)) == SP_REGNUM
              && GET_CODE (SET_SRC (e)) == PLUS);

  offset = -INTVAL (XEXP (SET_SRC (e), 1));
  nregs = XVECLEN (p, 0) - 1;
  gcc_assert (nregs);

  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
  if (reg < 16)
    {
      /* For -Os dummy registers can be pushed at the beginning to
         avoid separate stack pointer adjustment.  */
      e = XVECEXP (p, 0, 1);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
        padfirst = INTVAL (XEXP (e, 1));
      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but not annotate it as it is
         never restored.  We turn this into a stack pointer adjustment.  */
      e = XVECEXP (p, 0, nregs);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
        padlast = offset - INTVAL (XEXP (e, 1)) - 4;
      else
        padlast = offset - 4;
      gcc_assert (padlast == 0 || padlast == 4);
      if (padlast == 4)
        fprintf (asm_out_file, "\t.pad #4\n");
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  gcc_assert (offset == padfirst + nregs * reg_size + padlast);

  offset = padfirst;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
         Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      gcc_assert (GET_CODE (e) == SET
                  && MEM_P (SET_DEST (e))
                  && REG_P (SET_SRC (e)));

      reg = REGNO (SET_SRC (e));
      gcc_assert (reg >= lastreg);

      if (i != 1)
        fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
         double precision register names.  */
      if (IS_VFP_REGNUM (reg))
        asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf (asm_out_file, "%r", reg);

      if (flag_checking)
        {
          /* Check that the addresses are consecutive.  */
          e = XEXP (SET_DEST (e), 0);
          if (GET_CODE (e) == PLUS)
            gcc_assert (REG_P (XEXP (e, 0))
                        && REGNO (XEXP (e, 0)) == SP_REGNUM
                        && CONST_INT_P (XEXP (e, 1))
                        && offset == INTVAL (XEXP (e, 1)));
          else
            gcc_assert (i == 1
                        && REG_P (e)
                        && REGNO (e) == SP_REGNUM);
          offset += reg_size;
        }
    }
  fprintf (asm_out_file, "}\n");
  if (padfirst)
    fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
}
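/* As an illustration, a prologue "push {r4, r5, lr}" is annotated as

	.save {r4, r5, lr}

   while a -Os prologue that pushes r0/r1 purely as stack-adjustment
   padding below r4 and lr would be annotated roughly as

	.save {r4, lr}
	.pad #8

   where the trailing .pad covers the dummy registers (padfirst).  */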
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
          || !REG_P (XEXP (XEXP (e0, 0), 0))
          || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
        abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
        asm_fprintf (asm_out_file, "{d%d}\n",
                     (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
        {
          /* A stack increment.  */
          if (GET_CODE (e1) != PLUS
              || !REG_P (XEXP (e1, 0))
              || REGNO (XEXP (e1, 0)) != SP_REGNUM
              || !CONST_INT_P (XEXP (e1, 1)))
            abort ();

          asm_fprintf (asm_out_file, "\t.pad #%wd\n",
                       -INTVAL (XEXP (e1, 1)));
        }
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
        {
          HOST_WIDE_INT offset;

          if (GET_CODE (e1) == PLUS)
            {
              if (!REG_P (XEXP (e1, 0))
                  || !CONST_INT_P (XEXP (e1, 1)))
                abort ();
              reg = REGNO (XEXP (e1, 0));
              offset = INTVAL (XEXP (e1, 1));
              asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
                           HARD_FRAME_POINTER_REGNUM, reg,
                           offset);
            }
          else if (REG_P (e1))
            {
              reg = REGNO (e1);
              asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
                           HARD_FRAME_POINTER_REGNUM, reg);
            }
          else
            abort ();
        }
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
        {
          /* Move from sp to reg.  */
          asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
        }
      else if (GET_CODE (e1) == PLUS
               && REG_P (XEXP (e1, 0))
               && REGNO (XEXP (e1, 0)) == SP_REGNUM
               && CONST_INT_P (XEXP (e1, 1)))
        {
          /* Set reg to offset from sp.  */
          asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
                       REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
        }
      else
        abort ();
      break;

    default:
      abort ();
    }
}
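/* Illustrative translations: "str r3, [sp, #-4]!" is annotated as
   ".save {r3}", "sub sp, sp, #16" as ".pad #16", and an
   "add fp, sp, #8" that establishes the frame pointer as
   ".setfp fp, sp, #8" (register names here are illustrative).  */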
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
          || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
        {
        case REG_FRAME_RELATED_EXPR:
          pat = XEXP (note, 0);
          goto found;

        case REG_CFA_REGISTER:
          pat = XEXP (note, 0);
          if (pat == NULL)
            {
              pat = PATTERN (insn);
              if (GET_CODE (pat) == PARALLEL)
                pat = XVECEXP (pat, 0, 0);
            }

          /* Only emitted for IS_STACKALIGN re-alignment.  */
          {
            rtx dest, src;
            unsigned reg;

            src = SET_SRC (pat);
            dest = SET_DEST (pat);

            gcc_assert (src == stack_pointer_rtx);
            reg = REGNO (dest);
            asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
                         reg + 0x90, reg);
          }
          handled_one = true;
          break;

          /* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
             to get correct dwarf information for shrink-wrap.  We should not
             emit unwind information for it because these are used either for
             pretend arguments or notes to adjust sp and restore registers from
             stack.  */
        case REG_CFA_DEF_CFA:
        case REG_CFA_ADJUST_CFA:
        case REG_CFA_RESTORE:
          return;

        case REG_CFA_EXPRESSION:
        case REG_CFA_OFFSET:
          /* ??? Only handling here what we actually emit.  */
          gcc_unreachable ();

        default:
          break;
        }
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
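/* The resulting table entry looks like, e.g.,

	.word	_ZTIi(TARGET2)

   (the symbol here is illustrative); the (TARGET2) decoration makes the
   assembler emit an R_ARM_TARGET2 relocation for the reference.  */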
/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
#endif /* ARM_UNWIND_INFO */
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  exception_section = get_unnamed_section (0, output_section_asm_op,
                                           "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
         The same condition is used in arm_unwind_emit to suppress
         the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
          && (TREE_NOTHROW (current_function_decl)
              || crtl->all_throwers_are_sibcalls))
        fputs ("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputs (")", fp);
      break;
    default:
      break;
    }

  return TRUE;
}
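/* E.g. a general-dynamic access to a symbol "x" would emit a literal
   roughly of the form

	.word	x(tlsgd) + (. - .LPIC0 - 8)

   where the PIC label and the pipeline offset (operands 2 and 3 of the
   UNSPEC_TLS) shown here are illustrative.  */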
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
        fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift (rtx *operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  shift = shift_op (operands[3], &val);
  if (shift)
    {
      if (val != -1)
        operands[2] = GEN_INT (val);
      sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1", c);

  output_asm_insn (pattern, operands);
  return "";
}
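/* E.g. for an ASHIFTRT recognized by shift_op with SET_FLAGS == 1 the
   pattern becomes "asr%.\t%0, %1, %2"; the '.' punctuation is expanded
   by arm_print_operand into the 's' suffix, giving a flag-setting
   "asrs".  */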
/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* Shift values greater than 63 (for the D qualifier), 31 (for the W
     qualifier) or 15 (for the H qualifier) cannot be encoded directly:
     for WROR/WSRA we emit two shifts of 32, otherwise the destination
     is simply zeroed.  */
  if (((opmode == V4HImode) && (shift > 15))
      || ((opmode == V2SImode) && (shift > 31))
      || ((opmode == DImode) && (shift > 63)))
    {
      if (wror_or_wsra)
        {
          sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
          output_asm_insn (templ, operands);
          if (opmode == DImode)
            {
              sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
              output_asm_insn (templ, operands);
            }
        }
      else
        {
          /* The destination register will contain all zeros.  */
          sprintf (templ, "wzero\t%%0");
          output_asm_insn (templ, operands);
        }
      return "";
    }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];
  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
        break;

      mask >>= 1;
    }
  gcc_assert (i < units);
  {
    switch (GET_MODE (operands[0]))
      {
      case E_V8QImode:
        sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
        break;
      case E_V4HImode:
        sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
        break;
      case E_V2SImode:
        sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
        break;
      default:
        gcc_unreachable ();
      }
    output_asm_insn (templ, operands);
  }
  return "";
}
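/* For example, inserting into lane 2 of a V4HImode vector passes
   operands[3] == (const_int 4); the loop above finds the set bit at
   i == 2 and the output is "tinsrh%?\t%0, %2, #2".  */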
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE (diff_vec))
    {
    case E_QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned
              ? "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case E_HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned
              ? "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case E_SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case E_QImode:
      return "tbb\t[%|pc, %0]";
    case E_HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case E_SImode:
      if (flag_pic)
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
          output_asm_insn ("add\t%4, %4, %5", operands);
          return "bx\t%4";
        }
      else
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          return "ldr\t%|pc, [%4, %0, lsl #2]";
        }
    default:
      gcc_unreachable ();
    }
}
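/* A QImode dispatch therefore expands to something like

	cmp	r0, #<ncases>
	bhi	.Ldefault
	tbb	[pc, r0]

   with the table of halved byte offsets placed directly after the TBB
   (register and label names here are illustrative).  */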
/* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
   per-core tuning structs.  */
static int
arm_issue_rate (void)
{
  return current_tune->issue_rate;
}

/* Return how many instructions should the scheduler look ahead to choose
   the best one.  */
static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Enable modeling of L2 auto-prefetcher.  */
static int
arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}
const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
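/* For example, on an AAPCS target "void f (__builtin_va_list)" mangles
   as _Z1fSt9__va_list, and "void g (__fp16)" as _Z1gDh.  */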
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
            sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* The frame pointer is required for non-leaf APCS frames.  */
  if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
    return true;

  /* If we are probing the stack in the prologue, we will have a faulting
     instruction prior to the stack adjustment and this requires a frame
     pointer if we want to catch the exception using the EABI unwinder.  */
  if (!IS_INTERRUPT (arm_current_func_type ())
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
          || flag_stack_clash_protection)
      && arm_except_unwind_info (&global_options) == UI_TARGET
      && cfun->can_throw_non_call_exceptions)
    {
      HOST_WIDE_INT size = get_frame_size ();

      /* That's irrelevant if there is no stack adjustment.  */
      if (size <= 0)
        return false;

      /* That's relevant only if there is a stack probe.  */
      if (crtl->is_leaf && !cfun->calls_alloca)
        {
          /* We don't have the final size of the frame so adjust.  */
          size += 32 * UNITS_PER_WORD;
          if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
            return true;
        }
      else
        return true;
    }

  return false;
}
/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}
static void
arm_autovectorize_vector_sizes (vector_sizes *sizes)
{
  if (!TARGET_NEON_VECTORIZE_DOUBLE)
    {
      sizes->safe_push (16);
      sizes->safe_push (8);
    }
}
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}
static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
                                         const_tree type, int misalignment,
                                         bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
        return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
         so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
        return true;

      /* Return true if the misalignment is a multiple of the natural alignment
         of the vector's element type.  This is probably always going to be
         true in practice, since we've already established that this isn't a
         packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
                                                      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
         to use the HI regs, because of the overhead of
         stacking them.  */
      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
        fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT)
    {
      /* VFPv3 registers are disabled when earlier VFP
         versions are selected due to the definition of
         LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
           regno <= LAST_VFP_REGNUM; ++ regno)
        {
          fixed_regs[regno] = 0;
          call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
            || regno >= FIRST_VFP_REGNUM + 32;
        }
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
         and wCG1 as call-preserved registers.  The 2002/11/21
         revision changed this so that all wCG registers are
         scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
           regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
        fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
         the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
           regno <= LAST_IWMMXT_REGNUM; ++ regno)
        {
          fixed_regs[regno] = 0;
          call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
        }
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10] = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
        global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERAL_REGS.  During the register rename pass, we prefer
     LO_REGS so that code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the
     register list is 8-bit.  Normally this means all registers in the list
     must be LO_REGS, that is (R0 - R7).  If any HI_REGS are used, then we
     must use 32-bit encodings.  There is one exception: for PUSH, LR in
     HI_REGS can be used with a 16-bit encoding.  */
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}
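/* E.g. "push {r0-r7, lr}" fits the 16-bit encoding (length 2), while
   "push {r4, r8}" requires the 32-bit encoding (length 4) because r8
   is a high register other than LR.  */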
/* Compute the attribute "length" of an insn.  Currently, this function is
   used for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
   "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel
   PARALLEL rtx, RETURN_PC is true if OPERANDS contains a return insn, and
   WRITE_BACK_P is true if OPERANDS contains an insn which explicitly
   updates the base register.  */
int
arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
{
  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  rtx parallel_op = operands[0];
  /* Initialize to elements number of PARALLEL.  */
  unsigned indx = XVECLEN (parallel_op, 0) - 1;
  /* Initialize the value to base register.  */
  unsigned regno = REGNO (operands[1]);
  /* Skip return and write back pattern.
     We only need register pop pattern for later analysis.  */
  unsigned first_indx = 0;
  first_indx += return_pc ? 1 : 0;
  first_indx += write_back_p ? 1 : 0;

  /* A pop operation can be done through LDM or POP.  If the base register is
     SP and if it's with write back, then a LDM will be an alias of POP.  */
  bool pop_p = (regno == SP_REGNUM && write_back_p);
  bool ldm_p = !pop_p;

  /* Check base register for LDM.  */
  if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
    return 4;

  /* Check each register in the list.  */
  for (; indx >= first_indx; indx--)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
      /* For POP, PC in HI_REGS can be used with 16-bit encoding.  See
         similar comment in arm_attr_length_push_multi.  */
      if (REGNO_REG_CLASS (regno) == HI_REGS
          && (regno != PC_REGNUM || ldm_p))
        return 4;
    }

  return 2;
}
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];

  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  r0 = *CONST_DOUBLE_REAL_VALUE (operand);
  if (exact_real_inverse (DFmode, &r0)
      && !REAL_VALUE_NEGATIVE (r0))
    {
      if (exact_real_truncate (DFmode, &r0))
        {
          HOST_WIDE_INT value = real_to_integer (&r0);
          value = value & 0xffffffff;
          if ((value != 0) && ((value & (value - 1)) == 0))
            {
              int ret = exact_log2 (value);
              gcc_assert (IN_RANGE (ret, 0, 31));
              return ret;
            }
        }
    }
  return 0;
}
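/* Worked example: operand == 0.125.  Its exact inverse is 8.0, which
   truncates exactly to the integer 8 == 1 << 3, so the function
   returns 3: the constant can be handled as a fixed-point value with
   3 fractional bits (e.g. a vcvt with a #3 operand).  */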
/* If X is a CONST_DOUBLE with a value that is a power of 2 whose
   log2 is in [1, 32], return that log2.  Otherwise return -1.
   This is used in the patterns for vcvt.s32.f32 floating-point to
   fixed-point conversions.  */

int
vfp3_const_double_for_bits (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!CONST_DOUBLE_P (x))
    return -1;

  r = CONST_DOUBLE_REAL_VALUE (x);

  if (REAL_VALUE_NEGATIVE (*r)
      || REAL_VALUE_ISNAN (*r)
      || REAL_VALUE_ISINF (*r)
      || !real_isinteger (r, SFmode))
    return -1;

  HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));

  /* The exact_log2 above will have returned -1 if this is
     not an exact log2.  */
  if (!IN_RANGE (hwint, 1, 32))
    return -1;

  return hwint;
}
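/* E.g. 65536.0 == 2^16 yields 16, while 0.5 (not an integer) and
   3.0 (not a power of 2) both yield -1.  */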
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
        {
        case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
        case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
        case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
        case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case E_QImode: gen = gen_arm_load_exclusiveqi; break;
        case E_HImode: gen = gen_arm_load_exclusivehi; break;
        case E_SImode: gen = gen_arm_load_exclusivesi; break;
        case E_DImode: gen = gen_arm_load_exclusivedi; break;
        default:
          gcc_unreachable ();
        }
    }

  emit_insn (gen (rval, mem));
}

static void
arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
                          rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
        {
        case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
        case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
        case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
        case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case E_QImode: gen = gen_arm_store_exclusiveqi; break;
        case E_HImode: gen = gen_arm_store_exclusivehi; break;
        case E_SImode: gen = gen_arm_store_exclusivesi; break;
        case E_DImode: gen = gen_arm_store_exclusivedi; break;
        default:
          gcc_unreachable ();
        }
    }

  emit_insn (gen (bval, rval, mem));
}
/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  rtx_insn *jump = emit_jump_insn (insn);
  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
}
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case E_QImode:
    case E_HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
         so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case E_SImode:
      /* Force the value into a register if needed.  We waited until after
         the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
        oldval = force_reg (SImode, oldval);
      break;

    case E_DImode:
      if (!cmpdi_operand (oldval, mode))
        oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_THUMB1)
    {
      switch (mode)
        {
        case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
        case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
        case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
        case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
        case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
        case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
        case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
        default:
          gcc_unreachable ();
        }
    }

  bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
  emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  For Thumb-1 targets, the
     boolean negation of the result is also stored in bval because the
     Thumb-1 backend lacks dependency tracking for the CC flag due to
     flag-setting not being represented at the RTL level.  */
  if (TARGET_THUMB1)
    emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
  else
    {
      x = gen_rtx_EQ (SImode, bdst, const0_rtx);
      emit_insn (gen_rtx_SET (bval, x));
    }
}
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  Success of the compare and swap is
   indicated by the Z flag set for 32bit targets and by neg_bval being zero
   for Thumb-1 targets (i.e. the negation of the boolean value returned by
   the atomic_compare_and_swapmode standard pattern in operand 0).  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, neg_bval;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (operands[5] != const0_rtx);
  mod_s = memmodel_from_int (INTVAL (operands[6]));
  mod_f = memmodel_from_int (INTVAL (operands[7]));
  neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
  mode = GET_MODE (mem);

  bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);

  bool use_acquire = TARGET_HAVE_LDACQ
                     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
                          || is_mm_release (mod_s));

  bool use_release = TARGET_HAVE_LDACQ
                     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
                          || is_mm_acquire (mod_s));

  /* For ARMv8, the load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
     as required to communicate with arm_expand_compare_and_swap.  */
  if (TARGET_32BIT)
    {
      cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                                gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }
  else
    {
      emit_move_insn (neg_bval, const1_rtx);
      cond = gen_rtx_NE (VOIDmode, rval, oldval);
      if (thumb1_cmpneg_operand (oldval, SImode))
        emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
                                                    label2, cond));
      else
        emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
    }

  arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  if (TARGET_32BIT)
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
      emit_insn (gen_rtx_SET (cond, x));
    }

  if (!is_weak)
    {
      /* Z is set to boolean value of !neg_bval, as required to communicate
         with arm_expand_compare_and_swap.  */
      x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
      emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (label2);
}
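/* For a strong SImode compare-and-swap the sequence emitted here is
   essentially

	1:	ldrex	rval, [mem]
		cmp	rval, oldval
		bne	2f
		strex	neg_bval, newval, [mem]
		cmp	neg_bval, #0
		bne	1b
	2:

   with barriers or the acquire/release instruction variants substituted
   as the memory model requires.  */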
/* Split an atomic operation pattern.  Operation is given by CODE and is one
   of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
   operation).  Operation is performed on the content at MEM and on VALUE
   following the memory model MODEL_RTX.  The content at MEM before and after
   the operation is returned in OLD_OUT and NEW_OUT respectively while the
   success of the operation is returned in COND.  Using a scratch register or
   an operand register for these determines what result is returned for that
   pattern.  */

void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
                     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  bool all_low_regs, bind_old_new;
  rtx x;

  bool is_armv8_sync = arm_arch8 && is_mm_sync (model);

  bool use_acquire = TARGET_HAVE_LDACQ
                     && !(is_mm_relaxed (model) || is_mm_consume (model)
                          || is_mm_release (model));

  bool use_release = TARGET_HAVE_LDACQ
                     && !(is_mm_relaxed (model) || is_mm_consume (model)
                          || is_mm_acquire (model));

  /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  /* Does the operation require destination and first operand to use the same
     register?  This is decided by register constraints of relevant insn
     patterns in thumb1.md.  */
  gcc_assert (!new_out || REG_P (new_out));
  all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
                 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
                 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
  bind_old_new =
    (TARGET_THUMB1
     && code != SET
     && code != MINUS
     && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));

  /* We want to return the old value while putting the result of the operation
     in the same register as the old value so copy the old value over to the
     destination register and use that register for the operation.  */
  if (old_out && bind_old_new)
    {
      emit_move_insn (new_out, old_out);
      old_out = new_out;
    }

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
        {
          value = GEN_INT (-INTVAL (value));
          code = PLUS;
        }
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
        {
          /* DImode plus/minus need to clobber flags.  */
          /* The adddi3 and subdi3 patterns are incorrectly written so that
             they require matching operands, even when we could easily support
             three operands.  Thankfully, this can be fixed up post-splitting,
             as the individual add+adc patterns do accept three operands and
             post-reload cprop can make these moves go away.  */
          emit_move_insn (new_out, old_out);
          if (code == PLUS)
            x = gen_adddi3 (new_out, new_out, value);
          else
            x = gen_subdi3 (new_out, new_out, value);
          emit_insn (x);
          break;
        }
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
                            use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  vec_perm_indices perm;
  machine_mode vmode;
  bool one_vector_p;
  bool testing_p;
};

/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
        emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
        emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
        {
          pair = gen_reg_rtx (V16QImode);
          emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
          pair = gen_lowpart (TImode, pair);
          emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
        }
      else
        {
          pair = gen_reg_rtx (OImode);
          emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
          emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
        }
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  mask = gen_const_vec_duplicate (vmode, mask);
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
/* Map lane ordering between architectural lane order, and GCC lane order,
   taking into account ABI.  See comment above output_move_neon for details.  */

static int
neon_endian_lane_map (machine_mode mode, int lane)
{
  if (BYTES_BIG_ENDIAN)
    {
      int nelems = GET_MODE_NUNITS (mode);
      /* Reverse lane order.  */
      lane = (nelems - 1 - lane);
      /* Reverse D register order, to match ABI.  */
      if (GET_MODE_SIZE (mode) == 16)
        lane = lane ^ (nelems / 2);
    }
  return lane;
}

/* Some permutations index into pairs of vectors, this is a helper function
   to map indexes into those pairs of vectors.  */

static int
neon_pair_endian_lane_map (machine_mode mode, int lane)
{
  int nelem = GET_MODE_NUNITS (mode);
  if (BYTES_BIG_ENDIAN)
    lane =
      neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
  return lane;
}
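/* Worked example: for V4SImode on big-endian, GCC lane 0 first maps to
   architectural lane 4 - 1 - 0 == 3 and, because the mode occupies a
   16-byte Q register, is then swapped across the two D halves:
   3 ^ (4 / 2) == 1.  */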
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);
  int first_elem;
  int swap_nelt;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
     big endian pattern on 64 bit vectors, so we correct for that.  */
  swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
              && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;

  if (first_elem == neon_endian_lane_map (d->vmode, 0))
    odd = 0;
  else if (first_elem == neon_endian_lane_map (d->vmode, 1))
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt =
        (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
      if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
    case E_V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
    case E_V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
    case E_V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
    case E_V8HFmode:  gen = gen_neon_vuzpv8hf_internal;  break;
    case E_V4HFmode:  gen = gen_neon_vuzpv4hf_internal;  break;
    case E_V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
    case E_V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
    case E_V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
    case E_V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (swap_nelt != 0)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);
  int first_elem;
  bool is_swapped;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  is_swapped = BYTES_BIG_ENDIAN;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];

  high = nelt / 2;
  if (first_elem == neon_endian_lane_map (d->vmode, high))
    ;
  else if (first_elem == neon_endian_lane_map (d->vmode, 0))
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt =
        neon_pair_endian_lane_map (d->vmode, i + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
          != elt)
        return false;
      elt =
        neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
          != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case E_V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
    case E_V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
    case E_V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
    case E_V8HFmode:  gen = gen_neon_vzipv8hf_internal;  break;
    case E_V4HFmode:  gen = gen_neon_vzipv4hf_internal;  break;
    case E_V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
    case E_V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
    case E_V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
    case E_V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (is_swapped)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->perm.length ();
  rtx (*gen)(rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
        {
        case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
        case E_V8QImode:  gen = gen_neon_vrev64v8qi;  break;
        default:
          return false;
        }
      break;
    case 3:
      switch (d->vmode)
        {
        case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
        case E_V8QImode:  gen = gen_neon_vrev32v8qi;  break;
        case E_V8HImode:  gen = gen_neon_vrev64v8hi;  break;
        case E_V4HImode:  gen = gen_neon_vrev64v4hi;  break;
        case E_V8HFmode:  gen = gen_neon_vrev64v8hf;  break;
        case E_V4HFmode:  gen = gen_neon_vrev64v4hf;  break;
        default:
          return false;
        }
      break;
    case 1:
      switch (d->vmode)
        {
        case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
        case E_V8QImode:  gen = gen_neon_vrev16v8qi;  break;
        case E_V8HImode:  gen = gen_neon_vrev32v8hi;  break;
        case E_V4HImode:  gen = gen_neon_vrev32v4hi;  break;
        case E_V4SImode:  gen = gen_neon_vrev64v4si;  break;
        case E_V2SImode:  gen = gen_neon_vrev64v2si;  break;
        case E_V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
        case E_V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
        default:
          return false;
        }
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt ; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
        /* This is guaranteed to be true as the value of diff
           is 7, 3, 1 and we should have enough elements in the
           queue to generate this.  Getting a vector mask with a
           value of diff other than these values implies that
           something is wrong by the time we get here.  */
        gcc_assert (i + j < nelt);
        if (d->perm[i + j] != i + diff - j)
          return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
        return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case E_V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
    case E_V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
    case E_V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
    case E_V8HFmode:  gen = gen_neon_vtrnv8hf_internal;  break;
    case E_V4HFmode:  gen = gen_neon_vtrnv4hf_internal;  break;
    case E_V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
    case E_V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
    case E_V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
    case E_V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      std::swap (in0, in1);
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->perm.length ();
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
         the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
        return false;

      /* If we are operating on only one vector: it could be a
         rotation.  If there are only two elements of size < 64, let
         arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
        {
          if ((nelt == 2) && (d->vmode != V2DImode))
            return false;

          next = 0;
        }

      if (d->perm[i] != next)
        return false;
    }

  location = d->perm[0];

  switch (d->vmode)
    {
    case E_V16QImode: gen = gen_neon_vextv16qi; break;
    case E_V8QImode:  gen = gen_neon_vextv8qi;  break;
    case E_V4HImode:  gen = gen_neon_vextv4hi;  break;
    case E_V8HImode:  gen = gen_neon_vextv8hi;  break;
    case E_V2SImode:  gen = gen_neon_vextv2si;  break;
    case E_V4SImode:  gen = gen_neon_vextv4si;  break;
    case E_V4HFmode:  gen = gen_neon_vextv4hf;  break;
    case E_V8HFmode:  gen = gen_neon_vextv8hf;  break;
    case E_V2SFmode:  gen = gen_neon_vextv2sf;  break;
    case E_V4SFmode:  gen = gen_neon_vextv4sf;  break;
    case E_V2DImode:  gen = gen_neon_vextv2di;  break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
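/* E.g. the V4SImode selector {1, 2, 3, 4} takes lanes 1-3 of op0
   followed by lane 0 of op1, which is a VEXT with offset #1; the
   one-vector selector {1, 2, 3, 0} is the corresponding rotation.  */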
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->perm.length ();

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  unsigned int nelt = d->perm.length ();
  if (d->perm[0] >= nelt)
    {
      d->perm.rotate_inputs (1);
      std::swap (d->op0, d->op1);
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
        return true;
      if (arm_evpc_neon_vzip (d))
        return true;
      if (arm_evpc_neon_vrev (d))
        return true;
      if (arm_evpc_neon_vtrn (d))
        return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */

static bool
arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
                              const vec_perm_indices &sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
    return false;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = vmode;
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.testing_p = !target;

  nelt = GET_MODE_NUNITS (d.vmode);
  for (i = which = 0; i < nelt; ++i)
    {
      int ei = sel[i] & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (d.testing_p || !rtx_equal_p (op0, op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      /* FALLTHRU */

    case 2:
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);

  if (!d.testing_p)
    return arm_expand_vec_perm_const_1 (&d);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  bool ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
static bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
         instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
        {
          if (code != ARM_PRE_DEC)
            return true;
          else
            return false;
        }
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
         word size, there is no point in auto-incrementing
         because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
        return false;

      /* Vector and floating point modes do not support
         these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
        return false;

      return true;

    default:
      return false;
    }

  return false;
}
29505 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29506 on ARM, since we know that shifts by negative amounts are no-ops.
29507 Additionally, the default expansion code is not available or suitable
29508 for post-reload insn splits (this can occur when the register allocator
29509 chooses not to do a shift in NEON).
29511 This function is used in both initial expand and post-reload splits, and
29512 handles all kinds of 64-bit shifts.
29514 Input requirements:
29515 - It is safe for the input and output to be the same register, but
29516 early-clobber rules apply for the shift amount and scratch registers.
29517 - Shift by register requires both scratch registers. In all other cases
29518 the scratch registers may be NULL.
29519 - Ashiftrt by a register also clobbers the CC register. */
29521 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
29522 rtx amount
, rtx scratch1
, rtx scratch2
)
29524 rtx out_high
= gen_highpart (SImode
, out
);
29525 rtx out_low
= gen_lowpart (SImode
, out
);
29526 rtx in_high
= gen_highpart (SImode
, in
);
29527 rtx in_low
= gen_lowpart (SImode
, in
);
29530 in = the register pair containing the input value.
29531 out = the destination register pair.
29532 up = the high- or low-part of each pair.
29533 down = the opposite part to "up".
29534 In a shift, we can consider bits to shift from "up"-stream to
29535 "down"-stream, so in a left-shift "up" is the low-part and "down"
29536 is the high-part of each register pair. */
29538 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
29539 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
29540 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
29541 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
29543 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
29545 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
29546 && GET_MODE (out
) == DImode
);
29548 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
29549 && GET_MODE (in
) == DImode
);
29551 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
29552 && GET_MODE (amount
) == SImode
)
29553 || CONST_INT_P (amount
)));
29554 gcc_assert (scratch1
== NULL
29555 || (GET_CODE (scratch1
) == SCRATCH
)
29556 || (GET_MODE (scratch1
) == SImode
29557 && REG_P (scratch1
)));
29558 gcc_assert (scratch2
== NULL
29559 || (GET_CODE (scratch2
) == SCRATCH
)
29560 || (GET_MODE (scratch2
) == SImode
29561 && REG_P (scratch2
)));
29562 gcc_assert (!REG_P (out
) || !REG_P (amount
)
29563 || !HARD_REGISTER_P (out
)
29564 || (REGNO (out
) != REGNO (amount
)
29565 && REGNO (out
) + 1 != REGNO (amount
)));
29567 /* Macros to make following code more readable. */
29568 #define SUB_32(DEST,SRC) \
29569 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29570 #define RSB_32(DEST,SRC) \
29571 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29572 #define SUB_S_32(DEST,SRC) \
29573 gen_addsi3_compare0 ((DEST), (SRC), \
29575 #define SET(DEST,SRC) \
29576 gen_rtx_SET ((DEST), (SRC))
29577 #define SHIFT(CODE,SRC,AMOUNT) \
29578 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29579 #define LSHIFT(CODE,SRC,AMOUNT) \
29580 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29581 SImode, (SRC), (AMOUNT))
29582 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29583 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29584 SImode, (SRC), (AMOUNT))
29586 gen_rtx_IOR (SImode, (A), (B))
29587 #define BRANCH(COND,LABEL) \
29588 gen_arm_cond_branch ((LABEL), \
29589 gen_rtx_ ## COND (CCmode, cc_reg, \
  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
         In both cases we try to match the result an ARM instruction in a
         shift-by-register would give.  This helps reduce execution
         differences between optimization levels, but it won't stop other
         parts of the compiler doing different things.  This is "undefined
         behavior", in any case.  */
      if (INTVAL (amount) <= 0)
        emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
        {
          if (code == ASHIFTRT)
            {
              rtx const31_rtx = GEN_INT (31);
              emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
              emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
            }
          else
            emit_insn (gen_movdi (out, const0_rtx));
        }

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
        {
          /* Shifts by a constant less than 32.  */
          rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

          /* Clearing the out register in DImode first avoids lots
             of spilling and results in less stack usage.
             Later this redundant insn is completely removed.
             Do that only if "in" and "out" are different registers.  */
          if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
            emit_insn (SET (out, const0_rtx));
          emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
          emit_insn (SET (out_down,
                          ORR (REV_LSHIFT (code, in_up, reverse_amount),
                               out_down)));
          emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
        }
      else
        {
          /* Shifts by a constant greater than 31.  */
          rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

          if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
            emit_insn (SET (out, const0_rtx));
          emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
          if (code == ASHIFTRT)
            emit_insn (gen_ashrsi3 (out_up, in_up,
                                    GEN_INT (31)));
          else
            emit_insn (SET (out_up, const0_rtx));
        }
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
         Swapping them around now allows the later code to be more general.  */
      switch (code)
        {
        case ASHIFT:
          emit_insn (SUB_32 (scratch1, amount));
          emit_insn (RSB_32 (scratch2, amount));
          break;
        case ASHIFTRT:
          emit_insn (RSB_32 (scratch1, amount));
          /* Also set CC = amount > 32.  */
          emit_insn (SUB_S_32 (scratch2, amount));
          break;
        case LSHIFTRT:
          emit_insn (RSB_32 (scratch1, amount));
          emit_insn (SUB_32 (scratch2, amount));
          break;
        default:
          gcc_unreachable ();
        }
      /* Emit code like this:

         ASHIFT:

           out_down = in_down << amount;
           out_down = (in_up << (amount - 32)) | out_down;
           out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
           out_up = in_up << amount;

         ASHIFTRT:

           out_down = in_down >> amount;
           out_down = (in_up << (32 - amount)) | out_down;
           if (amount < 32)
             out_down = ((signed)in_up >> (amount - 32)) | out_down;
           out_up = in_up >> amount;

         LSHIFTRT:

           out_down = in_down >> amount;
           out_down = (in_up << (32 - amount)) | out_down;
           if (amount < 32)
             out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
           out_up = in_up >> amount;

         The ARM and Thumb2 variants are the same but implemented slightly
         differently.  If this were only called during expand we could just
         use the Thumb2 case and let combine do the right thing, but this
         can also be called from post-reload splitters.  */
      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
        {
          /* Emit code for ARM mode.  */
          emit_insn (SET (out_down,
                          ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
          if (code == ASHIFTRT)
            {
              rtx_code_label *done_label = gen_label_rtx ();
              emit_jump_insn (BRANCH (LT, done_label));
              emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
                                             out_down)));
              emit_label (done_label);
            }
          else
            emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
                                           out_down)));
        }
      else
        {
          /* Emit code for Thumb2 mode.
             Thumb2 can't do shift and or in one insn.  */
          emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
          emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

          if (code == ASHIFTRT)
            {
              rtx_code_label *done_label = gen_label_rtx ();
              emit_jump_insn (BRANCH (LT, done_label));
              emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
              emit_insn (SET (out_down, ORR (out_down, scratch2)));
              emit_label (done_label);
            }
          else
            {
              emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
              emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
            }
        }

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

#undef SUB_32
#undef RSB_32
#undef SUB_S_32
#undef SET
#undef SHIFT
#undef LSHIFT
#undef REV_LSHIFT
#undef ORR
#undef BRANCH
}
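/* For illustration, a 64-bit LSHIFTRT by the constant 40 takes the
   "greater than 31" branch above with adj_amount == 8, so the emitted
   sequence is equivalent to

     out_down = (unsigned) in_up >> 8;
     out_up = 0;

   while an ASHIFTRT by 40 instead sign-fills the high word:

     out_down = (signed) in_up >> 8;
     out_up = (signed) in_up >> 31;  */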
/* Returns true if the pattern is a valid symbolic address, which is either a
   symbol_ref or (symbol_ref + addend).

   According to the ARM ELF ABI, the initial addend of REL-type relocations
   processing MOVW and MOVT instructions is formed by interpreting the 16-bit
   literal field of the instruction as a 16-bit signed value in the range
   -32768 <= A < 32768.  */

bool
arm_valid_symbolic_address_p (rtx addr)
{
  rtx xop0, xop1 = NULL_RTX;
  rtx tmp = addr;

  if (target_word_relocations)
    return false;

  if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
    return true;

  /* (const (plus: symbol_ref const_int))  */
  if (GET_CODE (addr) == CONST)
    tmp = XEXP (addr, 0);

  if (GET_CODE (tmp) == PLUS)
    {
      xop0 = XEXP (tmp, 0);
      xop1 = XEXP (tmp, 1);

      if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
        return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
    }

  return false;
}
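/* For example, the RTX
     (const (plus (symbol_ref ("x")) (const_int 16)))
   is accepted, since the addend 16 fits the signed 16-bit REL range
   checked above, whereas an addend of 0x8000 would be rejected.  */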
/* Returns true if COMPARISON is a valid comparison operation, and
   puts the operands into a form that is valid.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case E_SImode:
      if (!arm_add_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case E_DImode:
      if (!cmpdi_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case E_HFmode:
      if (!TARGET_VFP_FP16INST)
        break;
      /* FP16 comparisons are done in SF mode.  */
      mode = SFmode;
      *op1 = convert_to_mode (mode, *op1, 1);
      *op2 = convert_to_mode (mode, *op2, 1);
      /* Fall through.  */
    case E_SFmode:
    case E_DFmode:
      if (!vfp_compare_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!vfp_compare_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;
    default:
      break;
    }

  return false;
}
/* Maximum number of instructions to set block of memory.  */
static int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}
/* Return TRUE if it's profitable to set block of memory for
   non-vectorized case.  VAL is the value to set the memory
   with.  LENGTH is the number of bytes to set.  ALIGN is the
   alignment of the destination memory in bytes.  UNALIGNED_P
   is TRUE if we can only set the memory with instructions
   meeting alignment requirements.  USE_STRD_P is TRUE if we
   can use strd to set the memory.  */
static bool
arm_block_set_non_vect_profit_p (rtx val,
                                 unsigned HOST_WIDE_INT length,
                                 unsigned HOST_WIDE_INT align,
                                 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
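/* For instance, with unaligned access enabled, length == 11 in the
   final "else" arm costs (11 >> 2) + leftover[11 & 3] == 2 + 2 word
   and sub-word stores; one insn is then saved because the trailing
   STRH/STRB pair merges into a single STR, for a total of
   arm_const_inline_cost (SET, val) + 3.  */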
/* Return TRUE if it's profitable to set block of memory for
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
                             unsigned HOST_WIDE_INT align,
                             machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num--;

  return (num <= arm_block_set_max_insns ());
}
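/* For instance, length == 20 with V16QImode and a word-aligned
   destination gives a profitability count of 1 (constant load)
   + 2 (stores) - 1 (first 16 bytes use vst1:v16qi) == 2, with no
   address adjustment since (20 & 3) == 0.  */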
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
                              unsigned HOST_WIDE_INT length,
                              unsigned HOST_WIDE_INT value,
                              unsigned HOST_WIDE_INT align)
{
  unsigned int i, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_vec, reg;
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, offset);

  v = sext_hwi (v, BITS_PER_WORD);

  reg = gen_reg_rtx (mode);
  val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
        {
          emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
          offset += nelt_mode;
          mem = adjust_automodify_address (dstbase, mode, dst, offset);
        }
    }

  /* If there are not less than nelt_v8 bytes leftover, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      offset += length - i;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
        set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
        reg = gen_lowpart (V8QImode, reg);

      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
                                              + (nelt_mode - nelt_v8))));
      offset += (length - i) + (nelt_mode - nelt_v8);
      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
        set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_aligned_vect (rtx dstbase,
                            unsigned HOST_WIDE_INT length,
                            unsigned HOST_WIDE_INT value,
                            unsigned HOST_WIDE_INT align)
{
  unsigned int i, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_vec, reg;
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  v = sext_hwi (v, BITS_PER_WORD);

  reg = gen_reg_rtx (mode);
  val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
        {
          emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
          offset += length - nelt_mode;
          mem = adjust_automodify_address (dstbase, mode, dst, offset);
          /* We are shifting bytes back, set the alignment accordingly.  */
          if ((length & 0x3) == 0)
            set_mem_align (mem, BITS_PER_UNIT * 4);
          else if ((length & 0x1) == 0)
            set_mem_align (mem, BITS_PER_UNIT * 2);
          else
            set_mem_align (mem, BITS_PER_UNIT);

          emit_insn (gen_movmisalignv16qi (mem, reg));
          return true;
        }
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
      emit_move_insn (mem, reg);
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      offset += i - UNITS_PER_WORD;
      mem = adjust_automodify_address (dstbase, mode, addr, offset);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
        set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_move_insn (mem, reg);
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      offset += length - nelt_mode;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
        set_mem_align (mem, BITS_PER_UNIT * 2);
      else
        set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
                                  unsigned HOST_WIDE_INT length,
                                  unsigned HOST_WIDE_INT value,
                                  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                        align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
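/* For example, length == 5 at align == 2 becomes two HImode stores at
   offsets 0 and 2 from the loop above, plus one QImode store at
   offset 4 from the single-byte leftover case.  */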
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
                                unsigned HOST_WIDE_INT length,
                                unsigned HOST_WIDE_INT value,
                                unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
                && TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                        align, false, use_strd_p))
    {
      if (!use_strd_p)
        return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                            align, false, use_strd_p))
        return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
        {
          addr = plus_constant (Pmode, dst, i);
          mem = adjust_automodify_address (dstbase, DImode, addr, i);
          emit_move_insn (mem, reg);
        }
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
        emit_move_insn (mem, reg);
      else
        emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
        set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
         compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
        emit_move_insn (mem, reg);
      else
        emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
                    unsigned HOST_WIDE_INT length,
                    unsigned HOST_WIDE_INT value,
                    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
/* Expand string store operation.  Firstly we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
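/* For instance, when the NEON path is not taken, a word-aligned
     memset (p, 0xAB, 8);
   goes through arm_block_set_aligned_non_vect and typically becomes
   two SImode stores of the broadcast constant 0xABABABAB, or a single
   STRD pair when the current tuning prefers LDRD/STRD.  */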
static bool
arm_macro_fusion_p (void)
{
  return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
}
/* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
   for MOVW / MOVT macro fusion.  */

static bool
arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
{
  /* We are trying to fuse
       movw imm / movt imm
     instructions as a group that gets scheduled together.  */

  rtx set_dest = SET_DEST (curr_set);

  if (GET_MODE (set_dest) != SImode)
    return false;

  /* We are trying to match:
       prev (movw)  == (set (reg r0) (const_int imm16))
       curr (movt)  == (set (zero_extract (reg r0)
                                          (const_int 16)
                                          (const_int 16))
                            (const_int imm16_1))
     or
       prev (movw)  == (set (reg r1)
                            (high (symbol_ref ("SYM"))))
       curr (movt)  == (set (reg r0)
                            (lo_sum (reg r1)
                                    (symbol_ref ("SYM"))))  */

  if (GET_CODE (set_dest) == ZERO_EXTRACT)
    {
      if (CONST_INT_P (SET_SRC (curr_set))
          && CONST_INT_P (SET_SRC (prev_set))
          && REG_P (XEXP (set_dest, 0))
          && REG_P (SET_DEST (prev_set))
          && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
        return true;
    }
  else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
           && REG_P (SET_DEST (curr_set))
           && REG_P (SET_DEST (prev_set))
           && GET_CODE (SET_SRC (prev_set)) == HIGH
           && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
    return true;

  return false;
}
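/* For example, the insn pair emitted for a symbolic constant load,
     movw r0, #:lower16:SYM
     movt r0, #:upper16:SYM
   matches the HIGH/LO_SUM pattern above, so the two insns are kept
   back-to-back when FUSE_MOVW_MOVT is enabled in the tuning.  */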
static bool
aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
{
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  if (!prev_set
      || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
      && aarch_crypto_can_dual_issue (prev, curr))
    return true;

  if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
      && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
    return true;

  return false;
}
/* Return true iff the instruction fusion described by OP is enabled.  */
bool
arm_fusion_enabled_p (tune_params::fuse_ops op)
{
  return current_tune->fusible_ops & op;
}
/* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
   scheduled for speculative execution.  Reject the long-running division
   and square-root instructions.  */

static bool
arm_sched_can_speculate_insn (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
    case TYPE_SDIV:
    case TYPE_UDIV:
    case TYPE_FDIVS:
    case TYPE_FDIVD:
    case TYPE_FSQRTS:
    case TYPE_FSQRTD:
    case TYPE_NEON_FP_SQRT_S:
    case TYPE_NEON_FP_SQRT_D:
    case TYPE_NEON_FP_SQRT_S_Q:
    case TYPE_NEON_FP_SQRT_D_Q:
    case TYPE_NEON_FP_DIV_S:
    case TYPE_NEON_FP_DIV_D:
    case TYPE_NEON_FP_DIV_S_Q:
    case TYPE_NEON_FP_DIV_D_Q:
      return false;
    default:
      return true;
    }
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return HOST_WIDE_INT_1U << 29;
}
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == UNSPEC)
    return true;
  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
        {
          decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
          if (decl_op1
              && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
              && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
            {
              if ((VAR_P (decl_op1)
                   || TREE_CODE (decl_op1) == CONST_DECL)
                  && (VAR_P (decl_op0)
                      || TREE_CODE (decl_op0) == CONST_DECL))
                return (get_variable_section (decl_op1, false)
                        != get_variable_section (decl_op0, false));

              if (TREE_CODE (decl_op1) == LABEL_DECL
                  && TREE_CODE (decl_op0) == LABEL_DECL)
                return (DECL_CONTEXT (decl_op1)
                        != DECL_CONTEXT (decl_op0));
            }

          return true;
        }
    }

  return false;
}
/* Return TRUE if X is a reference to a value in a constant pool.  */
bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* Remember the last target of arm_set_current_function.  */
static GTY(()) tree arm_previous_fndecl;

/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */

static void
save_restore_target_globals (tree new_tree)
{
  /* If we have a previous state, use it.  */
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    {
      /* Call target_reinit and save the state for TARGET_GLOBALS.  */
      TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
    }

  arm_option_params_internal ();
}
/* Invalidate arm_previous_fndecl.  */

void
arm_reset_previous_fndecl (void)
{
  arm_previous_fndecl = NULL_TREE;
}
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */

static void
arm_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == arm_previous_fndecl)
    return;

  tree old_tree = (arm_previous_fndecl
                   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
                   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If current function has no attributes but previous one did,
     use the default node.  */
  if (! new_tree && old_tree)
    new_tree = target_option_default_node;

  /* If nothing to do return.  #pragma GCC reset or #pragma GCC pop to
     the default have been handled by save_restore_target_globals from
     arm_pragma_target_parse.  */
  if (old_tree == new_tree)
    return;

  arm_previous_fndecl = fndecl;

  /* First set the target options.  */
  cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));

  save_restore_target_globals (new_tree);
}
/* Implement TARGET_OPTION_PRINT.  */

static void
arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  int flags = ptr->x_target_flags;
  const char *fpu_name;

  fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
              ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);

  fprintf (file, "%*sselected isa %s\n", indent, "",
           TARGET_THUMB2_P (flags) ? "thumb2" :
           TARGET_THUMB_P (flags) ? "thumb1" :
           "arm");

  if (ptr->x_arm_arch_string)
    fprintf (file, "%*sselected architecture %s\n", indent, "",
             ptr->x_arm_arch_string);

  if (ptr->x_arm_cpu_string)
    fprintf (file, "%*sselected CPU %s\n", indent, "",
             ptr->x_arm_cpu_string);

  if (ptr->x_arm_tune_string)
    fprintf (file, "%*sselected tune %s\n", indent, "",
             ptr->x_arm_tune_string);

  fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
}
/* Hook to determine if one function can safely inline another.  */

static bool
arm_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
  bool can_inline = true;

  struct cl_target_option *caller_opts
    = TREE_TARGET_OPTION (caller_tree ? caller_tree
                          : target_option_default_node);

  struct cl_target_option *callee_opts
    = TREE_TARGET_OPTION (callee_tree ? callee_tree
                          : target_option_default_node);

  if (callee_opts == caller_opts)
    return true;

  /* Callee's ISA features should be a subset of the caller's.  */
  struct arm_build_target caller_target;
  struct arm_build_target callee_target;
  caller_target.isa = sbitmap_alloc (isa_num_bits);
  callee_target.isa = sbitmap_alloc (isa_num_bits);

  arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
                              false);
  arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
                              false);
  if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
    can_inline = false;

  sbitmap_free (caller_target.isa);
  sbitmap_free (callee_target.isa);

  /* OK to inline between different modes.
     Function with mode specific instructions, e.g using asm,
     must be explicitly protected with noinline.  */
  return can_inline;
}
/* Hook to fix function's alignment affected by target attribute.  */

static void
arm_relayout_function (tree fndecl)
{
  if (DECL_USER_ALIGN (fndecl))
    return;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
  SET_DECL_ALIGN
    (fndecl,
     FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
}
/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively
   go over the list.  */

static bool
arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
{
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
        if (TREE_VALUE (args)
            && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
          ret = false;
      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
  char *q;

  while ((q = strtok (argstr, ",")) != NULL)
    {
      while (ISSPACE (*q)) ++q;

      argstr = NULL;
      if (!strncmp (q, "thumb", 5))
        opts->x_target_flags |= MASK_THUMB;

      else if (!strncmp (q, "arm", 3))
        opts->x_target_flags &= ~MASK_THUMB;

      else if (!strncmp (q, "fpu=", 4))
        {
          int fpu_index;
          if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
                                       &fpu_index, CL_TARGET))
            {
              error ("invalid fpu for target attribute or pragma %qs", q);
              return false;
            }
          if (fpu_index == TARGET_FPU_auto)
            {
              /* This doesn't really make sense until we support
                 general dynamic selection of the architecture and all
                 sub-features.  */
              sorry ("auto fpu selection not currently permitted here");
              return false;
            }
          opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
        }
      else if (!strncmp (q, "arch=", 5))
        {
          char *arch = q + 5;
          const arch_option *arm_selected_arch
            = arm_parse_arch_option_name (all_architectures, "arch", arch);

          if (!arm_selected_arch)
            {
              error ("invalid architecture for target attribute or pragma %qs",
                     q);
              return false;
            }

          opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
        }
      else if (q[0] == '+')
        {
          opts->x_arm_arch_string
            = xasprintf ("%s%s", opts->x_arm_arch_string, q);
        }
      else
        {
          error ("unknown target attribute or pragma %qs", q);
          return false;
        }
    }

  return true;
}
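/* The strings accepted above allow, for example:
     __attribute__ ((target ("thumb")))
     __attribute__ ((target ("arm,fpu=vfpv3-d16")))
     __attribute__ ((target ("arch=armv7-a+simd")))
   where a token starting with '+' appends an extension to the
   architecture string selected so far.  */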
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
                                 struct gcc_options *opts_set)
{
  struct cl_target_option cl_opts;

  if (!arm_valid_target_attribute_rec (args, opts))
    return NULL_TREE;

  cl_target_option_save (&cl_opts, opts);
  arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
  arm_option_check_internal (opts);
  /* Do any overrides, such as global options arch=xxx.
     We do this since arm_active_target was overridden.  */
  arm_option_reconfigure_globals ();
  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (opts, opts_set);

  return build_target_option_node (opts);
}
static void
add_attribute (const char * mode, tree *attributes)
{
  size_t len = strlen (mode);
  tree value = build_string (len, mode);

  TREE_TYPE (value) = build_array_type (char_type_node,
                                        build_index_type (size_int (len)));

  *attributes = tree_cons (get_identifier ("target"),
                           build_tree_list (NULL_TREE, value),
                           *attributes);
}
/* For testing.  Insert thumb or arm modes alternatively on functions.  */

static void
arm_insert_attributes (tree fndecl, tree * attributes)
{
  const char *mode;

  if (! TARGET_FLIP_THUMB)
    return;

  if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
      || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
    return;

  /* Nested definitions must inherit mode.  */
  if (current_function_decl)
    {
      mode = TARGET_THUMB ? "thumb" : "arm";
      add_attribute (mode, attributes);
      return;
    }

  /* If there is already a setting don't change it.  */
  if (lookup_attribute ("target", *attributes) != NULL)
    return;

  mode = thumb_flipper ? "thumb" : "arm";
  add_attribute (mode, attributes);

  thumb_flipper = !thumb_flipper;
}
/* Hook to validate attribute((target("string"))).  */

static bool
arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
                              tree args, int ARG_UNUSED (flags))
{
  bool ret = true;
  struct gcc_options func_options;
  tree cur_tree, new_optimize;
  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (!func_optimize)
    func_optimize = optimization_default_node;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);

  /* Initialize func_options to the defaults.  */
  cl_optimization_restore (&func_options,
                           TREE_OPTIMIZATION (func_optimize));

  cl_target_option_restore (&func_options,
                            TREE_TARGET_OPTION (target_option_default_node));

  /* Set func_options flags with new target mode.  */
  cur_tree = arm_valid_target_attribute_tree (args, &func_options,
                                              &global_options_set);

  if (cur_tree == NULL_TREE)
    ret = false;

  new_optimize = build_optimization_node (&func_options);

  DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;

  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;

  finalize_options_struct (&func_options);

  return ret;
}
/* Match an ISA feature bitmap to a named FPU.  We always use the
   first entry that exactly matches the feature set, so that we
   effectively canonicalize the FPU name for the assembler.  */
static const char*
arm_identify_fpu_from_isa (sbitmap isa)
{
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap cand_fpubits (isa_num_bits);

  bitmap_and (fpubits, isa, isa_all_fpubits);

  /* If there are no ISA feature bits relating to the FPU, we must be
     doing soft-float.  */
  if (bitmap_empty_p (fpubits))
    return "softvfp";

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
      if (bitmap_equal_p (fpubits, cand_fpubits))
        return all_fpus[i].name;
    }

  /* We must find an entry, or things have gone wrong.  */
  gcc_unreachable ();
}
/* Implement ASM_DECLARE_FUNCTION_NAME.  Output the ISA features used
   by the function fndecl.  */
void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);

  struct cl_target_option *targ_options;
  if (target_parts)
    targ_options = TREE_TARGET_OPTION (target_parts);
  else
    targ_options = TREE_TARGET_OPTION (target_option_current_node);
  gcc_assert (targ_options);

  /* Only update the assembler .arch string if it is distinct from the last
     such string we printed.  arch_to_print is set conditionally in case
     targ_options->x_arm_arch_string is NULL which can be the case
     when cc1 is invoked directly without passing -march option.  */
  std::string arch_to_print;
  if (targ_options->x_arm_arch_string)
    arch_to_print = targ_options->x_arm_arch_string;

  if (arch_to_print != arm_last_printed_arch_string)
    {
      std::string arch_name
        = arch_to_print.substr (0, arch_to_print.find ("+"));
      asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
      const arch_option *arch
        = arm_parse_arch_option_name (all_architectures, "-march",
                                      targ_options->x_arm_arch_string);
      auto_sbitmap opt_bits (isa_num_bits);

      gcc_assert (arch);
      if (arch->common.extensions)
        {
          for (const struct cpu_arch_extension *opt = arch->common.extensions;
               opt->name != NULL;
               opt++)
            {
              if (!opt->remove)
                {
                  arm_initialize_isa (opt_bits, opt->isa_bits);
                  if (bitmap_subset_p (opt_bits, arm_active_target.isa)
                      && !bitmap_subset_p (opt_bits, isa_all_fpubits))
                    asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
                                 opt->name);
                }
            }
        }

      arm_last_printed_arch_string = arch_to_print;
    }

  fprintf (stream, "\t.syntax unified\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
          || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
              && cfun->is_thunk))
        fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
        fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
        fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  std::string fpu_to_print
    = TARGET_SOFT_FLOAT
      ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);

  if (fpu_to_print != arm_last_printed_fpu_string)
    {
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
      arm_last_printed_fpu_string = fpu_to_print;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}
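/* For a Thumb-2 VFP function this typically emits a preamble such as
       .arch armv7-a
       .syntax unified
       .thumb
       .thumb_func
       .fpu vfpv3-d16
   with the .arch and .fpu directives suppressed when they would only
   repeat the last values printed.  */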
/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support to fuse ldr or str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other instruction,
   FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kind
   instruction fusion can be supported by returning different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
                           int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
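/* For example, the two loads
     ldr r1, [r3, #4]
     ldr r2, [r3, #8]
   share the same base register and both get the "load" fusion
   priority; the smaller offset yields the larger PRI, so the pair is
   presented to the scheduler in fusible order.  */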
/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target, must be described using
   a mask selecting GCC high-lanes.

                 Big-Endian             Little-Endian

   GCC           0   1   2   3          3   2   1   0
               | x | x | x | x |      | x | x | x | x |
   Architecture  3   2   1   0          3   2   1   0

   Low Mask:       { 2, 3 }               { 0, 1 }
   High Mask:      { 0, 1 }               { 2, 3 }
*/

rtx
arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
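/* For example, arm_simd_vect_par_cnst_half (V4SImode, true) returns
   (parallel [(const_int 2) (const_int 3)]) for little-endian and
   (parallel [(const_int 0) (const_int 1)]) for big-endian, matching
   the "High Mask" row of the diagram above.  */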
/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   arm_simd_vect_par_cnst_half_p for more details.  */

bool
arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
                                     bool high)
{
  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
          || INTVAL (elt_ideal) != INTVAL (elt_op))
        return false;
    }
  return true;
}
/* Can output mi_thunk for all cases except for non-zero vcall_offset
   in Thumb1.  */
static bool
arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
                         const_tree)
{
  /* For now, we punt and not handle this for TARGET_THUMB1.  */
  if (vcall_offset && TARGET_THUMB1)
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Generate RTL for a conditional branch with rtx comparison CODE in
   mode CC_MODE.  The destination of the unlikely conditional branch
   is LABEL_REF.  */

void
arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
                          rtx label_ref)
{
  rtx x;
  x = gen_rtx_fmt_ee (code, VOIDmode,
                      gen_rtx_REG (cc_mode, CC_REGNUM),
                      const0_rtx);

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (VOIDmode, label_ref),
                            pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}
/* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.

   For pure-code sections there is no letter code for this attribute, so
   output all the section flags numerically when this is needed.  */

static bool
arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
{
  if (flags & SECTION_ARM_PURECODE)
    {
      *num = 0x20000000;

      if (!(flags & SECTION_DEBUG))
        *num |= 0x2;
      if (flags & SECTION_EXCLUDE)
        *num |= 0x80000000;
      if (flags & SECTION_WRITE)
        *num |= 0x1;
      if (flags & SECTION_CODE)
        *num |= 0x4;
      if (flags & SECTION_MERGE)
        *num |= 0x10;
      if (flags & SECTION_STRINGS)
        *num |= 0x20;
      if (flags & SECTION_TLS)
        *num |= 0x400;
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
        *num |= 0x200;

      return true;
    }

  return false;
}
/* Implement the TARGET_ASM_FUNCTION_SECTION hook.

   If pure-code is passed as an option, make sure all functions are in
   sections that have the SHF_ARM_PURECODE attribute.  */

static section *
arm_function_section (tree decl, enum node_frequency freq,
                      bool startup, bool exit)
{
  const char * section_name;
  section * sec;

  if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
    return default_function_section (decl, freq, startup, exit);

  if (!target_pure_code)
    return default_function_section (decl, freq, startup, exit);

  section_name = DECL_SECTION_NAME (decl);

  /* If a function is not in a named section then it falls under the 'default'
     text section, also known as '.text'.  We can preserve previous behavior as
     the default text section already has the SHF_ARM_PURECODE section
     attribute.  */
  if (!section_name)
    {
      section *default_sec = default_function_section (decl, freq, startup,
                                                       exit);

      /* If default_sec is not null, then it must be a special section like for
         example .text.startup.  We set the pure-code attribute and return the
         same section to preserve existing behavior.  */
      if (default_sec)
        default_sec->common.flags |= SECTION_ARM_PURECODE;
      return default_sec;
    }

  /* Otherwise look whether a section has already been created with
     'section_name'.  */
  sec = get_named_section (decl, section_name, 0);
  if (!sec)
    /* If that is not the case passing NULL as the section's name to
       'get_named_section' will create a section with the declaration's
       section name.  */
    sec = get_named_section (decl, NULL, 0);

  /* Set the SHF_ARM_PURECODE attribute.  */
  sec->common.flags |= SECTION_ARM_PURECODE;

  return sec;
}
/* Implements the TARGET_SECTION_FLAGS hook.

   If DECL is a function declaration and pure-code is passed as an option
   then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
   section's name and RELOC indicates whether the declaration's initializer
   may contain runtime relocations.  */

static unsigned int
arm_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
    flags |= SECTION_ARM_PURECODE;

  return flags;
}
/* Generate call to __aeabi_[mode]divmod (op0, op1).  */

static void
arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
                           rtx op0, rtx op1,
                           rtx *quot_p, rtx *rem_p)
{
  if (mode == SImode)
    gcc_assert (!TARGET_IDIV);

  scalar_int_mode libval_mode
    = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));

  rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
                                        libval_mode,
                                        op0, GET_MODE (op0),
                                        op1, GET_MODE (op1));

  rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
  rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
                                       GET_MODE_SIZE (mode));

  gcc_assert (quotient);
  gcc_assert (remainder);

  *quot_p = quotient;
  *rem_p = remainder;
}
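/* Per the ARM run-time ABI, __aeabi_idivmod and __aeabi_uidivmod
   return the quotient in r0 and the remainder in r1; modelling the
   pair as a single double-width libcall value lets the two subregs
   above extract each half.  */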
/* This function checks for the availability of the coprocessor builtin passed
   in BUILTIN for the current target.  Returns true if it is available and
   false otherwise.  If a BUILTIN is passed for which this function has not
   been implemented it will cause an exception.  */

bool
arm_coproc_builtin_available (enum unspecv builtin)
{
  /* None of these builtins are available in Thumb mode if the target only
     supports Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  switch (builtin)
    {
    case VUNSPEC_CDP:
    case VUNSPEC_LDC:
    case VUNSPEC_LDCL:
    case VUNSPEC_STC:
    case VUNSPEC_STCL:
    case VUNSPEC_MCR:
    case VUNSPEC_MRC:
      if (arm_arch4)
        return true;
      break;
    case VUNSPEC_CDP2:
    case VUNSPEC_LDC2:
    case VUNSPEC_LDC2L:
    case VUNSPEC_STC2:
    case VUNSPEC_STC2L:
    case VUNSPEC_MCR2:
    case VUNSPEC_MRC2:
      /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
         ARMv8-{A,M}.  */
      if (arm_arch5)
        return true;
      break;
    case VUNSPEC_MCRR:
    case VUNSPEC_MRRC:
      /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
         ARMv8-{A,M}.  */
      if (arm_arch6 || arm_arch5te)
        return true;
      break;
    case VUNSPEC_MCRR2:
    case VUNSPEC_MRRC2:
      if (arm_arch6)
        return true;
      break;
    default:
      gcc_unreachable ();
    }
  return false;
}
/* This function returns true if OP is a valid memory operand for the ldc and
   stc coprocessor instructions and false otherwise.  */

bool
arm_coproc_ldc_stc_legitimate_address (rtx op)
{
  HOST_WIDE_INT range;
  /* Has to be a memory operand.  */
  if (!MEM_P (op))
    return false;

  op = XEXP (op, 0);

  /* We accept registers.  */
  if (REG_P (op))
    return true;

  switch (GET_CODE (op))
    {
    case PLUS:
      {
        /* Or registers with an offset.  */
        if (!REG_P (XEXP (op, 0)))
          return false;

        op = XEXP (op, 1);

        /* The offset must be an immediate though.  */
        if (!CONST_INT_P (op))
          return false;

        range = INTVAL (op);

        /* Within the range of [-1020,1020].  */
        if (!IN_RANGE (range, -1020, 1020))
          return false;

        /* And a multiple of 4.  */
        return (range % 4) == 0;
      }
    case PRE_INC:
    case POST_INC:
    case PRE_DEC:
    case POST_DEC:
      return REG_P (XEXP (op, 0));
    default:
      gcc_unreachable ();
    }
  return false;
}
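/* For example, (mem (plus (reg) (const_int 8))) is accepted above,
   while an offset of 1024 fails the [-1020, 1020] range check and an
   offset of 6 fails the multiple-of-4 check.  */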
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   In VFPv1, VFP registers could only be accessed in the mode they were
   set, so subregs would be invalid there.  However, we don't support
   VFPv1 at the moment, and the restriction was lifted in VFPv2.

   In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
   VFP registers in little-endian order.  We can't describe that accurately to
   GCC, so avoid taking subregs of such values.

   The only exception is going from a 128-bit to a 64-bit type.  In that
   case the data layout happens to be consistent for big-endian, so we
   explicitly allow that case.  */

static bool
arm_can_change_mode_class (machine_mode from, machine_mode to,
                           reg_class_t rclass)
{
  if (TARGET_BIG_END
      && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
      && (GET_MODE_SIZE (from) > UNITS_PER_WORD
          || GET_MODE_SIZE (to) > UNITS_PER_WORD)
      && reg_classes_intersect_p (VFP_REGS, rclass))
    return false;
  return true;
}
/* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
   strcpy from constants will be faster.  */

static HOST_WIDE_INT
arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
  if (TREE_CODE (exp) == STRING_CST && !optimize_size)
    return MAX (align, BITS_PER_WORD * factor);
  return align;
}
#if CHECKING_P
namespace selftest {

/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  We primarily check for
   inconsistencies in the option extensions at present (extensions
   that duplicate others but aren't marked as aliases).  Furthermore,
   for correct canonicalization later options must never be a subset
   of an earlier option.  Any extension should also only specify other
   feature bits and never an architecture bit.  The architecture is inferred
   from the declaration of the extension.  */
static void
arm_test_cpu_arch_data (void)
{
  const arch_option *arch;
  const cpu_option *cpu;
  auto_sbitmap target_isa (isa_num_bits);
  auto_sbitmap isa1 (isa_num_bits);
  auto_sbitmap isa2 (isa_num_bits);

  for (arch = all_architectures; arch->common.name != NULL; ++arch)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (arch->common.extensions == NULL)
        continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
        {
          if (ext1->alias)
            continue;

          arm_initialize_isa (isa1, ext1->isa_bits);
          for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
            {
              if (ext2->alias || ext1->remove != ext2->remove)
                continue;

              arm_initialize_isa (isa2, ext2->isa_bits);
              /* If the option is a subset of the parent option, it doesn't
                 add anything and so isn't useful.  */
              ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

              /* If the extension specifies any architectural bits then
                 disallow it.  Extensions should only specify feature bits.  */
              ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
            }
        }
    }

  for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (cpu->common.extensions == NULL)
        continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
        {
          if (ext1->alias)
            continue;

          arm_initialize_isa (isa1, ext1->isa_bits);
          for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
            {
              if (ext2->alias || ext1->remove != ext2->remove)
                continue;

              arm_initialize_isa (isa2, ext2->isa_bits);
              /* If the option is a subset of the parent option, it doesn't
                 add anything and so isn't useful.  */
              ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

              /* If the extension specifies any architectural bits then
                 disallow it.  Extensions should only specify feature bits.  */
              ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
            }
        }
    }
}
/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  Here we check for consistency between the
   fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
   a feature bit that is not defined by any FPU flag.  */
static void
arm_test_fpu_data (void)
{
  auto_sbitmap isa_all_fpubits (isa_num_bits);
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap tmpset (isa_num_bits);

  static const enum isa_feature fpu_bitlist[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  arm_initialize_isa (isa_all_fpubits, fpu_bitlist);

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
      bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
      bitmap_clear (isa_all_fpubits);
      bitmap_copy (isa_all_fpubits, tmpset);
    }

  if (!bitmap_empty_p (isa_all_fpubits))
    {
      fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
               " group that are not defined by any FPU.\n"
               "       Check your arm-cpus.in.\n");
      ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
    }
}
static void
arm_run_selftests (void)
{
  arm_test_cpu_arch_data ();
  arm_test_fpu_data ();
}
} /* Namespace selftest.  */

#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests

#endif /* CHECKING_P */

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-arm.h"