1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #define IN_TARGET_CODE 1
26 #define INCLUDE_STRING
28 #include "coretypes.h"
37 #include "stringpool.h"
44 #include "diagnostic-core.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
51 #include "insn-attr.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
65 #include "target-globals.h"
67 #include "tm-constrs.h"
69 #include "optabs-libfuncs.h"
74 /* This file should be included last. */
75 #include "target-def.h"
77 /* Forward definitions of types. */
78 typedef struct minipool_node Mnode
;
79 typedef struct minipool_fixup Mfix
;
81 void (*arm_lang_output_object_attributes_hook
)(void);
88 /* Forward function declarations. */
89 static bool arm_const_not_ok_for_debug_p (rtx
);
90 static int arm_needs_doubleword_align (machine_mode
, const_tree
);
91 static int arm_compute_static_chain_stack_bytes (void);
92 static arm_stack_offsets
*arm_get_frame_offsets (void);
93 static void arm_compute_frame_layout (void);
94 static void arm_add_gc_roots (void);
95 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
96 unsigned HOST_WIDE_INT
, rtx
, rtx
, int, int);
97 static unsigned bit_count (unsigned long);
98 static unsigned bitmap_popcount (const sbitmap
);
99 static int arm_address_register_rtx_p (rtx
, int);
100 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
101 static bool is_called_in_ARM_mode (tree
);
102 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
103 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
104 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
105 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
106 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
107 inline static int thumb1_index_register_rtx_p (rtx
, int);
108 static int thumb_far_jump_used_p (void);
109 static bool thumb_force_lr_save (void);
110 static unsigned arm_size_return_regs (void);
111 static bool arm_assemble_integer (rtx
, unsigned int, int);
112 static void arm_print_operand (FILE *, rtx
, int);
113 static void arm_print_operand_address (FILE *, machine_mode
, rtx
);
114 static bool arm_print_operand_punct_valid_p (unsigned char code
);
115 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
116 static arm_cc
get_arm_condition_code (rtx
);
117 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
118 static const char *output_multi_immediate (rtx
*, const char *, const char *,
120 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
121 static struct machine_function
*arm_init_machine_status (void);
122 static void thumb_exit (FILE *, int);
123 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
124 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
125 static Mnode
*add_minipool_forward_ref (Mfix
*);
126 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
127 static Mnode
*add_minipool_backward_ref (Mfix
*);
128 static void assign_minipool_offsets (Mfix
*);
129 static void arm_print_value (FILE *, rtx
);
130 static void dump_minipool (rtx_insn
*);
131 static int arm_barrier_cost (rtx_insn
*);
132 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
133 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
134 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
136 static void arm_reorg (void);
137 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
138 static unsigned long arm_compute_save_reg0_reg12_mask (void);
139 static unsigned long arm_compute_save_core_reg_mask (void);
140 static unsigned long arm_isr_value (tree
);
141 static unsigned long arm_compute_func_type (void);
142 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
143 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
144 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
145 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
146 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
148 static tree
arm_handle_cmse_nonsecure_entry (tree
*, tree
, tree
, int, bool *);
149 static tree
arm_handle_cmse_nonsecure_call (tree
*, tree
, tree
, int, bool *);
150 static void arm_output_function_epilogue (FILE *);
151 static void arm_output_function_prologue (FILE *);
152 static int arm_comp_type_attributes (const_tree
, const_tree
);
153 static void arm_set_default_type_attributes (tree
);
154 static int arm_adjust_cost (rtx_insn
*, int, rtx_insn
*, int, unsigned int);
155 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
156 static int optimal_immediate_sequence (enum rtx_code code
,
157 unsigned HOST_WIDE_INT val
,
158 struct four_ints
*return_sequence
);
159 static int optimal_immediate_sequence_1 (enum rtx_code code
,
160 unsigned HOST_WIDE_INT val
,
161 struct four_ints
*return_sequence
,
163 static int arm_get_strip_length (int);
164 static bool arm_function_ok_for_sibcall (tree
, tree
);
165 static machine_mode
arm_promote_function_mode (const_tree
,
168 static bool arm_return_in_memory (const_tree
, const_tree
);
169 static rtx
arm_function_value (const_tree
, const_tree
, bool);
170 static rtx
arm_libcall_value_1 (machine_mode
);
171 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
172 static bool arm_function_value_regno_p (const unsigned int);
173 static void arm_internal_label (FILE *, const char *, unsigned long);
174 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
176 static bool arm_have_conditional_execution (void);
177 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
178 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
179 static bool arm_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
180 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
181 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
182 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
183 static void emit_constant_insn (rtx cond
, rtx pattern
);
184 static rtx_insn
*emit_set_insn (rtx
, rtx
);
185 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
186 static int arm_arg_partial_bytes (cumulative_args_t
, machine_mode
,
188 static rtx
arm_function_arg (cumulative_args_t
, machine_mode
,
190 static void arm_function_arg_advance (cumulative_args_t
, machine_mode
,
192 static pad_direction
arm_function_arg_padding (machine_mode
, const_tree
);
193 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
194 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
196 static rtx
aapcs_libcall_value (machine_mode
);
197 static int aapcs_select_return_coproc (const_tree
, const_tree
);
199 #ifdef OBJECT_FORMAT_ELF
200 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
201 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
204 static void arm_encode_section_info (tree
, rtx
, int);
207 static void arm_file_end (void);
208 static void arm_file_start (void);
209 static void arm_insert_attributes (tree
, tree
*);
211 static void arm_setup_incoming_varargs (cumulative_args_t
, machine_mode
,
213 static bool arm_pass_by_reference (cumulative_args_t
,
214 machine_mode
, const_tree
, bool);
215 static bool arm_promote_prototypes (const_tree
);
216 static bool arm_default_short_enums (void);
217 static bool arm_align_anon_bitfield (void);
218 static bool arm_return_in_msb (const_tree
);
219 static bool arm_must_pass_in_stack (machine_mode
, const_tree
);
220 static bool arm_return_in_memory (const_tree
, const_tree
);
222 static void arm_unwind_emit (FILE *, rtx_insn
*);
223 static bool arm_output_ttype (rtx
);
224 static void arm_asm_emit_except_personality (rtx
);
226 static void arm_asm_init_sections (void);
227 static rtx
arm_dwarf_register_span (rtx
);
229 static tree
arm_cxx_guard_type (void);
230 static bool arm_cxx_guard_mask_bit (void);
231 static tree
arm_get_cookie_size (tree
);
232 static bool arm_cookie_has_size (void);
233 static bool arm_cxx_cdtor_returns_this (void);
234 static bool arm_cxx_key_method_may_be_inline (void);
235 static void arm_cxx_determine_class_data_visibility (tree
);
236 static bool arm_cxx_class_data_always_comdat (void);
237 static bool arm_cxx_use_aeabi_atexit (void);
238 static void arm_init_libfuncs (void);
239 static tree
arm_build_builtin_va_list (void);
240 static void arm_expand_builtin_va_start (tree
, rtx
);
241 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
242 static void arm_option_override (void);
243 static void arm_option_save (struct cl_target_option
*, struct gcc_options
*);
244 static void arm_option_restore (struct gcc_options
*,
245 struct cl_target_option
*);
246 static void arm_override_options_after_change (void);
247 static void arm_option_print (FILE *, int, struct cl_target_option
*);
248 static void arm_set_current_function (tree
);
249 static bool arm_can_inline_p (tree
, tree
);
250 static void arm_relayout_function (tree
);
251 static bool arm_valid_target_attribute_p (tree
, tree
, tree
, int);
252 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
253 static bool arm_sched_can_speculate_insn (rtx_insn
*);
254 static bool arm_macro_fusion_p (void);
255 static bool arm_cannot_copy_insn_p (rtx_insn
*);
256 static int arm_issue_rate (void);
257 static int arm_first_cycle_multipass_dfa_lookahead (void);
258 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
259 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
260 static bool arm_output_addr_const_extra (FILE *, rtx
);
261 static bool arm_allocate_stack_slots_for_args (void);
262 static bool arm_warn_func_return (tree
);
263 static tree
arm_promoted_type (const_tree t
);
264 static bool arm_scalar_mode_supported_p (scalar_mode
);
265 static bool arm_frame_pointer_required (void);
266 static bool arm_can_eliminate (const int, const int);
267 static void arm_asm_trampoline_template (FILE *);
268 static void arm_trampoline_init (rtx
, tree
, rtx
);
269 static rtx
arm_trampoline_adjust_address (rtx
);
270 static rtx_insn
*arm_pic_static_addr (rtx orig
, rtx reg
);
271 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
272 static bool xscale_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
273 static bool fa726te_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
274 static bool arm_array_mode_supported_p (machine_mode
,
275 unsigned HOST_WIDE_INT
);
276 static machine_mode
arm_preferred_simd_mode (scalar_mode
);
277 static bool arm_class_likely_spilled_p (reg_class_t
);
278 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
279 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
280 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
284 static void arm_conditional_register_usage (void);
285 static enum flt_eval_method
arm_excess_precision (enum excess_precision_type
);
286 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
287 static unsigned int arm_autovectorize_vector_sizes (void);
288 static int arm_default_branch_cost (bool, bool);
289 static int arm_cortex_a5_branch_cost (bool, bool);
290 static int arm_cortex_m_branch_cost (bool, bool);
291 static int arm_cortex_m7_branch_cost (bool, bool);
293 static bool arm_vectorize_vec_perm_const_ok (machine_mode
, vec_perm_indices
);
295 static bool aarch_macro_fusion_pair_p (rtx_insn
*, rtx_insn
*);
297 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
299 int misalign ATTRIBUTE_UNUSED
);
300 static unsigned arm_add_stmt_cost (void *data
, int count
,
301 enum vect_cost_for_stmt kind
,
302 struct _stmt_vec_info
*stmt_info
,
304 enum vect_cost_model_location where
);
306 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
307 bool op0_preserve_value
);
308 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
310 static void arm_sched_fusion_priority (rtx_insn
*, int, int *, int*);
311 static bool arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
313 static section
*arm_function_section (tree
, enum node_frequency
, bool, bool);
314 static bool arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
);
315 static unsigned int arm_elf_section_type_flags (tree decl
, const char *name
,
317 static void arm_expand_divmod_libfunc (rtx
, machine_mode
, rtx
, rtx
, rtx
*, rtx
*);
318 static opt_scalar_float_mode
arm_floatn_mode (int, bool);
319 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode
);
320 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode
);
321 static bool arm_modes_tieable_p (machine_mode
, machine_mode
);
322 static HOST_WIDE_INT
arm_constant_alignment (const_tree
, HOST_WIDE_INT
);
324 /* Table of machine attributes. */
/* NOTE(review): extraction has dropped several source lines from this
   initializer (entry terminators such as "NULL }," after the "pcs"/"isr"
   handlers, the enclosing #if/#endif for the ARM/PE entries, and the
   closing "};" of the array).  Restore from upstream GCC arm.c before
   compiling — the entries below are otherwise byte-identical.  */
325 static const struct attribute_spec arm_attribute_table
[] =
327 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
328 affects_type_identity, handler, exclude } */
329 /* Function calls made to this symbol must be done indirectly, because
330 it may lie outside of the 26 bit addressing range of a normal function
   call.  */
332 { "long_call", 0, 0, false, true, true, false, NULL
, NULL
},
333 /* Whereas these functions are always known to reside within the 26 bit
   addressing range.  */
335 { "short_call", 0, 0, false, true, true, false, NULL
, NULL
},
336 /* Specify the procedure call conventions for a function. */
337 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute
,
339 /* Interrupt Service Routines have special prologue and epilogue requirements. */
340 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute
,
/* "interrupt" is accepted as a synonym for "isr" and shares its handler.  */
342 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute
,
344 { "naked", 0, 0, true, false, false, false,
345 arm_handle_fndecl_attribute
, NULL
},
347 /* ARM/PE has three new attributes:
349 dllexport - for exporting a function/variable that will live in a dll
350 dllimport - for importing a function/variable from a dll
352 Microsoft allows multiple declspecs in one __declspec, separating
353 them with spaces. We do NOT support this. Instead, use __declspec
   multiple times.  */
356 { "dllimport", 0, 0, true, false, false, false, NULL
, NULL
},
357 { "dllexport", 0, 0, true, false, false, false, NULL
, NULL
},
358 { "interfacearm", 0, 0, true, false, false, false,
359 arm_handle_fndecl_attribute
, NULL
},
360 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
361 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute
,
363 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute
,
365 { "notshared", 0, 0, false, true, false, false,
366 arm_handle_notshared_attribute
, NULL
},
368 /* ARMv8-M Security Extensions support. */
369 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
370 arm_handle_cmse_nonsecure_entry
, NULL
},
371 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
372 arm_handle_cmse_nonsecure_call
, NULL
},
/* Sentinel entry terminating the table.  */
373 { NULL
, 0, 0, false, false, false, false, NULL
, NULL
}
376 /* Initialize the GCC target structure. */
377 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
378 #undef TARGET_MERGE_DECL_ATTRIBUTES
379 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
382 #undef TARGET_LEGITIMIZE_ADDRESS
383 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
385 #undef TARGET_ATTRIBUTE_TABLE
386 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
388 #undef TARGET_INSERT_ATTRIBUTES
389 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
391 #undef TARGET_ASM_FILE_START
392 #define TARGET_ASM_FILE_START arm_file_start
393 #undef TARGET_ASM_FILE_END
394 #define TARGET_ASM_FILE_END arm_file_end
396 #undef TARGET_ASM_ALIGNED_SI_OP
397 #define TARGET_ASM_ALIGNED_SI_OP NULL
398 #undef TARGET_ASM_INTEGER
399 #define TARGET_ASM_INTEGER arm_assemble_integer
401 #undef TARGET_PRINT_OPERAND
402 #define TARGET_PRINT_OPERAND arm_print_operand
403 #undef TARGET_PRINT_OPERAND_ADDRESS
404 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
405 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
406 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
408 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
409 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
411 #undef TARGET_ASM_FUNCTION_PROLOGUE
412 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
414 #undef TARGET_ASM_FUNCTION_EPILOGUE
415 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
417 #undef TARGET_CAN_INLINE_P
418 #define TARGET_CAN_INLINE_P arm_can_inline_p
420 #undef TARGET_RELAYOUT_FUNCTION
421 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
423 #undef TARGET_OPTION_OVERRIDE
424 #define TARGET_OPTION_OVERRIDE arm_option_override
426 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
427 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
429 #undef TARGET_OPTION_SAVE
430 #define TARGET_OPTION_SAVE arm_option_save
432 #undef TARGET_OPTION_RESTORE
433 #define TARGET_OPTION_RESTORE arm_option_restore
435 #undef TARGET_OPTION_PRINT
436 #define TARGET_OPTION_PRINT arm_option_print
438 #undef TARGET_COMP_TYPE_ATTRIBUTES
439 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
441 #undef TARGET_SCHED_CAN_SPECULATE_INSN
442 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
444 #undef TARGET_SCHED_MACRO_FUSION_P
445 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
447 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
448 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
450 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
451 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
453 #undef TARGET_SCHED_ADJUST_COST
454 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
456 #undef TARGET_SET_CURRENT_FUNCTION
457 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
459 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
460 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
462 #undef TARGET_SCHED_REORDER
463 #define TARGET_SCHED_REORDER arm_sched_reorder
465 #undef TARGET_REGISTER_MOVE_COST
466 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
468 #undef TARGET_MEMORY_MOVE_COST
469 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
471 #undef TARGET_ENCODE_SECTION_INFO
473 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
475 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
478 #undef TARGET_STRIP_NAME_ENCODING
479 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
481 #undef TARGET_ASM_INTERNAL_LABEL
482 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
484 #undef TARGET_FLOATN_MODE
485 #define TARGET_FLOATN_MODE arm_floatn_mode
487 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
488 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
490 #undef TARGET_FUNCTION_VALUE
491 #define TARGET_FUNCTION_VALUE arm_function_value
493 #undef TARGET_LIBCALL_VALUE
494 #define TARGET_LIBCALL_VALUE arm_libcall_value
496 #undef TARGET_FUNCTION_VALUE_REGNO_P
497 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
499 #undef TARGET_ASM_OUTPUT_MI_THUNK
500 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
501 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
502 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
504 #undef TARGET_RTX_COSTS
505 #define TARGET_RTX_COSTS arm_rtx_costs
506 #undef TARGET_ADDRESS_COST
507 #define TARGET_ADDRESS_COST arm_address_cost
509 #undef TARGET_SHIFT_TRUNCATION_MASK
510 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
511 #undef TARGET_VECTOR_MODE_SUPPORTED_P
512 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
513 #undef TARGET_ARRAY_MODE_SUPPORTED_P
514 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
515 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
516 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
517 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
518 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
519 arm_autovectorize_vector_sizes
521 #undef TARGET_MACHINE_DEPENDENT_REORG
522 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
524 #undef TARGET_INIT_BUILTINS
525 #define TARGET_INIT_BUILTINS arm_init_builtins
526 #undef TARGET_EXPAND_BUILTIN
527 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
528 #undef TARGET_BUILTIN_DECL
529 #define TARGET_BUILTIN_DECL arm_builtin_decl
531 #undef TARGET_INIT_LIBFUNCS
532 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
534 #undef TARGET_PROMOTE_FUNCTION_MODE
535 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
536 #undef TARGET_PROMOTE_PROTOTYPES
537 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
538 #undef TARGET_PASS_BY_REFERENCE
539 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
540 #undef TARGET_ARG_PARTIAL_BYTES
541 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
542 #undef TARGET_FUNCTION_ARG
543 #define TARGET_FUNCTION_ARG arm_function_arg
544 #undef TARGET_FUNCTION_ARG_ADVANCE
545 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
546 #undef TARGET_FUNCTION_ARG_PADDING
547 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
548 #undef TARGET_FUNCTION_ARG_BOUNDARY
549 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
551 #undef TARGET_SETUP_INCOMING_VARARGS
552 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
554 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
555 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
557 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
558 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
559 #undef TARGET_TRAMPOLINE_INIT
560 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
561 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
562 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
564 #undef TARGET_WARN_FUNC_RETURN
565 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
567 #undef TARGET_DEFAULT_SHORT_ENUMS
568 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
570 #undef TARGET_ALIGN_ANON_BITFIELD
571 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
573 #undef TARGET_NARROW_VOLATILE_BITFIELD
574 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
576 #undef TARGET_CXX_GUARD_TYPE
577 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
579 #undef TARGET_CXX_GUARD_MASK_BIT
580 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
582 #undef TARGET_CXX_GET_COOKIE_SIZE
583 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
585 #undef TARGET_CXX_COOKIE_HAS_SIZE
586 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
588 #undef TARGET_CXX_CDTOR_RETURNS_THIS
589 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
591 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
592 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
594 #undef TARGET_CXX_USE_AEABI_ATEXIT
595 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
597 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
598 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
599 arm_cxx_determine_class_data_visibility
601 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
602 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
604 #undef TARGET_RETURN_IN_MSB
605 #define TARGET_RETURN_IN_MSB arm_return_in_msb
607 #undef TARGET_RETURN_IN_MEMORY
608 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
610 #undef TARGET_MUST_PASS_IN_STACK
611 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
614 #undef TARGET_ASM_UNWIND_EMIT
615 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
617 /* EABI unwinding tables use a different format for the typeinfo tables. */
618 #undef TARGET_ASM_TTYPE
619 #define TARGET_ASM_TTYPE arm_output_ttype
621 #undef TARGET_ARM_EABI_UNWINDER
622 #define TARGET_ARM_EABI_UNWINDER true
624 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
625 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
627 #endif /* ARM_UNWIND_INFO */
629 #undef TARGET_ASM_INIT_SECTIONS
630 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
632 #undef TARGET_DWARF_REGISTER_SPAN
633 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
635 #undef TARGET_CANNOT_COPY_INSN_P
636 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
639 #undef TARGET_HAVE_TLS
640 #define TARGET_HAVE_TLS true
643 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
644 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
646 #undef TARGET_LEGITIMATE_CONSTANT_P
647 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
649 #undef TARGET_CANNOT_FORCE_CONST_MEM
650 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
652 #undef TARGET_MAX_ANCHOR_OFFSET
653 #define TARGET_MAX_ANCHOR_OFFSET 4095
655 /* The minimum is set such that the total size of the block
656 for a particular anchor is -4088 + 1 + 4095 bytes, which is
657 divisible by eight, ensuring natural spacing of anchors. */
658 #undef TARGET_MIN_ANCHOR_OFFSET
659 #define TARGET_MIN_ANCHOR_OFFSET -4088
661 #undef TARGET_SCHED_ISSUE_RATE
662 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
664 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
665 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
666 arm_first_cycle_multipass_dfa_lookahead
668 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
669 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
670 arm_first_cycle_multipass_dfa_lookahead_guard
672 #undef TARGET_MANGLE_TYPE
673 #define TARGET_MANGLE_TYPE arm_mangle_type
675 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
676 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
678 #undef TARGET_BUILD_BUILTIN_VA_LIST
679 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
680 #undef TARGET_EXPAND_BUILTIN_VA_START
681 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
682 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
683 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
686 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
687 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
690 #undef TARGET_LEGITIMATE_ADDRESS_P
691 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
693 #undef TARGET_PREFERRED_RELOAD_CLASS
694 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
696 #undef TARGET_PROMOTED_TYPE
697 #define TARGET_PROMOTED_TYPE arm_promoted_type
699 #undef TARGET_SCALAR_MODE_SUPPORTED_P
700 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
702 #undef TARGET_COMPUTE_FRAME_LAYOUT
703 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
705 #undef TARGET_FRAME_POINTER_REQUIRED
706 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
708 #undef TARGET_CAN_ELIMINATE
709 #define TARGET_CAN_ELIMINATE arm_can_eliminate
711 #undef TARGET_CONDITIONAL_REGISTER_USAGE
712 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
714 #undef TARGET_CLASS_LIKELY_SPILLED_P
715 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
717 #undef TARGET_VECTORIZE_BUILTINS
718 #define TARGET_VECTORIZE_BUILTINS
720 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
721 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
722 arm_builtin_vectorized_function
724 #undef TARGET_VECTOR_ALIGNMENT
725 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
727 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
728 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
729 arm_vector_alignment_reachable
731 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
732 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
733 arm_builtin_support_vector_misalignment
735 #undef TARGET_PREFERRED_RENAME_CLASS
736 #define TARGET_PREFERRED_RENAME_CLASS \
737 arm_preferred_rename_class
739 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
740 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
741 arm_vectorize_vec_perm_const_ok
743 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
744 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
745 arm_builtin_vectorization_cost
746 #undef TARGET_VECTORIZE_ADD_STMT_COST
747 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
749 #undef TARGET_CANONICALIZE_COMPARISON
750 #define TARGET_CANONICALIZE_COMPARISON \
751 arm_canonicalize_comparison
753 #undef TARGET_ASAN_SHADOW_OFFSET
754 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
756 #undef MAX_INSN_PER_IT_BLOCK
757 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
759 #undef TARGET_CAN_USE_DOLOOP_P
760 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
762 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
763 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
765 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
766 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
768 #undef TARGET_SCHED_FUSION_PRIORITY
769 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
771 #undef TARGET_ASM_FUNCTION_SECTION
772 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
774 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
775 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
777 #undef TARGET_SECTION_TYPE_FLAGS
778 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
780 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
781 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
783 #undef TARGET_C_EXCESS_PRECISION
784 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
786 /* Although the architecture reserves bits 0 and 1, only the former is
787 used for ARM/Thumb ISA selection in v7 and earlier versions. */
788 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
789 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
791 #undef TARGET_FIXED_CONDITION_CODE_REGS
792 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
794 #undef TARGET_HARD_REGNO_NREGS
795 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
796 #undef TARGET_HARD_REGNO_MODE_OK
797 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
799 #undef TARGET_MODES_TIEABLE_P
800 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
802 #undef TARGET_CAN_CHANGE_MODE_CLASS
803 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
805 #undef TARGET_CONSTANT_ALIGNMENT
806 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
808 /* Obstack for minipool constant handling. */
809 static struct obstack minipool_obstack
;
810 static char * minipool_startobj
;
812 /* The maximum number of insns skipped which
813 will be conditionalised if possible. */
814 static int max_insns_skipped
= 5;
816 extern FILE * asm_out_file
;
818 /* True if we are currently building a constant table. */
819 int making_const_table
;
821 /* The processor for which instructions should be scheduled. */
822 enum processor_type arm_tune
= TARGET_CPU_arm_none
;
824 /* The current tuning set. */
825 const struct tune_params
*current_tune
;
/* NOTE(review): in this copy of the file several of the comments below
   have lost the variable declaration that followed them (e.g. after
   "Which floating point hardware...", the ARM Architecture 3M..7
   comments, and the Thumb 1 / Thumb 2 comments).  Verify this section
   against the upstream source before relying on it.  */
827 /* Which floating point hardware to schedule for. */
830 /* Used for Thumb call_via trampolines. */
831 rtx thumb_call_via_label
[14];
832 static int thumb_call_reg_needed
;
834 /* The bits in this mask specify which instruction scheduling options should
836 unsigned int tune_flags
= 0;
838 /* The highest ARM architecture version supported by the
840 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
842 /* Active target architecture and tuning. */
844 struct arm_build_target arm_active_target
;
846 /* The following are used in the arm.md file as equivalents to bits
847 in the above two flag variables. */
849 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
852 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
855 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
858 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
861 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
864 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
867 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
870 /* Nonzero if this chip supports the ARM 6K extensions. */
873 /* Nonzero if this chip supports the ARM 6KZ extensions. */
876 /* Nonzero if instructions present in ARMv6-M can be used. */
879 /* Nonzero if this chip supports the ARM 7 extensions. */
882 /* Nonzero if this chip supports the Large Physical Address Extension. */
883 int arm_arch_lpae
= 0;
885 /* Nonzero if instructions not present in the 'M' profile can be used. */
886 int arm_arch_notm
= 0;
888 /* Nonzero if instructions present in ARMv7E-M can be used. */
891 /* Nonzero if instructions present in ARMv8 can be used. */
894 /* Nonzero if this chip supports the ARMv8.1 extensions. */
897 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
900 /* Nonzero if this chip supports the FP16 instructions extension of ARM
902 int arm_fp16_inst
= 0;
904 /* Nonzero if this chip can benefit from load scheduling. */
905 int arm_ld_sched
= 0;
907 /* Nonzero if this chip is a StrongARM. */
908 int arm_tune_strongarm
= 0;
910 /* Nonzero if this chip supports Intel Wireless MMX technology. */
911 int arm_arch_iwmmxt
= 0;
913 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
914 int arm_arch_iwmmxt2
= 0;
916 /* Nonzero if this chip is an XScale. */
917 int arm_arch_xscale
= 0;
919 /* Nonzero if tuning for XScale */
920 int arm_tune_xscale
= 0;
922 /* Nonzero if we want to tune for stores that access the write-buffer.
923 This typically means an ARM6 or ARM7 with MMU or MPU. */
924 int arm_tune_wbuf
= 0;
926 /* Nonzero if tuning for Cortex-A9. */
927 int arm_tune_cortex_a9
= 0;
929 /* Nonzero if we should define __THUMB_INTERWORK__ in the
931 XXX This is a bit of a hack, it's intended to help work around
932 problems in GLD which doesn't understand that armv5t code is
933 interworking clean. */
934 int arm_cpp_interwork
= 0;
936 /* Nonzero if chip supports Thumb 1. */
939 /* Nonzero if chip supports Thumb 2. */
942 /* Nonzero if chip supports integer division instruction. */
943 int arm_arch_arm_hwdiv
;
944 int arm_arch_thumb_hwdiv
;
946 /* Nonzero if chip disallows volatile memory access in IT block. */
947 int arm_arch_no_volatile_ce
;
949 /* Nonzero if we should use Neon to handle 64-bits operations rather
950 than core registers. */
951 int prefer_neon_for_64bits
= 0;
953 /* Nonzero if we shouldn't use literal pools. */
954 bool arm_disable_literal_pool
= false;
956 /* The register number to be used for the PIC offset register. */
957 unsigned arm_pic_register
= INVALID_REGNUM
;
/* The procedure-call standard in effect by default; presumably set
   during option processing -- confirm in arm_option_override.  */
959 enum arm_pcs arm_pcs_default
;
961 /* For an explanation of these variables, see final_prescan_insn below. */
963 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
964 enum arm_cond_code arm_current_cc
;
967 int arm_target_label
;
968 /* The number of conditionally executed insns, including the current insn. */
969 int arm_condexec_count
= 0;
970 /* A bitmask specifying the patterns for the IT block.
971 Zero means do not output an IT block before this insn. */
972 int arm_condexec_mask
= 0;
973 /* The number of bits used in arm_condexec_mask. */
974 int arm_condexec_masklen
= 0;
976 /* Nonzero if chip supports the ARMv8 CRC instructions. */
977 int arm_arch_crc
= 0;
979 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
980 int arm_arch_dotprod
= 0;
982 /* Nonzero if chip supports the ARMv8-M security extensions. */
983 int arm_arch_cmse
= 0;
985 /* Nonzero if the core has a very small, high-latency, multiply unit. */
986 int arm_m_profile_small_mul
= 0;
988 /* The condition codes of the ARM, and the inverse function. */
/* NOTE(review): the brace lines of the two array initializers below
   appear to be missing from this copy of the file; verify against the
   upstream source.  */
989 static const char * const arm_condition_codes
[] =
991 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
992 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
995 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
996 int arm_regs_in_sequence
[] =
998 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
/* Name of the logical-shift-left operation; presumably used when
   printing shifts in assembler output -- confirm at use sites.  */
1001 #define ARM_LSL_NAME "lsl"
/* True when the two C strings compare equal.  */
1002 #define streq(string1, string2) (strcmp (string1, string2) == 0)
/* Register mask 0xff (the low eight core registers) with the bits for
   the Thumb hard frame pointer, SP, PC and the PIC offset table
   register cleared.  */
1004 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1005 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1006 | (1 << PIC_OFFSET_TABLE_REGNUM)))
1008 /* Initialization code. */
1012 enum processor_type scheduler
;
1013 unsigned int tune_flags
;
1014 const struct tune_params
*tune
;
1017 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1018 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1025 /* arm generic vectorizer costs. */
/* Default per-operation cost weights used by the vectorizer cost model:
   every operation costs 1 except a taken conditional branch (3).
   NOTE(review): the closing brace line of this initializer appears to be
   missing from this copy of the file; verify against upstream.  */
1027 struct cpu_vec_costs arm_default_vec_cost
= {
1028 1, /* scalar_stmt_cost. */
1029 1, /* scalar load_cost. */
1030 1, /* scalar_store_cost. */
1031 1, /* vec_stmt_cost. */
1032 1, /* vec_to_scalar_cost. */
1033 1, /* scalar_to_vec_cost. */
1034 1, /* vec_align_load_cost. */
1035 1, /* vec_unalign_load_cost. */
1036 1, /* vec_unalign_store_cost. */
1037 1, /* vec_store_cost. */
1038 3, /* cond_taken_branch_cost. */
1039 1, /* cond_not_taken_branch_cost. */
1042 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1043 #include "aarch-cost-tables.h"
1047 const struct cpu_cost_table cortexa9_extra_costs
=
1054 COSTS_N_INSNS (1), /* shift_reg. */
1055 COSTS_N_INSNS (1), /* arith_shift. */
1056 COSTS_N_INSNS (2), /* arith_shift_reg. */
1058 COSTS_N_INSNS (1), /* log_shift_reg. */
1059 COSTS_N_INSNS (1), /* extend. */
1060 COSTS_N_INSNS (2), /* extend_arith. */
1061 COSTS_N_INSNS (1), /* bfi. */
1062 COSTS_N_INSNS (1), /* bfx. */
1066 true /* non_exec_costs_exec. */
1071 COSTS_N_INSNS (3), /* simple. */
1072 COSTS_N_INSNS (3), /* flag_setting. */
1073 COSTS_N_INSNS (2), /* extend. */
1074 COSTS_N_INSNS (3), /* add. */
1075 COSTS_N_INSNS (2), /* extend_add. */
1076 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1080 0, /* simple (N/A). */
1081 0, /* flag_setting (N/A). */
1082 COSTS_N_INSNS (4), /* extend. */
1084 COSTS_N_INSNS (4), /* extend_add. */
1090 COSTS_N_INSNS (2), /* load. */
1091 COSTS_N_INSNS (2), /* load_sign_extend. */
1092 COSTS_N_INSNS (2), /* ldrd. */
1093 COSTS_N_INSNS (2), /* ldm_1st. */
1094 1, /* ldm_regs_per_insn_1st. */
1095 2, /* ldm_regs_per_insn_subsequent. */
1096 COSTS_N_INSNS (5), /* loadf. */
1097 COSTS_N_INSNS (5), /* loadd. */
1098 COSTS_N_INSNS (1), /* load_unaligned. */
1099 COSTS_N_INSNS (2), /* store. */
1100 COSTS_N_INSNS (2), /* strd. */
1101 COSTS_N_INSNS (2), /* stm_1st. */
1102 1, /* stm_regs_per_insn_1st. */
1103 2, /* stm_regs_per_insn_subsequent. */
1104 COSTS_N_INSNS (1), /* storef. */
1105 COSTS_N_INSNS (1), /* stored. */
1106 COSTS_N_INSNS (1), /* store_unaligned. */
1107 COSTS_N_INSNS (1), /* loadv. */
1108 COSTS_N_INSNS (1) /* storev. */
1113 COSTS_N_INSNS (14), /* div. */
1114 COSTS_N_INSNS (4), /* mult. */
1115 COSTS_N_INSNS (7), /* mult_addsub. */
1116 COSTS_N_INSNS (30), /* fma. */
1117 COSTS_N_INSNS (3), /* addsub. */
1118 COSTS_N_INSNS (1), /* fpconst. */
1119 COSTS_N_INSNS (1), /* neg. */
1120 COSTS_N_INSNS (3), /* compare. */
1121 COSTS_N_INSNS (3), /* widen. */
1122 COSTS_N_INSNS (3), /* narrow. */
1123 COSTS_N_INSNS (3), /* toint. */
1124 COSTS_N_INSNS (3), /* fromint. */
1125 COSTS_N_INSNS (3) /* roundint. */
1129 COSTS_N_INSNS (24), /* div. */
1130 COSTS_N_INSNS (5), /* mult. */
1131 COSTS_N_INSNS (8), /* mult_addsub. */
1132 COSTS_N_INSNS (30), /* fma. */
1133 COSTS_N_INSNS (3), /* addsub. */
1134 COSTS_N_INSNS (1), /* fpconst. */
1135 COSTS_N_INSNS (1), /* neg. */
1136 COSTS_N_INSNS (3), /* compare. */
1137 COSTS_N_INSNS (3), /* widen. */
1138 COSTS_N_INSNS (3), /* narrow. */
1139 COSTS_N_INSNS (3), /* toint. */
1140 COSTS_N_INSNS (3), /* fromint. */
1141 COSTS_N_INSNS (3) /* roundint. */
1146 COSTS_N_INSNS (1) /* alu. */
1150 const struct cpu_cost_table cortexa8_extra_costs
=
1156 COSTS_N_INSNS (1), /* shift. */
1158 COSTS_N_INSNS (1), /* arith_shift. */
1159 0, /* arith_shift_reg. */
1160 COSTS_N_INSNS (1), /* log_shift. */
1161 0, /* log_shift_reg. */
1163 0, /* extend_arith. */
1169 true /* non_exec_costs_exec. */
1174 COSTS_N_INSNS (1), /* simple. */
1175 COSTS_N_INSNS (1), /* flag_setting. */
1176 COSTS_N_INSNS (1), /* extend. */
1177 COSTS_N_INSNS (1), /* add. */
1178 COSTS_N_INSNS (1), /* extend_add. */
1179 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1183 0, /* simple (N/A). */
1184 0, /* flag_setting (N/A). */
1185 COSTS_N_INSNS (2), /* extend. */
1187 COSTS_N_INSNS (2), /* extend_add. */
1193 COSTS_N_INSNS (1), /* load. */
1194 COSTS_N_INSNS (1), /* load_sign_extend. */
1195 COSTS_N_INSNS (1), /* ldrd. */
1196 COSTS_N_INSNS (1), /* ldm_1st. */
1197 1, /* ldm_regs_per_insn_1st. */
1198 2, /* ldm_regs_per_insn_subsequent. */
1199 COSTS_N_INSNS (1), /* loadf. */
1200 COSTS_N_INSNS (1), /* loadd. */
1201 COSTS_N_INSNS (1), /* load_unaligned. */
1202 COSTS_N_INSNS (1), /* store. */
1203 COSTS_N_INSNS (1), /* strd. */
1204 COSTS_N_INSNS (1), /* stm_1st. */
1205 1, /* stm_regs_per_insn_1st. */
1206 2, /* stm_regs_per_insn_subsequent. */
1207 COSTS_N_INSNS (1), /* storef. */
1208 COSTS_N_INSNS (1), /* stored. */
1209 COSTS_N_INSNS (1), /* store_unaligned. */
1210 COSTS_N_INSNS (1), /* loadv. */
1211 COSTS_N_INSNS (1) /* storev. */
1216 COSTS_N_INSNS (36), /* div. */
1217 COSTS_N_INSNS (11), /* mult. */
1218 COSTS_N_INSNS (20), /* mult_addsub. */
1219 COSTS_N_INSNS (30), /* fma. */
1220 COSTS_N_INSNS (9), /* addsub. */
1221 COSTS_N_INSNS (3), /* fpconst. */
1222 COSTS_N_INSNS (3), /* neg. */
1223 COSTS_N_INSNS (6), /* compare. */
1224 COSTS_N_INSNS (4), /* widen. */
1225 COSTS_N_INSNS (4), /* narrow. */
1226 COSTS_N_INSNS (8), /* toint. */
1227 COSTS_N_INSNS (8), /* fromint. */
1228 COSTS_N_INSNS (8) /* roundint. */
1232 COSTS_N_INSNS (64), /* div. */
1233 COSTS_N_INSNS (16), /* mult. */
1234 COSTS_N_INSNS (25), /* mult_addsub. */
1235 COSTS_N_INSNS (30), /* fma. */
1236 COSTS_N_INSNS (9), /* addsub. */
1237 COSTS_N_INSNS (3), /* fpconst. */
1238 COSTS_N_INSNS (3), /* neg. */
1239 COSTS_N_INSNS (6), /* compare. */
1240 COSTS_N_INSNS (6), /* widen. */
1241 COSTS_N_INSNS (6), /* narrow. */
1242 COSTS_N_INSNS (8), /* toint. */
1243 COSTS_N_INSNS (8), /* fromint. */
1244 COSTS_N_INSNS (8) /* roundint. */
1249 COSTS_N_INSNS (1) /* alu. */
1253 const struct cpu_cost_table cortexa5_extra_costs
=
1259 COSTS_N_INSNS (1), /* shift. */
1260 COSTS_N_INSNS (1), /* shift_reg. */
1261 COSTS_N_INSNS (1), /* arith_shift. */
1262 COSTS_N_INSNS (1), /* arith_shift_reg. */
1263 COSTS_N_INSNS (1), /* log_shift. */
1264 COSTS_N_INSNS (1), /* log_shift_reg. */
1265 COSTS_N_INSNS (1), /* extend. */
1266 COSTS_N_INSNS (1), /* extend_arith. */
1267 COSTS_N_INSNS (1), /* bfi. */
1268 COSTS_N_INSNS (1), /* bfx. */
1269 COSTS_N_INSNS (1), /* clz. */
1270 COSTS_N_INSNS (1), /* rev. */
1272 true /* non_exec_costs_exec. */
1279 COSTS_N_INSNS (1), /* flag_setting. */
1280 COSTS_N_INSNS (1), /* extend. */
1281 COSTS_N_INSNS (1), /* add. */
1282 COSTS_N_INSNS (1), /* extend_add. */
1283 COSTS_N_INSNS (7) /* idiv. */
1287 0, /* simple (N/A). */
1288 0, /* flag_setting (N/A). */
1289 COSTS_N_INSNS (1), /* extend. */
1291 COSTS_N_INSNS (2), /* extend_add. */
1297 COSTS_N_INSNS (1), /* load. */
1298 COSTS_N_INSNS (1), /* load_sign_extend. */
1299 COSTS_N_INSNS (6), /* ldrd. */
1300 COSTS_N_INSNS (1), /* ldm_1st. */
1301 1, /* ldm_regs_per_insn_1st. */
1302 2, /* ldm_regs_per_insn_subsequent. */
1303 COSTS_N_INSNS (2), /* loadf. */
1304 COSTS_N_INSNS (4), /* loadd. */
1305 COSTS_N_INSNS (1), /* load_unaligned. */
1306 COSTS_N_INSNS (1), /* store. */
1307 COSTS_N_INSNS (3), /* strd. */
1308 COSTS_N_INSNS (1), /* stm_1st. */
1309 1, /* stm_regs_per_insn_1st. */
1310 2, /* stm_regs_per_insn_subsequent. */
1311 COSTS_N_INSNS (2), /* storef. */
1312 COSTS_N_INSNS (2), /* stored. */
1313 COSTS_N_INSNS (1), /* store_unaligned. */
1314 COSTS_N_INSNS (1), /* loadv. */
1315 COSTS_N_INSNS (1) /* storev. */
1320 COSTS_N_INSNS (15), /* div. */
1321 COSTS_N_INSNS (3), /* mult. */
1322 COSTS_N_INSNS (7), /* mult_addsub. */
1323 COSTS_N_INSNS (7), /* fma. */
1324 COSTS_N_INSNS (3), /* addsub. */
1325 COSTS_N_INSNS (3), /* fpconst. */
1326 COSTS_N_INSNS (3), /* neg. */
1327 COSTS_N_INSNS (3), /* compare. */
1328 COSTS_N_INSNS (3), /* widen. */
1329 COSTS_N_INSNS (3), /* narrow. */
1330 COSTS_N_INSNS (3), /* toint. */
1331 COSTS_N_INSNS (3), /* fromint. */
1332 COSTS_N_INSNS (3) /* roundint. */
1336 COSTS_N_INSNS (30), /* div. */
1337 COSTS_N_INSNS (6), /* mult. */
1338 COSTS_N_INSNS (10), /* mult_addsub. */
1339 COSTS_N_INSNS (7), /* fma. */
1340 COSTS_N_INSNS (3), /* addsub. */
1341 COSTS_N_INSNS (3), /* fpconst. */
1342 COSTS_N_INSNS (3), /* neg. */
1343 COSTS_N_INSNS (3), /* compare. */
1344 COSTS_N_INSNS (3), /* widen. */
1345 COSTS_N_INSNS (3), /* narrow. */
1346 COSTS_N_INSNS (3), /* toint. */
1347 COSTS_N_INSNS (3), /* fromint. */
1348 COSTS_N_INSNS (3) /* roundint. */
1353 COSTS_N_INSNS (1) /* alu. */
1358 const struct cpu_cost_table cortexa7_extra_costs
=
1364 COSTS_N_INSNS (1), /* shift. */
1365 COSTS_N_INSNS (1), /* shift_reg. */
1366 COSTS_N_INSNS (1), /* arith_shift. */
1367 COSTS_N_INSNS (1), /* arith_shift_reg. */
1368 COSTS_N_INSNS (1), /* log_shift. */
1369 COSTS_N_INSNS (1), /* log_shift_reg. */
1370 COSTS_N_INSNS (1), /* extend. */
1371 COSTS_N_INSNS (1), /* extend_arith. */
1372 COSTS_N_INSNS (1), /* bfi. */
1373 COSTS_N_INSNS (1), /* bfx. */
1374 COSTS_N_INSNS (1), /* clz. */
1375 COSTS_N_INSNS (1), /* rev. */
1377 true /* non_exec_costs_exec. */
1384 COSTS_N_INSNS (1), /* flag_setting. */
1385 COSTS_N_INSNS (1), /* extend. */
1386 COSTS_N_INSNS (1), /* add. */
1387 COSTS_N_INSNS (1), /* extend_add. */
1388 COSTS_N_INSNS (7) /* idiv. */
1392 0, /* simple (N/A). */
1393 0, /* flag_setting (N/A). */
1394 COSTS_N_INSNS (1), /* extend. */
1396 COSTS_N_INSNS (2), /* extend_add. */
1402 COSTS_N_INSNS (1), /* load. */
1403 COSTS_N_INSNS (1), /* load_sign_extend. */
1404 COSTS_N_INSNS (3), /* ldrd. */
1405 COSTS_N_INSNS (1), /* ldm_1st. */
1406 1, /* ldm_regs_per_insn_1st. */
1407 2, /* ldm_regs_per_insn_subsequent. */
1408 COSTS_N_INSNS (2), /* loadf. */
1409 COSTS_N_INSNS (2), /* loadd. */
1410 COSTS_N_INSNS (1), /* load_unaligned. */
1411 COSTS_N_INSNS (1), /* store. */
1412 COSTS_N_INSNS (3), /* strd. */
1413 COSTS_N_INSNS (1), /* stm_1st. */
1414 1, /* stm_regs_per_insn_1st. */
1415 2, /* stm_regs_per_insn_subsequent. */
1416 COSTS_N_INSNS (2), /* storef. */
1417 COSTS_N_INSNS (2), /* stored. */
1418 COSTS_N_INSNS (1), /* store_unaligned. */
1419 COSTS_N_INSNS (1), /* loadv. */
1420 COSTS_N_INSNS (1) /* storev. */
1425 COSTS_N_INSNS (15), /* div. */
1426 COSTS_N_INSNS (3), /* mult. */
1427 COSTS_N_INSNS (7), /* mult_addsub. */
1428 COSTS_N_INSNS (7), /* fma. */
1429 COSTS_N_INSNS (3), /* addsub. */
1430 COSTS_N_INSNS (3), /* fpconst. */
1431 COSTS_N_INSNS (3), /* neg. */
1432 COSTS_N_INSNS (3), /* compare. */
1433 COSTS_N_INSNS (3), /* widen. */
1434 COSTS_N_INSNS (3), /* narrow. */
1435 COSTS_N_INSNS (3), /* toint. */
1436 COSTS_N_INSNS (3), /* fromint. */
1437 COSTS_N_INSNS (3) /* roundint. */
1441 COSTS_N_INSNS (30), /* div. */
1442 COSTS_N_INSNS (6), /* mult. */
1443 COSTS_N_INSNS (10), /* mult_addsub. */
1444 COSTS_N_INSNS (7), /* fma. */
1445 COSTS_N_INSNS (3), /* addsub. */
1446 COSTS_N_INSNS (3), /* fpconst. */
1447 COSTS_N_INSNS (3), /* neg. */
1448 COSTS_N_INSNS (3), /* compare. */
1449 COSTS_N_INSNS (3), /* widen. */
1450 COSTS_N_INSNS (3), /* narrow. */
1451 COSTS_N_INSNS (3), /* toint. */
1452 COSTS_N_INSNS (3), /* fromint. */
1453 COSTS_N_INSNS (3) /* roundint. */
1458 COSTS_N_INSNS (1) /* alu. */
1462 const struct cpu_cost_table cortexa12_extra_costs
=
1469 COSTS_N_INSNS (1), /* shift_reg. */
1470 COSTS_N_INSNS (1), /* arith_shift. */
1471 COSTS_N_INSNS (1), /* arith_shift_reg. */
1472 COSTS_N_INSNS (1), /* log_shift. */
1473 COSTS_N_INSNS (1), /* log_shift_reg. */
1475 COSTS_N_INSNS (1), /* extend_arith. */
1477 COSTS_N_INSNS (1), /* bfx. */
1478 COSTS_N_INSNS (1), /* clz. */
1479 COSTS_N_INSNS (1), /* rev. */
1481 true /* non_exec_costs_exec. */
1486 COSTS_N_INSNS (2), /* simple. */
1487 COSTS_N_INSNS (3), /* flag_setting. */
1488 COSTS_N_INSNS (2), /* extend. */
1489 COSTS_N_INSNS (3), /* add. */
1490 COSTS_N_INSNS (2), /* extend_add. */
1491 COSTS_N_INSNS (18) /* idiv. */
1495 0, /* simple (N/A). */
1496 0, /* flag_setting (N/A). */
1497 COSTS_N_INSNS (3), /* extend. */
1499 COSTS_N_INSNS (3), /* extend_add. */
1505 COSTS_N_INSNS (3), /* load. */
1506 COSTS_N_INSNS (3), /* load_sign_extend. */
1507 COSTS_N_INSNS (3), /* ldrd. */
1508 COSTS_N_INSNS (3), /* ldm_1st. */
1509 1, /* ldm_regs_per_insn_1st. */
1510 2, /* ldm_regs_per_insn_subsequent. */
1511 COSTS_N_INSNS (3), /* loadf. */
1512 COSTS_N_INSNS (3), /* loadd. */
1513 0, /* load_unaligned. */
1517 1, /* stm_regs_per_insn_1st. */
1518 2, /* stm_regs_per_insn_subsequent. */
1519 COSTS_N_INSNS (2), /* storef. */
1520 COSTS_N_INSNS (2), /* stored. */
1521 0, /* store_unaligned. */
1522 COSTS_N_INSNS (1), /* loadv. */
1523 COSTS_N_INSNS (1) /* storev. */
1528 COSTS_N_INSNS (17), /* div. */
1529 COSTS_N_INSNS (4), /* mult. */
1530 COSTS_N_INSNS (8), /* mult_addsub. */
1531 COSTS_N_INSNS (8), /* fma. */
1532 COSTS_N_INSNS (4), /* addsub. */
1533 COSTS_N_INSNS (2), /* fpconst. */
1534 COSTS_N_INSNS (2), /* neg. */
1535 COSTS_N_INSNS (2), /* compare. */
1536 COSTS_N_INSNS (4), /* widen. */
1537 COSTS_N_INSNS (4), /* narrow. */
1538 COSTS_N_INSNS (4), /* toint. */
1539 COSTS_N_INSNS (4), /* fromint. */
1540 COSTS_N_INSNS (4) /* roundint. */
1544 COSTS_N_INSNS (31), /* div. */
1545 COSTS_N_INSNS (4), /* mult. */
1546 COSTS_N_INSNS (8), /* mult_addsub. */
1547 COSTS_N_INSNS (8), /* fma. */
1548 COSTS_N_INSNS (4), /* addsub. */
1549 COSTS_N_INSNS (2), /* fpconst. */
1550 COSTS_N_INSNS (2), /* neg. */
1551 COSTS_N_INSNS (2), /* compare. */
1552 COSTS_N_INSNS (4), /* widen. */
1553 COSTS_N_INSNS (4), /* narrow. */
1554 COSTS_N_INSNS (4), /* toint. */
1555 COSTS_N_INSNS (4), /* fromint. */
1556 COSTS_N_INSNS (4) /* roundint. */
1561 COSTS_N_INSNS (1) /* alu. */
1565 const struct cpu_cost_table cortexa15_extra_costs
=
1573 COSTS_N_INSNS (1), /* arith_shift. */
1574 COSTS_N_INSNS (1), /* arith_shift_reg. */
1575 COSTS_N_INSNS (1), /* log_shift. */
1576 COSTS_N_INSNS (1), /* log_shift_reg. */
1578 COSTS_N_INSNS (1), /* extend_arith. */
1579 COSTS_N_INSNS (1), /* bfi. */
1584 true /* non_exec_costs_exec. */
1589 COSTS_N_INSNS (2), /* simple. */
1590 COSTS_N_INSNS (3), /* flag_setting. */
1591 COSTS_N_INSNS (2), /* extend. */
1592 COSTS_N_INSNS (2), /* add. */
1593 COSTS_N_INSNS (2), /* extend_add. */
1594 COSTS_N_INSNS (18) /* idiv. */
1598 0, /* simple (N/A). */
1599 0, /* flag_setting (N/A). */
1600 COSTS_N_INSNS (3), /* extend. */
1602 COSTS_N_INSNS (3), /* extend_add. */
1608 COSTS_N_INSNS (3), /* load. */
1609 COSTS_N_INSNS (3), /* load_sign_extend. */
1610 COSTS_N_INSNS (3), /* ldrd. */
1611 COSTS_N_INSNS (4), /* ldm_1st. */
1612 1, /* ldm_regs_per_insn_1st. */
1613 2, /* ldm_regs_per_insn_subsequent. */
1614 COSTS_N_INSNS (4), /* loadf. */
1615 COSTS_N_INSNS (4), /* loadd. */
1616 0, /* load_unaligned. */
1619 COSTS_N_INSNS (1), /* stm_1st. */
1620 1, /* stm_regs_per_insn_1st. */
1621 2, /* stm_regs_per_insn_subsequent. */
1624 0, /* store_unaligned. */
1625 COSTS_N_INSNS (1), /* loadv. */
1626 COSTS_N_INSNS (1) /* storev. */
1631 COSTS_N_INSNS (17), /* div. */
1632 COSTS_N_INSNS (4), /* mult. */
1633 COSTS_N_INSNS (8), /* mult_addsub. */
1634 COSTS_N_INSNS (8), /* fma. */
1635 COSTS_N_INSNS (4), /* addsub. */
1636 COSTS_N_INSNS (2), /* fpconst. */
1637 COSTS_N_INSNS (2), /* neg. */
1638 COSTS_N_INSNS (5), /* compare. */
1639 COSTS_N_INSNS (4), /* widen. */
1640 COSTS_N_INSNS (4), /* narrow. */
1641 COSTS_N_INSNS (4), /* toint. */
1642 COSTS_N_INSNS (4), /* fromint. */
1643 COSTS_N_INSNS (4) /* roundint. */
1647 COSTS_N_INSNS (31), /* div. */
1648 COSTS_N_INSNS (4), /* mult. */
1649 COSTS_N_INSNS (8), /* mult_addsub. */
1650 COSTS_N_INSNS (8), /* fma. */
1651 COSTS_N_INSNS (4), /* addsub. */
1652 COSTS_N_INSNS (2), /* fpconst. */
1653 COSTS_N_INSNS (2), /* neg. */
1654 COSTS_N_INSNS (2), /* compare. */
1655 COSTS_N_INSNS (4), /* widen. */
1656 COSTS_N_INSNS (4), /* narrow. */
1657 COSTS_N_INSNS (4), /* toint. */
1658 COSTS_N_INSNS (4), /* fromint. */
1659 COSTS_N_INSNS (4) /* roundint. */
1664 COSTS_N_INSNS (1) /* alu. */
1668 const struct cpu_cost_table v7m_extra_costs
=
1676 0, /* arith_shift. */
1677 COSTS_N_INSNS (1), /* arith_shift_reg. */
1679 COSTS_N_INSNS (1), /* log_shift_reg. */
1681 COSTS_N_INSNS (1), /* extend_arith. */
1686 COSTS_N_INSNS (1), /* non_exec. */
1687 false /* non_exec_costs_exec. */
1692 COSTS_N_INSNS (1), /* simple. */
1693 COSTS_N_INSNS (1), /* flag_setting. */
1694 COSTS_N_INSNS (2), /* extend. */
1695 COSTS_N_INSNS (1), /* add. */
1696 COSTS_N_INSNS (3), /* extend_add. */
1697 COSTS_N_INSNS (8) /* idiv. */
1701 0, /* simple (N/A). */
1702 0, /* flag_setting (N/A). */
1703 COSTS_N_INSNS (2), /* extend. */
1705 COSTS_N_INSNS (3), /* extend_add. */
1711 COSTS_N_INSNS (2), /* load. */
1712 0, /* load_sign_extend. */
1713 COSTS_N_INSNS (3), /* ldrd. */
1714 COSTS_N_INSNS (2), /* ldm_1st. */
1715 1, /* ldm_regs_per_insn_1st. */
1716 1, /* ldm_regs_per_insn_subsequent. */
1717 COSTS_N_INSNS (2), /* loadf. */
1718 COSTS_N_INSNS (3), /* loadd. */
1719 COSTS_N_INSNS (1), /* load_unaligned. */
1720 COSTS_N_INSNS (2), /* store. */
1721 COSTS_N_INSNS (3), /* strd. */
1722 COSTS_N_INSNS (2), /* stm_1st. */
1723 1, /* stm_regs_per_insn_1st. */
1724 1, /* stm_regs_per_insn_subsequent. */
1725 COSTS_N_INSNS (2), /* storef. */
1726 COSTS_N_INSNS (3), /* stored. */
1727 COSTS_N_INSNS (1), /* store_unaligned. */
1728 COSTS_N_INSNS (1), /* loadv. */
1729 COSTS_N_INSNS (1) /* storev. */
1734 COSTS_N_INSNS (7), /* div. */
1735 COSTS_N_INSNS (2), /* mult. */
1736 COSTS_N_INSNS (5), /* mult_addsub. */
1737 COSTS_N_INSNS (3), /* fma. */
1738 COSTS_N_INSNS (1), /* addsub. */
1750 COSTS_N_INSNS (15), /* div. */
1751 COSTS_N_INSNS (5), /* mult. */
1752 COSTS_N_INSNS (7), /* mult_addsub. */
1753 COSTS_N_INSNS (7), /* fma. */
1754 COSTS_N_INSNS (3), /* addsub. */
1767 COSTS_N_INSNS (1) /* alu. */
/* Generic addressing-mode cost table: every addressing-mode variant
   (AMO_DEFAULT / AMO_NO_WB / AMO_WB) in each of the three groups costs
   zero extra instructions.
   NOTE(review): the brace lines of this initializer appear to be
   missing from this copy of the file; verify against upstream.  */
1771 const struct addr_mode_cost_table generic_addr_mode_costs
=
1775 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1776 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1777 COSTS_N_INSNS (0) /* AMO_WB. */
1781 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1782 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1783 COSTS_N_INSNS (0) /* AMO_WB. */
1787 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1788 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1789 COSTS_N_INSNS (0) /* AMO_WB. */
1793 const struct tune_params arm_slowmul_tune
=
1795 &generic_extra_costs
, /* Insn extra costs. */
1796 &generic_addr_mode_costs
, /* Addressing mode costs. */
1797 NULL
, /* Sched adj cost. */
1798 arm_default_branch_cost
,
1799 &arm_default_vec_cost
,
1800 3, /* Constant limit. */
1801 5, /* Max cond insns. */
1802 8, /* Memset max inline. */
1803 1, /* Issue rate. */
1804 ARM_PREFETCH_NOT_BENEFICIAL
,
1805 tune_params::PREF_CONST_POOL_TRUE
,
1806 tune_params::PREF_LDRD_FALSE
,
1807 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1808 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1809 tune_params::DISPARAGE_FLAGS_NEITHER
,
1810 tune_params::PREF_NEON_64_FALSE
,
1811 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1812 tune_params::FUSE_NOTHING
,
1813 tune_params::SCHED_AUTOPREF_OFF
1816 const struct tune_params arm_fastmul_tune
=
1818 &generic_extra_costs
, /* Insn extra costs. */
1819 &generic_addr_mode_costs
, /* Addressing mode costs. */
1820 NULL
, /* Sched adj cost. */
1821 arm_default_branch_cost
,
1822 &arm_default_vec_cost
,
1823 1, /* Constant limit. */
1824 5, /* Max cond insns. */
1825 8, /* Memset max inline. */
1826 1, /* Issue rate. */
1827 ARM_PREFETCH_NOT_BENEFICIAL
,
1828 tune_params::PREF_CONST_POOL_TRUE
,
1829 tune_params::PREF_LDRD_FALSE
,
1830 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1831 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1832 tune_params::DISPARAGE_FLAGS_NEITHER
,
1833 tune_params::PREF_NEON_64_FALSE
,
1834 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1835 tune_params::FUSE_NOTHING
,
1836 tune_params::SCHED_AUTOPREF_OFF
1839 /* StrongARM has early execution of branches, so a sequence that is worth
1840 skipping is shorter. Set max_insns_skipped to a lower value. */
1842 const struct tune_params arm_strongarm_tune
=
1844 &generic_extra_costs
, /* Insn extra costs. */
1845 &generic_addr_mode_costs
, /* Addressing mode costs. */
1846 NULL
, /* Sched adj cost. */
1847 arm_default_branch_cost
,
1848 &arm_default_vec_cost
,
1849 1, /* Constant limit. */
1850 3, /* Max cond insns. */
1851 8, /* Memset max inline. */
1852 1, /* Issue rate. */
1853 ARM_PREFETCH_NOT_BENEFICIAL
,
1854 tune_params::PREF_CONST_POOL_TRUE
,
1855 tune_params::PREF_LDRD_FALSE
,
1856 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1857 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1858 tune_params::DISPARAGE_FLAGS_NEITHER
,
1859 tune_params::PREF_NEON_64_FALSE
,
1860 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1861 tune_params::FUSE_NOTHING
,
1862 tune_params::SCHED_AUTOPREF_OFF
1865 const struct tune_params arm_xscale_tune
=
1867 &generic_extra_costs
, /* Insn extra costs. */
1868 &generic_addr_mode_costs
, /* Addressing mode costs. */
1869 xscale_sched_adjust_cost
,
1870 arm_default_branch_cost
,
1871 &arm_default_vec_cost
,
1872 2, /* Constant limit. */
1873 3, /* Max cond insns. */
1874 8, /* Memset max inline. */
1875 1, /* Issue rate. */
1876 ARM_PREFETCH_NOT_BENEFICIAL
,
1877 tune_params::PREF_CONST_POOL_TRUE
,
1878 tune_params::PREF_LDRD_FALSE
,
1879 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1880 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1881 tune_params::DISPARAGE_FLAGS_NEITHER
,
1882 tune_params::PREF_NEON_64_FALSE
,
1883 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1884 tune_params::FUSE_NOTHING
,
1885 tune_params::SCHED_AUTOPREF_OFF
1888 const struct tune_params arm_9e_tune
=
1890 &generic_extra_costs
, /* Insn extra costs. */
1891 &generic_addr_mode_costs
, /* Addressing mode costs. */
1892 NULL
, /* Sched adj cost. */
1893 arm_default_branch_cost
,
1894 &arm_default_vec_cost
,
1895 1, /* Constant limit. */
1896 5, /* Max cond insns. */
1897 8, /* Memset max inline. */
1898 1, /* Issue rate. */
1899 ARM_PREFETCH_NOT_BENEFICIAL
,
1900 tune_params::PREF_CONST_POOL_TRUE
,
1901 tune_params::PREF_LDRD_FALSE
,
1902 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1903 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1904 tune_params::DISPARAGE_FLAGS_NEITHER
,
1905 tune_params::PREF_NEON_64_FALSE
,
1906 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1907 tune_params::FUSE_NOTHING
,
1908 tune_params::SCHED_AUTOPREF_OFF
1911 const struct tune_params arm_marvell_pj4_tune
=
1913 &generic_extra_costs
, /* Insn extra costs. */
1914 &generic_addr_mode_costs
, /* Addressing mode costs. */
1915 NULL
, /* Sched adj cost. */
1916 arm_default_branch_cost
,
1917 &arm_default_vec_cost
,
1918 1, /* Constant limit. */
1919 5, /* Max cond insns. */
1920 8, /* Memset max inline. */
1921 2, /* Issue rate. */
1922 ARM_PREFETCH_NOT_BENEFICIAL
,
1923 tune_params::PREF_CONST_POOL_TRUE
,
1924 tune_params::PREF_LDRD_FALSE
,
1925 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1926 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1927 tune_params::DISPARAGE_FLAGS_NEITHER
,
1928 tune_params::PREF_NEON_64_FALSE
,
1929 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1930 tune_params::FUSE_NOTHING
,
1931 tune_params::SCHED_AUTOPREF_OFF
1934 const struct tune_params arm_v6t2_tune
=
1936 &generic_extra_costs
, /* Insn extra costs. */
1937 &generic_addr_mode_costs
, /* Addressing mode costs. */
1938 NULL
, /* Sched adj cost. */
1939 arm_default_branch_cost
,
1940 &arm_default_vec_cost
,
1941 1, /* Constant limit. */
1942 5, /* Max cond insns. */
1943 8, /* Memset max inline. */
1944 1, /* Issue rate. */
1945 ARM_PREFETCH_NOT_BENEFICIAL
,
1946 tune_params::PREF_CONST_POOL_FALSE
,
1947 tune_params::PREF_LDRD_FALSE
,
1948 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1949 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1950 tune_params::DISPARAGE_FLAGS_NEITHER
,
1951 tune_params::PREF_NEON_64_FALSE
,
1952 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1953 tune_params::FUSE_NOTHING
,
1954 tune_params::SCHED_AUTOPREF_OFF
1958 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1959 const struct tune_params arm_cortex_tune
=
1961 &generic_extra_costs
,
1962 &generic_addr_mode_costs
, /* Addressing mode costs. */
1963 NULL
, /* Sched adj cost. */
1964 arm_default_branch_cost
,
1965 &arm_default_vec_cost
,
1966 1, /* Constant limit. */
1967 5, /* Max cond insns. */
1968 8, /* Memset max inline. */
1969 2, /* Issue rate. */
1970 ARM_PREFETCH_NOT_BENEFICIAL
,
1971 tune_params::PREF_CONST_POOL_FALSE
,
1972 tune_params::PREF_LDRD_FALSE
,
1973 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1974 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1975 tune_params::DISPARAGE_FLAGS_NEITHER
,
1976 tune_params::PREF_NEON_64_FALSE
,
1977 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1978 tune_params::FUSE_NOTHING
,
1979 tune_params::SCHED_AUTOPREF_OFF
1982 const struct tune_params arm_cortex_a8_tune
=
1984 &cortexa8_extra_costs
,
1985 &generic_addr_mode_costs
, /* Addressing mode costs. */
1986 NULL
, /* Sched adj cost. */
1987 arm_default_branch_cost
,
1988 &arm_default_vec_cost
,
1989 1, /* Constant limit. */
1990 5, /* Max cond insns. */
1991 8, /* Memset max inline. */
1992 2, /* Issue rate. */
1993 ARM_PREFETCH_NOT_BENEFICIAL
,
1994 tune_params::PREF_CONST_POOL_FALSE
,
1995 tune_params::PREF_LDRD_FALSE
,
1996 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1997 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1998 tune_params::DISPARAGE_FLAGS_NEITHER
,
1999 tune_params::PREF_NEON_64_FALSE
,
2000 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2001 tune_params::FUSE_NOTHING
,
2002 tune_params::SCHED_AUTOPREF_OFF
2005 const struct tune_params arm_cortex_a7_tune
=
2007 &cortexa7_extra_costs
,
2008 &generic_addr_mode_costs
, /* Addressing mode costs. */
2009 NULL
, /* Sched adj cost. */
2010 arm_default_branch_cost
,
2011 &arm_default_vec_cost
,
2012 1, /* Constant limit. */
2013 5, /* Max cond insns. */
2014 8, /* Memset max inline. */
2015 2, /* Issue rate. */
2016 ARM_PREFETCH_NOT_BENEFICIAL
,
2017 tune_params::PREF_CONST_POOL_FALSE
,
2018 tune_params::PREF_LDRD_FALSE
,
2019 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2020 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2021 tune_params::DISPARAGE_FLAGS_NEITHER
,
2022 tune_params::PREF_NEON_64_FALSE
,
2023 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2024 tune_params::FUSE_NOTHING
,
2025 tune_params::SCHED_AUTOPREF_OFF
2028 const struct tune_params arm_cortex_a15_tune
=
2030 &cortexa15_extra_costs
,
2031 &generic_addr_mode_costs
, /* Addressing mode costs. */
2032 NULL
, /* Sched adj cost. */
2033 arm_default_branch_cost
,
2034 &arm_default_vec_cost
,
2035 1, /* Constant limit. */
2036 2, /* Max cond insns. */
2037 8, /* Memset max inline. */
2038 3, /* Issue rate. */
2039 ARM_PREFETCH_NOT_BENEFICIAL
,
2040 tune_params::PREF_CONST_POOL_FALSE
,
2041 tune_params::PREF_LDRD_TRUE
,
2042 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2043 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2044 tune_params::DISPARAGE_FLAGS_ALL
,
2045 tune_params::PREF_NEON_64_FALSE
,
2046 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2047 tune_params::FUSE_NOTHING
,
2048 tune_params::SCHED_AUTOPREF_FULL
2051 const struct tune_params arm_cortex_a35_tune
=
2053 &cortexa53_extra_costs
,
2054 &generic_addr_mode_costs
, /* Addressing mode costs. */
2055 NULL
, /* Sched adj cost. */
2056 arm_default_branch_cost
,
2057 &arm_default_vec_cost
,
2058 1, /* Constant limit. */
2059 5, /* Max cond insns. */
2060 8, /* Memset max inline. */
2061 1, /* Issue rate. */
2062 ARM_PREFETCH_NOT_BENEFICIAL
,
2063 tune_params::PREF_CONST_POOL_FALSE
,
2064 tune_params::PREF_LDRD_FALSE
,
2065 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2066 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2067 tune_params::DISPARAGE_FLAGS_NEITHER
,
2068 tune_params::PREF_NEON_64_FALSE
,
2069 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2070 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2071 tune_params::SCHED_AUTOPREF_OFF
2074 const struct tune_params arm_cortex_a53_tune
=
2076 &cortexa53_extra_costs
,
2077 &generic_addr_mode_costs
, /* Addressing mode costs. */
2078 NULL
, /* Sched adj cost. */
2079 arm_default_branch_cost
,
2080 &arm_default_vec_cost
,
2081 1, /* Constant limit. */
2082 5, /* Max cond insns. */
2083 8, /* Memset max inline. */
2084 2, /* Issue rate. */
2085 ARM_PREFETCH_NOT_BENEFICIAL
,
2086 tune_params::PREF_CONST_POOL_FALSE
,
2087 tune_params::PREF_LDRD_FALSE
,
2088 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2089 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2090 tune_params::DISPARAGE_FLAGS_NEITHER
,
2091 tune_params::PREF_NEON_64_FALSE
,
2092 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2093 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2094 tune_params::SCHED_AUTOPREF_OFF
2097 const struct tune_params arm_cortex_a57_tune
=
2099 &cortexa57_extra_costs
,
2100 &generic_addr_mode_costs
, /* addressing mode costs */
2101 NULL
, /* Sched adj cost. */
2102 arm_default_branch_cost
,
2103 &arm_default_vec_cost
,
2104 1, /* Constant limit. */
2105 2, /* Max cond insns. */
2106 8, /* Memset max inline. */
2107 3, /* Issue rate. */
2108 ARM_PREFETCH_NOT_BENEFICIAL
,
2109 tune_params::PREF_CONST_POOL_FALSE
,
2110 tune_params::PREF_LDRD_TRUE
,
2111 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2112 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2113 tune_params::DISPARAGE_FLAGS_ALL
,
2114 tune_params::PREF_NEON_64_FALSE
,
2115 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2116 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2117 tune_params::SCHED_AUTOPREF_FULL
2120 const struct tune_params arm_exynosm1_tune
=
2122 &exynosm1_extra_costs
,
2123 &generic_addr_mode_costs
, /* Addressing mode costs. */
2124 NULL
, /* Sched adj cost. */
2125 arm_default_branch_cost
,
2126 &arm_default_vec_cost
,
2127 1, /* Constant limit. */
2128 2, /* Max cond insns. */
2129 8, /* Memset max inline. */
2130 3, /* Issue rate. */
2131 ARM_PREFETCH_NOT_BENEFICIAL
,
2132 tune_params::PREF_CONST_POOL_FALSE
,
2133 tune_params::PREF_LDRD_TRUE
,
2134 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2135 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2136 tune_params::DISPARAGE_FLAGS_ALL
,
2137 tune_params::PREF_NEON_64_FALSE
,
2138 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2139 tune_params::FUSE_NOTHING
,
2140 tune_params::SCHED_AUTOPREF_OFF
2143 const struct tune_params arm_xgene1_tune
=
2145 &xgene1_extra_costs
,
2146 &generic_addr_mode_costs
, /* Addressing mode costs. */
2147 NULL
, /* Sched adj cost. */
2148 arm_default_branch_cost
,
2149 &arm_default_vec_cost
,
2150 1, /* Constant limit. */
2151 2, /* Max cond insns. */
2152 32, /* Memset max inline. */
2153 4, /* Issue rate. */
2154 ARM_PREFETCH_NOT_BENEFICIAL
,
2155 tune_params::PREF_CONST_POOL_FALSE
,
2156 tune_params::PREF_LDRD_TRUE
,
2157 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2158 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2159 tune_params::DISPARAGE_FLAGS_ALL
,
2160 tune_params::PREF_NEON_64_FALSE
,
2161 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2162 tune_params::FUSE_NOTHING
,
2163 tune_params::SCHED_AUTOPREF_OFF
2166 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2167 less appealing. Set max_insns_skipped to a low value. */
2169 const struct tune_params arm_cortex_a5_tune
=
2171 &cortexa5_extra_costs
,
2172 &generic_addr_mode_costs
, /* Addressing mode costs. */
2173 NULL
, /* Sched adj cost. */
2174 arm_cortex_a5_branch_cost
,
2175 &arm_default_vec_cost
,
2176 1, /* Constant limit. */
2177 1, /* Max cond insns. */
2178 8, /* Memset max inline. */
2179 2, /* Issue rate. */
2180 ARM_PREFETCH_NOT_BENEFICIAL
,
2181 tune_params::PREF_CONST_POOL_FALSE
,
2182 tune_params::PREF_LDRD_FALSE
,
2183 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2184 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2185 tune_params::DISPARAGE_FLAGS_NEITHER
,
2186 tune_params::PREF_NEON_64_FALSE
,
2187 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2188 tune_params::FUSE_NOTHING
,
2189 tune_params::SCHED_AUTOPREF_OFF
2192 const struct tune_params arm_cortex_a9_tune
=
2194 &cortexa9_extra_costs
,
2195 &generic_addr_mode_costs
, /* Addressing mode costs. */
2196 cortex_a9_sched_adjust_cost
,
2197 arm_default_branch_cost
,
2198 &arm_default_vec_cost
,
2199 1, /* Constant limit. */
2200 5, /* Max cond insns. */
2201 8, /* Memset max inline. */
2202 2, /* Issue rate. */
2203 ARM_PREFETCH_BENEFICIAL(4,32,32),
2204 tune_params::PREF_CONST_POOL_FALSE
,
2205 tune_params::PREF_LDRD_FALSE
,
2206 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2207 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2208 tune_params::DISPARAGE_FLAGS_NEITHER
,
2209 tune_params::PREF_NEON_64_FALSE
,
2210 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2211 tune_params::FUSE_NOTHING
,
2212 tune_params::SCHED_AUTOPREF_OFF
2215 const struct tune_params arm_cortex_a12_tune
=
2217 &cortexa12_extra_costs
,
2218 &generic_addr_mode_costs
, /* Addressing mode costs. */
2219 NULL
, /* Sched adj cost. */
2220 arm_default_branch_cost
,
2221 &arm_default_vec_cost
, /* Vectorizer costs. */
2222 1, /* Constant limit. */
2223 2, /* Max cond insns. */
2224 8, /* Memset max inline. */
2225 2, /* Issue rate. */
2226 ARM_PREFETCH_NOT_BENEFICIAL
,
2227 tune_params::PREF_CONST_POOL_FALSE
,
2228 tune_params::PREF_LDRD_TRUE
,
2229 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2230 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2231 tune_params::DISPARAGE_FLAGS_ALL
,
2232 tune_params::PREF_NEON_64_FALSE
,
2233 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2234 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2235 tune_params::SCHED_AUTOPREF_OFF
2238 const struct tune_params arm_cortex_a73_tune
=
2240 &cortexa57_extra_costs
,
2241 &generic_addr_mode_costs
, /* Addressing mode costs. */
2242 NULL
, /* Sched adj cost. */
2243 arm_default_branch_cost
,
2244 &arm_default_vec_cost
, /* Vectorizer costs. */
2245 1, /* Constant limit. */
2246 2, /* Max cond insns. */
2247 8, /* Memset max inline. */
2248 2, /* Issue rate. */
2249 ARM_PREFETCH_NOT_BENEFICIAL
,
2250 tune_params::PREF_CONST_POOL_FALSE
,
2251 tune_params::PREF_LDRD_TRUE
,
2252 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2253 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2254 tune_params::DISPARAGE_FLAGS_ALL
,
2255 tune_params::PREF_NEON_64_FALSE
,
2256 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2257 FUSE_OPS (tune_params::FUSE_AES_AESMC
| tune_params::FUSE_MOVW_MOVT
),
2258 tune_params::SCHED_AUTOPREF_FULL
2261 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2262 cycle to execute each. An LDR from the constant pool also takes two cycles
2263 to execute, but mildly increases pipelining opportunity (consecutive
2264 loads/stores can be pipelined together, saving one cycle), and may also
2265 improve icache utilisation. Hence we prefer the constant pool for such
2268 const struct tune_params arm_v7m_tune
=
2271 &generic_addr_mode_costs
, /* Addressing mode costs. */
2272 NULL
, /* Sched adj cost. */
2273 arm_cortex_m_branch_cost
,
2274 &arm_default_vec_cost
,
2275 1, /* Constant limit. */
2276 2, /* Max cond insns. */
2277 8, /* Memset max inline. */
2278 1, /* Issue rate. */
2279 ARM_PREFETCH_NOT_BENEFICIAL
,
2280 tune_params::PREF_CONST_POOL_TRUE
,
2281 tune_params::PREF_LDRD_FALSE
,
2282 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2283 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2284 tune_params::DISPARAGE_FLAGS_NEITHER
,
2285 tune_params::PREF_NEON_64_FALSE
,
2286 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2287 tune_params::FUSE_NOTHING
,
2288 tune_params::SCHED_AUTOPREF_OFF
2291 /* Cortex-M7 tuning. */
2293 const struct tune_params arm_cortex_m7_tune
=
2296 &generic_addr_mode_costs
, /* Addressing mode costs. */
2297 NULL
, /* Sched adj cost. */
2298 arm_cortex_m7_branch_cost
,
2299 &arm_default_vec_cost
,
2300 0, /* Constant limit. */
2301 1, /* Max cond insns. */
2302 8, /* Memset max inline. */
2303 2, /* Issue rate. */
2304 ARM_PREFETCH_NOT_BENEFICIAL
,
2305 tune_params::PREF_CONST_POOL_TRUE
,
2306 tune_params::PREF_LDRD_FALSE
,
2307 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2308 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2309 tune_params::DISPARAGE_FLAGS_NEITHER
,
2310 tune_params::PREF_NEON_64_FALSE
,
2311 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2312 tune_params::FUSE_NOTHING
,
2313 tune_params::SCHED_AUTOPREF_OFF
2316 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2317 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2319 const struct tune_params arm_v6m_tune
=
2321 &generic_extra_costs
, /* Insn extra costs. */
2322 &generic_addr_mode_costs
, /* Addressing mode costs. */
2323 NULL
, /* Sched adj cost. */
2324 arm_default_branch_cost
,
2325 &arm_default_vec_cost
, /* Vectorizer costs. */
2326 1, /* Constant limit. */
2327 5, /* Max cond insns. */
2328 8, /* Memset max inline. */
2329 1, /* Issue rate. */
2330 ARM_PREFETCH_NOT_BENEFICIAL
,
2331 tune_params::PREF_CONST_POOL_FALSE
,
2332 tune_params::PREF_LDRD_FALSE
,
2333 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2334 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2335 tune_params::DISPARAGE_FLAGS_NEITHER
,
2336 tune_params::PREF_NEON_64_FALSE
,
2337 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2338 tune_params::FUSE_NOTHING
,
2339 tune_params::SCHED_AUTOPREF_OFF
2342 const struct tune_params arm_fa726te_tune
=
2344 &generic_extra_costs
, /* Insn extra costs. */
2345 &generic_addr_mode_costs
, /* Addressing mode costs. */
2346 fa726te_sched_adjust_cost
,
2347 arm_default_branch_cost
,
2348 &arm_default_vec_cost
,
2349 1, /* Constant limit. */
2350 5, /* Max cond insns. */
2351 8, /* Memset max inline. */
2352 2, /* Issue rate. */
2353 ARM_PREFETCH_NOT_BENEFICIAL
,
2354 tune_params::PREF_CONST_POOL_TRUE
,
2355 tune_params::PREF_LDRD_FALSE
,
2356 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2357 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2358 tune_params::DISPARAGE_FLAGS_NEITHER
,
2359 tune_params::PREF_NEON_64_FALSE
,
2360 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2361 tune_params::FUSE_NOTHING
,
2362 tune_params::SCHED_AUTOPREF_OFF
2365 /* Auto-generated CPU, FPU and architecture tables. */
2366 #include "arm-cpu-data.h"
2368 /* The name of the preprocessor macro to define for this architecture. PROFILE
2369 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2370 is thus chosen to be big enough to hold the longest architecture name. */
2372 char arm_arch_name
[] = "__ARM_ARCH_PROFILE__";
2374 /* Supported TLS relocations. */
2382 TLS_DESCSEQ
/* GNU scheme */
2385 /* The maximum number of insns to be used when loading a constant. */
2387 arm_constant_limit (bool size_p
)
2389 return size_p
? 1 : current_tune
->constant_limit
;
2392 /* Emit an insn that's a simple single-set. Both the operands must be known
2394 inline static rtx_insn
*
2395 emit_set_insn (rtx x
, rtx y
)
2397 return emit_insn (gen_rtx_SET (x
, y
));
/* Return the number of bits set in VALUE (Kernighan's popcount: each
   iteration clears the lowest set bit, so the loop runs once per set
   bit rather than once per bit position).  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
2415 /* Return the number of bits set in BMAP. */
2417 bitmap_popcount (const sbitmap bmap
)
2419 unsigned int count
= 0;
2421 sbitmap_iterator sbi
;
2423 EXECUTE_IF_SET_IN_BITMAP (bmap
, 0, n
, sbi
)
2432 } arm_fixed_mode_set
;
2434 /* A small helper for setting fixed-point library libfuncs. */
2437 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2438 const char *funcname
, const char *modename
,
2443 if (num_suffix
== 0)
2444 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2446 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2448 set_optab_libfunc (optable
, mode
, buffer
);
2452 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2453 machine_mode from
, const char *funcname
,
2454 const char *toname
, const char *fromname
)
2457 const char *maybe_suffix_2
= "";
2459 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2460 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2461 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2462 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2463 maybe_suffix_2
= "2";
2465 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2468 set_conv_libfunc (optable
, to
, from
, buffer
);
2471 /* Set up library functions unique to ARM. */
2474 arm_init_libfuncs (void)
2476 /* For Linux, we have access to kernel support for atomic operations. */
2477 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2478 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
2480 /* There are no special library functions unless we are using the
2485 /* The functions below are described in Section 4 of the "Run-Time
2486 ABI for the ARM architecture", Version 1.0. */
2488 /* Double-precision floating-point arithmetic. Table 2. */
2489 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2490 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2491 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2492 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2493 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2495 /* Double-precision comparisons. Table 3. */
2496 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2497 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2498 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2499 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2500 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2501 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2502 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2504 /* Single-precision floating-point arithmetic. Table 4. */
2505 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2506 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2507 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2508 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2509 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2511 /* Single-precision comparisons. Table 5. */
2512 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2513 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2514 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2515 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2516 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2517 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2518 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2520 /* Floating-point to integer conversions. Table 6. */
2521 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2522 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2523 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2524 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2525 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2526 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2527 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2528 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2530 /* Conversions between floating types. Table 7. */
2531 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2532 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2534 /* Integer to floating-point conversions. Table 8. */
2535 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2536 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2537 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2538 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2539 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2540 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2541 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2542 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2544 /* Long long. Table 9. */
2545 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2546 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2547 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2548 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2549 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2550 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2551 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2552 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2554 /* Integer (32/32->32) division. \S 4.3.1. */
2555 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2556 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2558 /* The divmod functions are designed so that they can be used for
2559 plain division, even though they return both the quotient and the
2560 remainder. The quotient is returned in the usual location (i.e.,
2561 r0 for SImode, {r0, r1} for DImode), just as would be expected
2562 for an ordinary division routine. Because the AAPCS calling
2563 conventions specify that all of { r0, r1, r2, r3 } are
2564 callee-saved registers, there is no need to tell the compiler
2565 explicitly that those registers are clobbered by these
2567 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2568 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2570 /* For SImode division the ABI provides div-without-mod routines,
2571 which are faster. */
2572 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2573 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2575 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2576 divmod libcalls instead. */
2577 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2578 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2579 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2580 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2582 /* Half-precision float operations. The compiler handles all operations
2583 with NULL libfuncs by converting the SFmode. */
2584 switch (arm_fp16_format
)
2586 case ARM_FP16_FORMAT_IEEE
:
2587 case ARM_FP16_FORMAT_ALTERNATIVE
:
2590 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2591 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2593 : "__gnu_f2h_alternative"));
2594 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2595 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2597 : "__gnu_h2f_alternative"));
2599 set_conv_libfunc (trunc_optab
, HFmode
, DFmode
,
2600 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2602 : "__gnu_d2h_alternative"));
2605 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2606 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2607 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2608 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2609 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2612 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2613 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2614 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2615 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2616 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2617 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2618 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2625 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2627 const arm_fixed_mode_set fixed_arith_modes
[] =
2630 { E_UQQmode
, "uqq" },
2632 { E_UHQmode
, "uhq" },
2634 { E_USQmode
, "usq" },
2636 { E_UDQmode
, "udq" },
2638 { E_UTQmode
, "utq" },
2640 { E_UHAmode
, "uha" },
2642 { E_USAmode
, "usa" },
2644 { E_UDAmode
, "uda" },
2646 { E_UTAmode
, "uta" }
2648 const arm_fixed_mode_set fixed_conv_modes
[] =
2651 { E_UQQmode
, "uqq" },
2653 { E_UHQmode
, "uhq" },
2655 { E_USQmode
, "usq" },
2657 { E_UDQmode
, "udq" },
2659 { E_UTQmode
, "utq" },
2661 { E_UHAmode
, "uha" },
2663 { E_USAmode
, "usa" },
2665 { E_UDAmode
, "uda" },
2667 { E_UTAmode
, "uta" },
2678 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2680 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2681 "add", fixed_arith_modes
[i
].name
, 3);
2682 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2683 "ssadd", fixed_arith_modes
[i
].name
, 3);
2684 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2685 "usadd", fixed_arith_modes
[i
].name
, 3);
2686 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2687 "sub", fixed_arith_modes
[i
].name
, 3);
2688 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2689 "sssub", fixed_arith_modes
[i
].name
, 3);
2690 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2691 "ussub", fixed_arith_modes
[i
].name
, 3);
2692 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2693 "mul", fixed_arith_modes
[i
].name
, 3);
2694 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2695 "ssmul", fixed_arith_modes
[i
].name
, 3);
2696 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2697 "usmul", fixed_arith_modes
[i
].name
, 3);
2698 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2699 "div", fixed_arith_modes
[i
].name
, 3);
2700 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2701 "udiv", fixed_arith_modes
[i
].name
, 3);
2702 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2703 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2704 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2705 "usdiv", fixed_arith_modes
[i
].name
, 3);
2706 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2707 "neg", fixed_arith_modes
[i
].name
, 2);
2708 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2709 "ssneg", fixed_arith_modes
[i
].name
, 2);
2710 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2711 "usneg", fixed_arith_modes
[i
].name
, 2);
2712 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2713 "ashl", fixed_arith_modes
[i
].name
, 3);
2714 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2715 "ashr", fixed_arith_modes
[i
].name
, 3);
2716 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2717 "lshr", fixed_arith_modes
[i
].name
, 3);
2718 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2719 "ssashl", fixed_arith_modes
[i
].name
, 3);
2720 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2721 "usashl", fixed_arith_modes
[i
].name
, 3);
2722 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2723 "cmp", fixed_arith_modes
[i
].name
, 2);
2726 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2727 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2730 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2731 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2734 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2735 fixed_conv_modes
[j
].mode
, "fract",
2736 fixed_conv_modes
[i
].name
,
2737 fixed_conv_modes
[j
].name
);
2738 arm_set_fixed_conv_libfunc (satfract_optab
,
2739 fixed_conv_modes
[i
].mode
,
2740 fixed_conv_modes
[j
].mode
, "satfract",
2741 fixed_conv_modes
[i
].name
,
2742 fixed_conv_modes
[j
].name
);
2743 arm_set_fixed_conv_libfunc (fractuns_optab
,
2744 fixed_conv_modes
[i
].mode
,
2745 fixed_conv_modes
[j
].mode
, "fractuns",
2746 fixed_conv_modes
[i
].name
,
2747 fixed_conv_modes
[j
].name
);
2748 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2749 fixed_conv_modes
[i
].mode
,
2750 fixed_conv_modes
[j
].mode
, "satfractuns",
2751 fixed_conv_modes
[i
].name
,
2752 fixed_conv_modes
[j
].name
);
2756 if (TARGET_AAPCS_BASED
)
2757 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2760 /* On AAPCS systems, this is the "struct __va_list". */
2761 static GTY(()) tree va_list_type
;
2763 /* Return the type to use as __builtin_va_list. */
2765 arm_build_builtin_va_list (void)
2770 if (!TARGET_AAPCS_BASED
)
2771 return std_build_builtin_va_list ();
2773 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2781 The C Library ABI further reinforces this definition in \S
2784 We must follow this definition exactly. The structure tag
2785 name is visible in C++ mangled names, and thus forms a part
2786 of the ABI. The field name may be used by people who
2787 #include <stdarg.h>. */
2788 /* Create the type. */
2789 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2790 /* Give it the required name. */
2791 va_list_name
= build_decl (BUILTINS_LOCATION
,
2793 get_identifier ("__va_list"),
2795 DECL_ARTIFICIAL (va_list_name
) = 1;
2796 TYPE_NAME (va_list_type
) = va_list_name
;
2797 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2798 /* Create the __ap field. */
2799 ap_field
= build_decl (BUILTINS_LOCATION
,
2801 get_identifier ("__ap"),
2803 DECL_ARTIFICIAL (ap_field
) = 1;
2804 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2805 TYPE_FIELDS (va_list_type
) = ap_field
;
2806 /* Compute its layout. */
2807 layout_type (va_list_type
);
2809 return va_list_type
;
2812 /* Return an expression of type "void *" pointing to the next
2813 available argument in a variable-argument list. VALIST is the
2814 user-level va_list object, of type __builtin_va_list. */
2816 arm_extract_valist_ptr (tree valist
)
2818 if (TREE_TYPE (valist
) == error_mark_node
)
2819 return error_mark_node
;
2821 /* On an AAPCS target, the pointer is stored within "struct
2823 if (TARGET_AAPCS_BASED
)
2825 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2826 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2827 valist
, ap_field
, NULL_TREE
);
2833 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2835 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2837 valist
= arm_extract_valist_ptr (valist
);
2838 std_expand_builtin_va_start (valist
, nextarg
);
2841 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2843 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2846 valist
= arm_extract_valist_ptr (valist
);
2847 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2850 /* Check any incompatible options that the user has specified. */
2852 arm_option_check_internal (struct gcc_options
*opts
)
2854 int flags
= opts
->x_target_flags
;
2856 /* iWMMXt and NEON are incompatible. */
2858 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_neon
))
2859 error ("iWMMXt and NEON are incompatible");
2861 /* Make sure that the processor choice does not conflict with any of the
2862 other command line choices. */
2863 if (TARGET_ARM_P (flags
)
2864 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
))
2865 error ("target CPU does not support ARM mode");
2867 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2868 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM_P (flags
))
2869 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2871 if (TARGET_ARM_P (flags
) && TARGET_CALLEE_INTERWORKING
)
2872 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2874 /* If this target is normally configured to use APCS frames, warn if they
2875 are turned off and debugging is turned on. */
2876 if (TARGET_ARM_P (flags
)
2877 && write_symbols
!= NO_DEBUG
2878 && !TARGET_APCS_FRAME
2879 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2880 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2882 /* iWMMXt unsupported under Thumb mode. */
2883 if (TARGET_THUMB_P (flags
) && TARGET_IWMMXT
)
2884 error ("iWMMXt unsupported under Thumb mode");
2886 if (TARGET_HARD_TP
&& TARGET_THUMB1_P (flags
))
2887 error ("can not use -mtp=cp15 with 16-bit Thumb");
2889 if (TARGET_THUMB_P (flags
) && TARGET_VXWORKS_RTP
&& flag_pic
)
2891 error ("RTP PIC is incompatible with Thumb");
2895 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2897 if ((target_pure_code
|| target_slow_flash_data
)
2898 && (!TARGET_HAVE_MOVT
|| arm_arch_notm
|| flag_pic
|| TARGET_NEON
))
2900 const char *flag
= (target_pure_code
? "-mpure-code" :
2901 "-mslow-flash-data");
2902 error ("%s only supports non-pic code on M-profile targets with the "
2903 "MOVT instruction", flag
);
2908 /* Recompute the global settings depending on target attribute options. */
2911 arm_option_params_internal (void)
2913 /* If we are not using the default (ARM mode) section anchor offset
2914 ranges, then set the correct ranges now. */
2917 /* Thumb-1 LDR instructions cannot have negative offsets.
2918 Permissible positive offset ranges are 5-bit (for byte loads),
2919 6-bit (for halfword loads), or 7-bit (for word loads).
2920 Empirical results suggest a 7-bit anchor range gives the best
2921 overall code size. */
2922 targetm
.min_anchor_offset
= 0;
2923 targetm
.max_anchor_offset
= 127;
2925 else if (TARGET_THUMB2
)
2927 /* The minimum is set such that the total size of the block
2928 for a particular anchor is 248 + 1 + 4095 bytes, which is
2929 divisible by eight, ensuring natural spacing of anchors. */
2930 targetm
.min_anchor_offset
= -248;
2931 targetm
.max_anchor_offset
= 4095;
2935 targetm
.min_anchor_offset
= TARGET_MIN_ANCHOR_OFFSET
;
2936 targetm
.max_anchor_offset
= TARGET_MAX_ANCHOR_OFFSET
;
2939 /* Increase the number of conditional instructions with -Os. */
2940 max_insns_skipped
= optimize_size
? 4 : current_tune
->max_insns_skipped
;
2942 /* For THUMB2, we limit the conditional sequence to one IT block. */
2944 max_insns_skipped
= MIN (max_insns_skipped
, MAX_INSN_PER_IT_BLOCK
);
2947 /* True if -mflip-thumb should next add an attribute for the default
2948 mode, false if it should next add an attribute for the opposite mode. */
2949 static GTY(()) bool thumb_flipper
;
2951 /* Options after initial target override. */
2952 static GTY(()) tree init_optimize
;
2955 arm_override_options_after_change_1 (struct gcc_options
*opts
)
2957 if (opts
->x_align_functions
<= 0)
2958 opts
->x_align_functions
= TARGET_THUMB_P (opts
->x_target_flags
)
2959 && opts
->x_optimize_size
? 2 : 4;
2962 /* Implement targetm.override_options_after_change. */
2965 arm_override_options_after_change (void)
2967 arm_configure_build_target (&arm_active_target
,
2968 TREE_TARGET_OPTION (target_option_default_node
),
2969 &global_options_set
, false);
2971 arm_override_options_after_change_1 (&global_options
);
2974 /* Implement TARGET_OPTION_SAVE. */
2976 arm_option_save (struct cl_target_option
*ptr
, struct gcc_options
*opts
)
2978 ptr
->x_arm_arch_string
= opts
->x_arm_arch_string
;
2979 ptr
->x_arm_cpu_string
= opts
->x_arm_cpu_string
;
2980 ptr
->x_arm_tune_string
= opts
->x_arm_tune_string
;
2983 /* Implement TARGET_OPTION_RESTORE. */
2985 arm_option_restore (struct gcc_options
*opts
, struct cl_target_option
*ptr
)
2987 opts
->x_arm_arch_string
= ptr
->x_arm_arch_string
;
2988 opts
->x_arm_cpu_string
= ptr
->x_arm_cpu_string
;
2989 opts
->x_arm_tune_string
= ptr
->x_arm_tune_string
;
2990 arm_configure_build_target (&arm_active_target
, ptr
, &global_options_set
,
2994 /* Reset options between modes that the user has specified. */
2996 arm_option_override_internal (struct gcc_options
*opts
,
2997 struct gcc_options
*opts_set
)
2999 arm_override_options_after_change_1 (opts
);
3001 if (TARGET_INTERWORK
&& !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3003 /* The default is to enable interworking, so this warning message would
3004 be confusing to users who have just compiled with, eg, -march=armv3. */
3005 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3006 opts
->x_target_flags
&= ~MASK_INTERWORK
;
3009 if (TARGET_THUMB_P (opts
->x_target_flags
)
3010 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3012 warning (0, "target CPU does not support THUMB instructions");
3013 opts
->x_target_flags
&= ~MASK_THUMB
;
3016 if (TARGET_APCS_FRAME
&& TARGET_THUMB_P (opts
->x_target_flags
))
3018 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3019 opts
->x_target_flags
&= ~MASK_APCS_FRAME
;
3022 /* Callee super interworking implies thumb interworking. Adding
3023 this to the flags here simplifies the logic elsewhere. */
3024 if (TARGET_THUMB_P (opts
->x_target_flags
) && TARGET_CALLEE_INTERWORKING
)
3025 opts
->x_target_flags
|= MASK_INTERWORK
;
3027 /* need to remember initial values so combinaisons of options like
3028 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3029 cl_optimization
*to
= TREE_OPTIMIZATION (init_optimize
);
3031 if (! opts_set
->x_arm_restrict_it
)
3032 opts
->x_arm_restrict_it
= arm_arch8
;
3034 /* ARM execution state and M profile don't have [restrict] IT. */
3035 if (!TARGET_THUMB2_P (opts
->x_target_flags
) || !arm_arch_notm
)
3036 opts
->x_arm_restrict_it
= 0;
3038 /* Enable -munaligned-access by default for
3039 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3040 i.e. Thumb2 and ARM state only.
3041 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3042 - ARMv8 architecture-base processors.
3044 Disable -munaligned-access by default for
3045 - all pre-ARMv6 architecture-based processors
3046 - ARMv6-M architecture-based processors
3047 - ARMv8-M Baseline processors. */
3049 if (! opts_set
->x_unaligned_access
)
3051 opts
->x_unaligned_access
= (TARGET_32BIT_P (opts
->x_target_flags
)
3052 && arm_arch6
&& (arm_arch_notm
|| arm_arch7
));
3054 else if (opts
->x_unaligned_access
== 1
3055 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
3057 warning (0, "target CPU does not support unaligned accesses");
3058 opts
->x_unaligned_access
= 0;
3061 /* Don't warn since it's on by default in -O2. */
3062 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3063 opts
->x_flag_schedule_insns
= 0;
3065 opts
->x_flag_schedule_insns
= to
->x_flag_schedule_insns
;
3067 /* Disable shrink-wrap when optimizing function for size, since it tends to
3068 generate additional returns. */
3069 if (optimize_function_for_size_p (cfun
)
3070 && TARGET_THUMB2_P (opts
->x_target_flags
))
3071 opts
->x_flag_shrink_wrap
= false;
3073 opts
->x_flag_shrink_wrap
= to
->x_flag_shrink_wrap
;
3075 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3076 - epilogue_insns - does not accurately model the corresponding insns
3077 emitted in the asm file. In particular, see the comment in thumb_exit
3078 'Find out how many of the (return) argument registers we can corrupt'.
3079 As a consequence, the epilogue may clobber registers without fipa-ra
3080 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3081 TODO: Accurately model clobbers for epilogue_insns and reenable
3083 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3084 opts
->x_flag_ipa_ra
= 0;
3086 opts
->x_flag_ipa_ra
= to
->x_flag_ipa_ra
;
3088 /* Thumb2 inline assembly code should always use unified syntax.
3089 This will apply to ARM and Thumb1 eventually. */
3090 opts
->x_inline_asm_unified
= TARGET_THUMB2_P (opts
->x_target_flags
);
3092 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3093 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
;
3097 static sbitmap isa_all_fpubits
;
3098 static sbitmap isa_quirkbits
;
3100 /* Configure a build target TARGET from the user-specified options OPTS and
3101 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3102 architecture have been specified, but the two are not identical. */
3104 arm_configure_build_target (struct arm_build_target
*target
,
3105 struct cl_target_option
*opts
,
3106 struct gcc_options
*opts_set
,
3107 bool warn_compatible
)
3109 const cpu_option
*arm_selected_tune
= NULL
;
3110 const arch_option
*arm_selected_arch
= NULL
;
3111 const cpu_option
*arm_selected_cpu
= NULL
;
3112 const arm_fpu_desc
*arm_selected_fpu
= NULL
;
3113 const char *tune_opts
= NULL
;
3114 const char *arch_opts
= NULL
;
3115 const char *cpu_opts
= NULL
;
3117 bitmap_clear (target
->isa
);
3118 target
->core_name
= NULL
;
3119 target
->arch_name
= NULL
;
3121 if (opts_set
->x_arm_arch_string
)
3123 arm_selected_arch
= arm_parse_arch_option_name (all_architectures
,
3125 opts
->x_arm_arch_string
);
3126 arch_opts
= strchr (opts
->x_arm_arch_string
, '+');
3129 if (opts_set
->x_arm_cpu_string
)
3131 arm_selected_cpu
= arm_parse_cpu_option_name (all_cores
, "-mcpu",
3132 opts
->x_arm_cpu_string
);
3133 cpu_opts
= strchr (opts
->x_arm_cpu_string
, '+');
3134 arm_selected_tune
= arm_selected_cpu
;
3135 /* If taking the tuning from -mcpu, we don't need to rescan the
3136 options for tuning. */
3139 if (opts_set
->x_arm_tune_string
)
3141 arm_selected_tune
= arm_parse_cpu_option_name (all_cores
, "-mtune",
3142 opts
->x_arm_tune_string
);
3143 tune_opts
= strchr (opts
->x_arm_tune_string
, '+');
3146 if (arm_selected_arch
)
3148 arm_initialize_isa (target
->isa
, arm_selected_arch
->common
.isa_bits
);
3149 arm_parse_option_features (target
->isa
, &arm_selected_arch
->common
,
3152 if (arm_selected_cpu
)
3154 auto_sbitmap
cpu_isa (isa_num_bits
);
3155 auto_sbitmap
isa_delta (isa_num_bits
);
3157 arm_initialize_isa (cpu_isa
, arm_selected_cpu
->common
.isa_bits
);
3158 arm_parse_option_features (cpu_isa
, &arm_selected_cpu
->common
,
3160 bitmap_xor (isa_delta
, cpu_isa
, target
->isa
);
3161 /* Ignore any bits that are quirk bits. */
3162 bitmap_and_compl (isa_delta
, isa_delta
, isa_quirkbits
);
3163 /* Ignore (for now) any bits that might be set by -mfpu. */
3164 bitmap_and_compl (isa_delta
, isa_delta
, isa_all_fpubits
);
3166 if (!bitmap_empty_p (isa_delta
))
3168 if (warn_compatible
)
3169 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3170 arm_selected_cpu
->common
.name
,
3171 arm_selected_arch
->common
.name
);
3172 /* -march wins for code generation.
3173 -mcpu wins for default tuning. */
3174 if (!arm_selected_tune
)
3175 arm_selected_tune
= arm_selected_cpu
;
3177 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3178 target
->arch_name
= arm_selected_arch
->common
.name
;
3182 /* Architecture and CPU are essentially the same.
3183 Prefer the CPU setting. */
3184 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3185 target
->core_name
= arm_selected_cpu
->common
.name
;
3186 /* Copy the CPU's capabilities, so that we inherit the
3187 appropriate extensions and quirks. */
3188 bitmap_copy (target
->isa
, cpu_isa
);
3193 /* Pick a CPU based on the architecture. */
3194 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3195 target
->arch_name
= arm_selected_arch
->common
.name
;
3196 /* Note: target->core_name is left unset in this path. */
3199 else if (arm_selected_cpu
)
3201 target
->core_name
= arm_selected_cpu
->common
.name
;
3202 arm_initialize_isa (target
->isa
, arm_selected_cpu
->common
.isa_bits
);
3203 arm_parse_option_features (target
->isa
, &arm_selected_cpu
->common
,
3205 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3207 /* If the user did not specify a processor or architecture, choose
3211 const cpu_option
*sel
;
3212 auto_sbitmap
sought_isa (isa_num_bits
);
3213 bitmap_clear (sought_isa
);
3214 auto_sbitmap
default_isa (isa_num_bits
);
3216 arm_selected_cpu
= arm_parse_cpu_option_name (all_cores
, "default CPU",
3217 TARGET_CPU_DEFAULT
);
3218 cpu_opts
= strchr (TARGET_CPU_DEFAULT
, '+');
3219 gcc_assert (arm_selected_cpu
->common
.name
);
3221 /* RWE: All of the selection logic below (to the end of this
3222 'if' clause) looks somewhat suspect. It appears to be mostly
3223 there to support forcing thumb support when the default CPU
3224 does not have thumb (somewhat dubious in terms of what the
3225 user might be expecting). I think it should be removed once
3226 support for the pre-thumb era cores is removed. */
3227 sel
= arm_selected_cpu
;
3228 arm_initialize_isa (default_isa
, sel
->common
.isa_bits
);
3229 arm_parse_option_features (default_isa
, &arm_selected_cpu
->common
,
3232 /* Now check to see if the user has specified any command line
3233 switches that require certain abilities from the cpu. */
3235 if (TARGET_INTERWORK
|| TARGET_THUMB
)
3237 bitmap_set_bit (sought_isa
, isa_bit_thumb
);
3238 bitmap_set_bit (sought_isa
, isa_bit_mode32
);
3240 /* There are no ARM processors that support both APCS-26 and
3241 interworking. Therefore we forcibly remove MODE26 from
3242 from the isa features here (if it was set), so that the
3243 search below will always be able to find a compatible
3245 bitmap_clear_bit (default_isa
, isa_bit_mode26
);
3248 /* If there are such requirements and the default CPU does not
3249 satisfy them, we need to run over the complete list of
3250 cores looking for one that is satisfactory. */
3251 if (!bitmap_empty_p (sought_isa
)
3252 && !bitmap_subset_p (sought_isa
, default_isa
))
3254 auto_sbitmap
candidate_isa (isa_num_bits
);
3255 /* We're only interested in a CPU with at least the
3256 capabilities of the default CPU and the required
3257 additional features. */
3258 bitmap_ior (default_isa
, default_isa
, sought_isa
);
3260 /* Try to locate a CPU type that supports all of the abilities
3261 of the default CPU, plus the extra abilities requested by
3263 for (sel
= all_cores
; sel
->common
.name
!= NULL
; sel
++)
3265 arm_initialize_isa (candidate_isa
, sel
->common
.isa_bits
);
3266 /* An exact match? */
3267 if (bitmap_equal_p (default_isa
, candidate_isa
))
3271 if (sel
->common
.name
== NULL
)
3273 unsigned current_bit_count
= isa_num_bits
;
3274 const cpu_option
*best_fit
= NULL
;
3276 /* Ideally we would like to issue an error message here
3277 saying that it was not possible to find a CPU compatible
3278 with the default CPU, but which also supports the command
3279 line options specified by the programmer, and so they
3280 ought to use the -mcpu=<name> command line option to
3281 override the default CPU type.
3283 If we cannot find a CPU that has exactly the
3284 characteristics of the default CPU and the given
3285 command line options we scan the array again looking
3286 for a best match. The best match must have at least
3287 the capabilities of the perfect match. */
3288 for (sel
= all_cores
; sel
->common
.name
!= NULL
; sel
++)
3290 arm_initialize_isa (candidate_isa
, sel
->common
.isa_bits
);
3292 if (bitmap_subset_p (default_isa
, candidate_isa
))
3296 bitmap_and_compl (candidate_isa
, candidate_isa
,
3298 count
= bitmap_popcount (candidate_isa
);
3300 if (count
< current_bit_count
)
3303 current_bit_count
= count
;
3307 gcc_assert (best_fit
);
3311 arm_selected_cpu
= sel
;
3314 /* Now we know the CPU, we can finally initialize the target
3316 target
->core_name
= arm_selected_cpu
->common
.name
;
3317 arm_initialize_isa (target
->isa
, arm_selected_cpu
->common
.isa_bits
);
3318 arm_parse_option_features (target
->isa
, &arm_selected_cpu
->common
,
3320 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3323 gcc_assert (arm_selected_cpu
);
3324 gcc_assert (arm_selected_arch
);
3326 if (opts
->x_arm_fpu_index
!= TARGET_FPU_auto
)
3328 arm_selected_fpu
= &all_fpus
[opts
->x_arm_fpu_index
];
3329 auto_sbitmap
fpu_bits (isa_num_bits
);
3331 arm_initialize_isa (fpu_bits
, arm_selected_fpu
->isa_bits
);
3332 bitmap_and_compl (target
->isa
, target
->isa
, isa_all_fpubits
);
3333 bitmap_ior (target
->isa
, target
->isa
, fpu_bits
);
3336 if (!arm_selected_tune
)
3337 arm_selected_tune
= arm_selected_cpu
;
3338 else /* Validate the features passed to -mtune. */
3339 arm_parse_option_features (NULL
, &arm_selected_tune
->common
, tune_opts
);
3341 const cpu_tune
*tune_data
= &all_tunes
[arm_selected_tune
- all_cores
];
3343 /* Finish initializing the target structure. */
3344 target
->arch_pp_name
= arm_selected_arch
->arch
;
3345 target
->base_arch
= arm_selected_arch
->base_arch
;
3346 target
->profile
= arm_selected_arch
->profile
;
3348 target
->tune_flags
= tune_data
->tune_flags
;
3349 target
->tune
= tune_data
->tune
;
3350 target
->tune_core
= tune_data
->scheduler
;
3351 arm_option_reconfigure_globals ();
3354 /* Fix up any incompatible options that the user has specified. */
3356 arm_option_override (void)
3358 static const enum isa_feature fpu_bitlist
[]
3359 = { ISA_ALL_FPU_INTERNAL
, isa_nobit
};
3360 static const enum isa_feature quirk_bitlist
[] = { ISA_ALL_QUIRKS
, isa_nobit
};
3361 cl_target_option opts
;
3363 isa_quirkbits
= sbitmap_alloc (isa_num_bits
);
3364 arm_initialize_isa (isa_quirkbits
, quirk_bitlist
);
3366 isa_all_fpubits
= sbitmap_alloc (isa_num_bits
);
3367 arm_initialize_isa (isa_all_fpubits
, fpu_bitlist
);
3369 arm_active_target
.isa
= sbitmap_alloc (isa_num_bits
);
3371 if (!global_options_set
.x_arm_fpu_index
)
3376 ok
= opt_enum_arg_to_value (OPT_mfpu_
, FPUTYPE_AUTO
, &fpu_index
,
3379 arm_fpu_index
= (enum fpu_type
) fpu_index
;
3382 cl_target_option_save (&opts
, &global_options
);
3383 arm_configure_build_target (&arm_active_target
, &opts
, &global_options_set
,
3386 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3387 SUBTARGET_OVERRIDE_OPTIONS
;
3390 /* Initialize boolean versions of the architectural flags, for use
3391 in the arm.md file and for enabling feature flags. */
3392 arm_option_reconfigure_globals ();
3394 arm_tune
= arm_active_target
.tune_core
;
3395 tune_flags
= arm_active_target
.tune_flags
;
3396 current_tune
= arm_active_target
.tune
;
3398 /* TBD: Dwarf info for apcs frame is not handled yet. */
3399 if (TARGET_APCS_FRAME
)
3400 flag_shrink_wrap
= false;
3402 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3404 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3405 target_flags
|= MASK_APCS_FRAME
;
3408 if (TARGET_POKE_FUNCTION_NAME
)
3409 target_flags
|= MASK_APCS_FRAME
;
3411 if (TARGET_APCS_REENT
&& flag_pic
)
3412 error ("-fpic and -mapcs-reent are incompatible");
3414 if (TARGET_APCS_REENT
)
3415 warning (0, "APCS reentrant code not supported. Ignored");
3417 /* Set up some tuning parameters. */
3418 arm_ld_sched
= (tune_flags
& TF_LDSCHED
) != 0;
3419 arm_tune_strongarm
= (tune_flags
& TF_STRONG
) != 0;
3420 arm_tune_wbuf
= (tune_flags
& TF_WBUF
) != 0;
3421 arm_tune_xscale
= (tune_flags
& TF_XSCALE
) != 0;
3422 arm_tune_cortex_a9
= (arm_tune
== TARGET_CPU_cortexa9
) != 0;
3423 arm_m_profile_small_mul
= (tune_flags
& TF_SMALLMUL
) != 0;
3425 /* For arm2/3 there is no need to do any scheduling if we are doing
3426 software floating-point. */
3427 if (TARGET_SOFT_FLOAT
&& (tune_flags
& TF_NO_MODE32
))
3428 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3430 /* Override the default structure alignment for AAPCS ABI. */
3431 if (!global_options_set
.x_arm_structure_size_boundary
)
3433 if (TARGET_AAPCS_BASED
)
3434 arm_structure_size_boundary
= 8;
3438 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3440 if (arm_structure_size_boundary
!= 8
3441 && arm_structure_size_boundary
!= 32
3442 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3444 if (ARM_DOUBLEWORD_ALIGN
)
3446 "structure size boundary can only be set to 8, 32 or 64");
3448 warning (0, "structure size boundary can only be set to 8 or 32");
3449 arm_structure_size_boundary
3450 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
3454 if (TARGET_VXWORKS_RTP
)
3456 if (!global_options_set
.x_arm_pic_data_is_text_relative
)
3457 arm_pic_data_is_text_relative
= 0;
3460 && !arm_pic_data_is_text_relative
3461 && !(global_options_set
.x_target_flags
& MASK_SINGLE_PIC_BASE
))
3462 /* When text & data segments don't have a fixed displacement, the
3463 intended use is with a single, read only, pic base register.
3464 Unless the user explicitly requested not to do that, set
3466 target_flags
|= MASK_SINGLE_PIC_BASE
;
3468 /* If stack checking is disabled, we can use r10 as the PIC register,
3469 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3470 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3472 if (TARGET_VXWORKS_RTP
)
3473 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3474 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3477 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3478 arm_pic_register
= 9;
3480 if (arm_pic_register_string
!= NULL
)
3482 int pic_register
= decode_reg_name (arm_pic_register_string
);
3485 warning (0, "-mpic-register= is useless without -fpic");
3487 /* Prevent the user from choosing an obviously stupid PIC register. */
3488 else if (pic_register
< 0 || call_used_regs
[pic_register
]
3489 || pic_register
== HARD_FRAME_POINTER_REGNUM
3490 || pic_register
== STACK_POINTER_REGNUM
3491 || pic_register
>= PC_REGNUM
3492 || (TARGET_VXWORKS_RTP
3493 && (unsigned int) pic_register
!= arm_pic_register
))
3494 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
3496 arm_pic_register
= pic_register
;
3499 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3500 if (fix_cm3_ldrd
== 2)
3502 if (bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_cm3_ldrd
))
3508 /* Hot/Cold partitioning is not currently supported, since we can't
3509 handle literal pool placement in that case. */
3510 if (flag_reorder_blocks_and_partition
)
3512 inform (input_location
,
3513 "-freorder-blocks-and-partition not supported on this architecture");
3514 flag_reorder_blocks_and_partition
= 0;
3515 flag_reorder_blocks
= 1;
3519 /* Hoisting PIC address calculations more aggressively provides a small,
3520 but measurable, size reduction for PIC code. Therefore, we decrease
3521 the bar for unrestricted expression hoisting to the cost of PIC address
3522 calculation, which is 2 instructions. */
3523 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3524 global_options
.x_param_values
,
3525 global_options_set
.x_param_values
);
3527 /* ARM EABI defaults to strict volatile bitfields. */
3528 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3529 && abi_version_at_least(2))
3530 flag_strict_volatile_bitfields
= 1;
3532 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3533 have deemed it beneficial (signified by setting
3534 prefetch.num_slots to 1 or more). */
3535 if (flag_prefetch_loop_arrays
< 0
3538 && current_tune
->prefetch
.num_slots
> 0)
3539 flag_prefetch_loop_arrays
= 1;
3541 /* Set up parameters to be used in prefetching algorithm. Do not
3542 override the defaults unless we are tuning for a core we have
3543 researched values for. */
3544 if (current_tune
->prefetch
.num_slots
> 0)
3545 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3546 current_tune
->prefetch
.num_slots
,
3547 global_options
.x_param_values
,
3548 global_options_set
.x_param_values
);
3549 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3550 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3551 current_tune
->prefetch
.l1_cache_line_size
,
3552 global_options
.x_param_values
,
3553 global_options_set
.x_param_values
);
3554 if (current_tune
->prefetch
.l1_cache_size
>= 0)
3555 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3556 current_tune
->prefetch
.l1_cache_size
,
3557 global_options
.x_param_values
,
3558 global_options_set
.x_param_values
);
3560 /* Use Neon to perform 64-bits operations rather than core
3562 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
3563 if (use_neon_for_64bits
== 1)
3564 prefer_neon_for_64bits
= true;
3566 /* Use the alternative scheduling-pressure algorithm by default. */
3567 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
3568 global_options
.x_param_values
,
3569 global_options_set
.x_param_values
);
3571 /* Look through ready list and all of queue for instructions
3572 relevant for L2 auto-prefetcher. */
3573 int param_sched_autopref_queue_depth
;
3575 switch (current_tune
->sched_autopref
)
3577 case tune_params::SCHED_AUTOPREF_OFF
:
3578 param_sched_autopref_queue_depth
= -1;
3581 case tune_params::SCHED_AUTOPREF_RANK
:
3582 param_sched_autopref_queue_depth
= 0;
3585 case tune_params::SCHED_AUTOPREF_FULL
:
3586 param_sched_autopref_queue_depth
= max_insn_queue_index
+ 1;
3593 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH
,
3594 param_sched_autopref_queue_depth
,
3595 global_options
.x_param_values
,
3596 global_options_set
.x_param_values
);
3598 /* Currently, for slow flash data, we just disable literal pools. We also
3599 disable it for pure-code. */
3600 if (target_slow_flash_data
|| target_pure_code
)
3601 arm_disable_literal_pool
= true;
3603 /* Disable scheduling fusion by default if it's not armv7 processor
3604 or doesn't prefer ldrd/strd. */
3605 if (flag_schedule_fusion
== 2
3606 && (!arm_arch7
|| !current_tune
->prefer_ldrd_strd
))
3607 flag_schedule_fusion
= 0;
3609 /* Need to remember initial options before they are overriden. */
3610 init_optimize
= build_optimization_node (&global_options
);
3612 arm_options_perform_arch_sanity_checks ();
3613 arm_option_override_internal (&global_options
, &global_options_set
);
3614 arm_option_check_internal (&global_options
);
3615 arm_option_params_internal ();
3617 /* Create the default target_options structure. */
3618 target_option_default_node
= target_option_current_node
3619 = build_target_option_node (&global_options
);
3621 /* Register global variables with the garbage collector. */
3622 arm_add_gc_roots ();
3624 /* Init initial mode for testing. */
3625 thumb_flipper
= TARGET_THUMB
;
3629 /* Reconfigure global status flags from the active_target.isa. */
3631 arm_option_reconfigure_globals (void)
3633 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_active_target
.arch_pp_name
);
3634 arm_base_arch
= arm_active_target
.base_arch
;
3636 /* Initialize boolean versions of the architectural flags, for use
3637 in the arm.md file. */
3638 arm_arch3m
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv3m
);
3639 arm_arch4
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv4
);
3640 arm_arch4t
= arm_arch4
&& bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3641 arm_arch5
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv5
);
3642 arm_arch5e
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv5e
);
3643 arm_arch5te
= arm_arch5e
3644 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3645 arm_arch6
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv6
);
3646 arm_arch6k
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv6k
);
3647 arm_arch_notm
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
);
3648 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3649 arm_arch7
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv7
);
3650 arm_arch7em
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv7em
);
3651 arm_arch8
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8
);
3652 arm_arch8_1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_1
);
3653 arm_arch8_2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_2
);
3654 arm_arch_thumb1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3655 arm_arch_thumb2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb2
);
3656 arm_arch_xscale
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_xscale
);
3657 arm_arch_iwmmxt
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt
);
3658 arm_arch_iwmmxt2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt2
);
3659 arm_arch_thumb_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_tdiv
);
3660 arm_arch_arm_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_adiv
);
3661 arm_arch_crc
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_crc32
);
3662 arm_arch_cmse
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_cmse
);
3663 arm_fp16_inst
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_fp16
);
3664 arm_arch_lpae
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_lpae
);
3667 if (arm_fp16_format
== ARM_FP16_FORMAT_ALTERNATIVE
)
3668 error ("selected fp16 options are incompatible");
3669 arm_fp16_format
= ARM_FP16_FORMAT_IEEE
;
3672 /* And finally, set up some quirks. */
3673 arm_arch_no_volatile_ce
3674 = bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_no_volatile_ce
);
3675 arm_arch6kz
= arm_arch6k
&& bitmap_bit_p (arm_active_target
.isa
,
3676 isa_bit_quirk_armv6kz
);
3678 /* Use the cp15 method if it is available. */
3679 if (target_thread_pointer
== TP_AUTO
)
3681 if (arm_arch6k
&& !TARGET_THUMB1
)
3682 target_thread_pointer
= TP_CP15
;
3684 target_thread_pointer
= TP_SOFT
;
3688 /* Perform some validation between the desired architecture and the rest of the
3691 arm_options_perform_arch_sanity_checks (void)
3693 /* V5 code we generate is completely interworking capable, so we turn off
3694 TARGET_INTERWORK here to avoid many tests later on. */
3696 /* XXX However, we must pass the right pre-processor defines to CPP
3697 or GLD can get confused. This is a hack. */
3698 if (TARGET_INTERWORK
)
3699 arm_cpp_interwork
= 1;
3702 target_flags
&= ~MASK_INTERWORK
;
3704 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3705 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3707 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3708 error ("iwmmxt abi requires an iwmmxt capable cpu");
3710 /* BPABI targets use linker tricks to allow interworking on cores
3711 without thumb support. */
3712 if (TARGET_INTERWORK
3714 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3716 warning (0, "target CPU does not support interworking" );
3717 target_flags
&= ~MASK_INTERWORK
;
3720 /* If soft-float is specified then don't use FPU. */
3721 if (TARGET_SOFT_FLOAT
)
3722 arm_fpu_attr
= FPU_NONE
;
3724 arm_fpu_attr
= FPU_VFP
;
3726 if (TARGET_AAPCS_BASED
)
3728 if (TARGET_CALLER_INTERWORKING
)
3729 error ("AAPCS does not support -mcaller-super-interworking");
3731 if (TARGET_CALLEE_INTERWORKING
)
3732 error ("AAPCS does not support -mcallee-super-interworking");
3735 /* __fp16 support currently assumes the core has ldrh. */
3736 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3737 sorry ("__fp16 and no ldrh");
3739 if (use_cmse
&& !arm_arch_cmse
)
3740 error ("target CPU does not support ARMv8-M Security Extensions");
3742 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3743 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3744 if (use_cmse
&& LAST_VFP_REGNUM
> LAST_LO_VFP_REGNUM
)
3745 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3748 if (TARGET_AAPCS_BASED
)
3750 if (arm_abi
== ARM_ABI_IWMMXT
)
3751 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
3752 else if (TARGET_HARD_FLOAT_ABI
)
3754 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
3755 if (!bitmap_bit_p (arm_active_target
.isa
, isa_bit_vfpv2
))
3756 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3759 arm_pcs_default
= ARM_PCS_AAPCS
;
3763 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
)
3764 sorry ("-mfloat-abi=hard and VFP");
3766 if (arm_abi
== ARM_ABI_APCS
)
3767 arm_pcs_default
= ARM_PCS_APCS
;
3769 arm_pcs_default
= ARM_PCS_ATPCS
;
3774 arm_add_gc_roots (void)
3776 gcc_obstack_init(&minipool_obstack
);
3777 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
3780 /* A table of known ARM exception types.
3781 For use with the interrupt function attribute. */
3785 const char *const arg
;
3786 const unsigned long return_value
;
3790 static const isr_attribute_arg isr_attribute_args
[] =
3792 { "IRQ", ARM_FT_ISR
},
3793 { "irq", ARM_FT_ISR
},
3794 { "FIQ", ARM_FT_FIQ
},
3795 { "fiq", ARM_FT_FIQ
},
3796 { "ABORT", ARM_FT_ISR
},
3797 { "abort", ARM_FT_ISR
},
3798 { "ABORT", ARM_FT_ISR
},
3799 { "abort", ARM_FT_ISR
},
3800 { "UNDEF", ARM_FT_EXCEPTION
},
3801 { "undef", ARM_FT_EXCEPTION
},
3802 { "SWI", ARM_FT_EXCEPTION
},
3803 { "swi", ARM_FT_EXCEPTION
},
3804 { NULL
, ARM_FT_NORMAL
}
3807 /* Returns the (interrupt) function type of the current
3808 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3810 static unsigned long
3811 arm_isr_value (tree argument
)
3813 const isr_attribute_arg
* ptr
;
3817 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3819 /* No argument - default to IRQ. */
3820 if (argument
== NULL_TREE
)
3823 /* Get the value of the argument. */
3824 if (TREE_VALUE (argument
) == NULL_TREE
3825 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3826 return ARM_FT_UNKNOWN
;
3828 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3830 /* Check it against the list of known arguments. */
3831 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3832 if (streq (arg
, ptr
->arg
))
3833 return ptr
->return_value
;
3835 /* An unrecognized interrupt type. */
3836 return ARM_FT_UNKNOWN
;
3839 /* Computes the type of the current function. */
3841 static unsigned long
3842 arm_compute_func_type (void)
3844 unsigned long type
= ARM_FT_UNKNOWN
;
3848 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
3850 /* Decide if the current function is volatile. Such functions
3851 never return, and many memory cycles can be saved by not storing
3852 register values that will never be needed again. This optimization
3853 was added to speed up context switching in a kernel application. */
3855 && (TREE_NOTHROW (current_function_decl
)
3856 || !(flag_unwind_tables
3858 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3859 && TREE_THIS_VOLATILE (current_function_decl
))
3860 type
|= ARM_FT_VOLATILE
;
3862 if (cfun
->static_chain_decl
!= NULL
)
3863 type
|= ARM_FT_NESTED
;
3865 attr
= DECL_ATTRIBUTES (current_function_decl
);
3867 a
= lookup_attribute ("naked", attr
);
3869 type
|= ARM_FT_NAKED
;
3871 a
= lookup_attribute ("isr", attr
);
3873 a
= lookup_attribute ("interrupt", attr
);
3876 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3878 type
|= arm_isr_value (TREE_VALUE (a
));
3880 if (lookup_attribute ("cmse_nonsecure_entry", attr
))
3881 type
|= ARM_FT_CMSE_ENTRY
;
3886 /* Returns the type of the current function. */
3889 arm_current_func_type (void)
3891 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3892 cfun
->machine
->func_type
= arm_compute_func_type ();
3894 return cfun
->machine
->func_type
;
3898 arm_allocate_stack_slots_for_args (void)
3900 /* Naked functions should not allocate stack slots for arguments. */
3901 return !IS_NAKED (arm_current_func_type ());
3905 arm_warn_func_return (tree decl
)
3907 /* Naked functions are implemented entirely in assembly, including the
3908 return sequence, so suppress warnings about this. */
3909 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3913 /* Output assembler code for a block containing the constant parts
3914 of a trampoline, leaving space for the variable parts.
3916 On the ARM, (if r8 is the static chain regnum, and remembering that
3917 referencing pc adds an offset of 8) the trampoline looks like:
3920 .word static chain value
3921 .word function's address
3922 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3925 arm_asm_trampoline_template (FILE *f
)
/* NOTE(review): partial extraction — the if/else structure selecting the
   ARM / Thumb-2 / Thumb-1 variants is incomplete here (some guard lines
   are missing); the three emission sequences below are each for one ISA.  */
3927 fprintf (f
, "\t.syntax unified\n");
/* 32-bit ARM variant: two pc-relative loads fetch the static chain and
   the target address from the trailing literal words.  */
3931 fprintf (f
, "\t.arm\n");
3932 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3933 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
3935 else if (TARGET_THUMB2
)
3937 fprintf (f
, "\t.thumb\n");
3938 /* The Thumb-2 trampoline is similar to the arm implementation.
3939 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3940 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
3941 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3942 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
/* 16-bit Thumb-1 variant: no pc-relative ldr into pc, so bounce the
   values through r0 and the stack.  */
3946 ASM_OUTPUT_ALIGN (f
, 2);
3947 fprintf (f
, "\t.code\t16\n");
3948 fprintf (f
, ".Ltrampoline_start:\n");
3949 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3950 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3951 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3952 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3953 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3954 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
/* Placeholder literal words, later filled in by arm_trampoline_init.  */
3956 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3957 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3960 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3963 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3965 rtx fnaddr
, mem
, a_tramp
;
/* Copy the constant template emitted by arm_asm_trampoline_template
   into the trampoline block.  */
3967 emit_block_move (m_tramp
, assemble_trampoline_template (),
3968 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
/* Patch the two literal words: static chain first, then the target
   function's address.  Offsets differ because the Thumb-1 stub
   (non-TARGET_32BIT) has a longer code sequence.  */
3970 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3971 emit_move_insn (mem
, chain_value
);
3973 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3974 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3975 emit_move_insn (mem
, fnaddr
);
/* Flush the instruction cache over the freshly-written trampoline.  */
3977 a_tramp
= XEXP (m_tramp
, 0);
3978 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3979 LCT_NORMAL
, VOIDmode
, a_tramp
, Pmode
,
3980 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3983 /* Thumb trampolines should be entered in thumb mode, so set
3984 the bottom bit of the address. */
3987 arm_trampoline_adjust_address (rtx addr
)
/* NOTE(review): the guard selecting Thumb targets is among the lines
   missing from this extraction; presumably the OR is conditional on
   TARGET_THUMB — confirm against the original source.  */
3990 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3991 NULL
, 0, OPTAB_LIB_WIDEN
);
3995 /* Return 1 if it is possible to return using a single instruction.
3996 If SIBLING is non-null, this is a test for a return before a sibling
3997 call. SIBLING is the call insn, so we can examine its register usage. */
4000 use_return_insn (int iscond
, rtx sibling
)
/* NOTE(review): partial extraction — the early "return 0"/"return 1"
   statements and several condition lines are missing throughout; each
   visible `if` is one of the original's disqualifying checks.  */
4003 unsigned int func_type
;
4004 unsigned long saved_int_regs
;
4005 unsigned HOST_WIDE_INT stack_adjust
;
4006 arm_stack_offsets
*offsets
;
4008 /* Never use a return instruction before reload has run. */
4009 if (!reload_completed
)
4012 func_type
= arm_current_func_type ();
4014 /* Naked, volatile and stack alignment functions need special
4016 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
4019 /* So do interrupt functions that use the frame pointer and Thumb
4020 interrupt functions. */
4021 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
4024 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
4025 && !optimize_function_for_size_p (cfun
))
/* stack_adjust is the SP adjustment needed between the saved registers
   and the outgoing-argument area.  */
4028 offsets
= arm_get_frame_offsets ();
4029 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
4031 /* As do variadic functions. */
4032 if (crtl
->args
.pretend_args_size
4033 || cfun
->machine
->uses_anonymous_args
4034 /* Or if the function calls __builtin_eh_return () */
4035 || crtl
->calls_eh_return
4036 /* Or if the function calls alloca */
4037 || cfun
->calls_alloca
4038 /* Or if there is a stack adjustment. However, if the stack pointer
4039 is saved on the stack, we can use a pre-incrementing stack load. */
4040 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
4041 && stack_adjust
== 4))
4042 /* Or if the static chain register was saved above the frame, under the
4043 assumption that the stack pointer isn't saved on the stack. */
4044 || (!(TARGET_APCS_FRAME
&& frame_pointer_needed
)
4045 && arm_compute_static_chain_stack_bytes() != 0))
4048 saved_int_regs
= offsets
->saved_regs_mask
;
4050 /* Unfortunately, the insn
4052 ldmib sp, {..., sp, ...}
4054 triggers a bug on most SA-110 based devices, such that the stack
4055 pointer won't be correctly restored if the instruction takes a
4056 page fault. We work around this problem by popping r3 along with
4057 the other registers, since that is never slower than executing
4058 another instruction.
4060 We test for !arm_arch5 here, because code for any architecture
4061 less than this could potentially be run on one of the buggy
4063 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
4065 /* Validate that r3 is a call-clobbered register (always true in
4066 the default abi) ... */
4067 if (!call_used_regs
[3])
4070 /* ... that it isn't being used for a return value ... */
4071 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
4074 /* ... or for a tail-call argument ... */
4077 gcc_assert (CALL_P (sibling
));
4079 if (find_regno_fusage (sibling
, USE
, 3))
4083 /* ... and that there are no call-saved registers in r0-r2
4084 (always true in the default ABI). */
4085 if (saved_int_regs
& 0x7)
4089 /* Can't be done if interworking with Thumb, and any registers have been
4091 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
4094 /* On StrongARM, conditional returns are expensive if they aren't
4095 taken and multiple registers have been stacked. */
4096 if (iscond
&& arm_tune_strongarm
)
4098 /* Conditional return when just the LR is stored is a simple
4099 conditional-load instruction, that's not expensive. */
4100 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
4104 && arm_pic_register
!= INVALID_REGNUM
4105 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
4109 /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
4110 several instructions if anything needs to be popped. */
4111 if (saved_int_regs
&& IS_CMSE_ENTRY (func_type
))
4114 /* If there are saved registers but the LR isn't saved, then we need
4115 two instructions for the return. */
4116 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
4119 /* Can't be done if any of the VFP regs are pushed,
4120 since this also requires an insn. */
4121 if (TARGET_HARD_FLOAT
)
4122 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
4123 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
4126 if (TARGET_REALLY_IWMMXT
)
4127 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
4128 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
4134 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4135 shrink-wrapping if possible. This is the case if we need to emit a
4136 prologue, which we can test by looking at the offsets. */
4138 use_simple_return_p (void)
4140 arm_stack_offsets
*offsets
;
4142 /* Note this function can be called before or after reload. */
4143 if (!reload_completed
)
4144 arm_compute_frame_layout ();
/* NOTE(review): in this extraction the return compares outgoing_args
   against 0; the upstream source compares the frame offsets to decide
   whether a prologue is needed — confirm the missing context.  */
4146 offsets
= arm_get_frame_offsets ();
4147 return offsets
->outgoing_args
!= 0;
4150 /* Return TRUE if int I is a valid immediate ARM constant. */
4153 const_ok_for_arm (HOST_WIDE_INT i
)
/* NOTE(review): partial extraction — several lines (returns, lowbit
   declaration, the TARGET_ARM/Thumb branch heads) are missing.  Tests
   whether I fits an ARM "modified immediate" (8 bits rotated by an even
   amount) or, per target, a Thumb-2 replicated pattern / MOVT form.  */
4157 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4158 be all zero, or all one. */
4159 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
4160 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
4161 != ((~(unsigned HOST_WIDE_INT
) 0)
4162 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
4165 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
4167 /* Fast return for 0 and small values. We must do this for zero, since
4168 the code below can't handle that one case. */
4169 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
4172 /* Get the number of trailing zeros. */
4173 lowbit
= ffs((int) i
) - 1;
4175 /* Only even shifts are allowed in ARM mode so round down to the
4176 nearest even number. */
4180 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
4185 /* Allow rotated constants in ARM mode. */
4187 && ((i
& ~0xc000003f) == 0
4188 || (i
& ~0xf000000f) == 0
4189 || (i
& ~0xfc000003) == 0))
4192 else if (TARGET_THUMB2
)
4196 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4199 if (i
== v
|| i
== (v
| (v
<< 8)))
4202 /* Allow repeated pattern 0xXY00XY00. */
4208 else if (TARGET_HAVE_MOVT
)
4210 /* Thumb-1 Targets with MOVT. */
4220 /* Return true if I is a valid constant for the operation CODE. */
4222 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
/* NOTE(review): partial extraction — the switch (code) skeleton and most
   case labels are missing; the surviving returns show the per-operation
   fallbacks (mvn for SET, negate for PLUS, invert for AND/IOR etc.).  */
4224 if (const_ok_for_arm (i
))
4230 /* See if we can use movw. */
4231 if (TARGET_HAVE_MOVT
&& (i
& 0xffff0000) == 0)
4234 /* Otherwise, try mvn. */
4235 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4238 /* See if we can use addw or subw. */
4240 && ((i
& 0xfffff000) == 0
4241 || ((-i
) & 0xfffff000) == 0))
4262 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
4264 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
4270 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4274 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4281 /* Return true if I is a valid di mode constant for the operation CODE. */
4283 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
/* Splits the 64-bit value into 32-bit halves and checks each half
   independently.  NOTE(review): the switch dispatching on CODE is among
   the missing extraction lines.  */
4285 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
4286 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
4287 rtx hi
= GEN_INT (hi_val
);
4288 rtx lo
= GEN_INT (lo_val
);
4298 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
4299 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
4301 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
4308 /* Emit a sequence of insns to handle a large constant.
4309 CODE is the code of the operation required, it can be any of SET, PLUS,
4310 IOR, AND, XOR, MINUS;
4311 MODE is the mode in which the operation is being performed;
4312 VAL is the integer to operate on;
4313 SOURCE is the other operand (a register, or a null-pointer for SET);
4314 SUBTARGETS means it is safe to create scratch registers if that will
4315 either produce a simpler sequence, or we will want to cse the values.
4316 Return value is the number of insns emitted. */
4318 /* ??? Tweak this for thumb2. */
4320 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
4321 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
/* NOTE(review): partial extraction — several lines (cond declaration,
   return statements, branch heads) are missing from this body.  */
/* Propagate any conditional-execution predicate from INSN so the
   synthesized sequence stays conditional.  */
4325 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
4326 cond
= COND_EXEC_TEST (PATTERN (insn
));
4330 if (subtargets
|| code
== SET
4331 || (REG_P (target
) && REG_P (source
)
4332 && REGNO (target
) != REGNO (source
)))
4334 /* After arm_reorg has been called, we can't fix up expensive
4335 constants by pushing them into memory so we must synthesize
4336 them in-line, regardless of the cost. This is only likely to
4337 be more costly on chips that have load delay slots and we are
4338 compiling without running the scheduler (so no splitting
4339 occurred before the final instruction emission).
4341 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4343 if (!cfun
->machine
->after_arm_reorg
4345 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
4347 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
4352 /* Currently SET is the only monadic value for CODE, all
4353 the rest are diadic. */
4354 if (TARGET_USE_MOVT
)
4355 arm_emit_movpair (target
, GEN_INT (val
))
;
4357 emit_set_insn (target
, GEN_INT (val
));
/* Diadic case: load VAL into a temporary, then combine with SOURCE.  */
4363 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
4365 if (TARGET_USE_MOVT
)
4366 arm_emit_movpair (temp
, GEN_INT (val
));
4368 emit_set_insn (temp
, GEN_INT (val
));
4370 /* For MINUS, the value is subtracted from, since we never
4371 have subtraction of a constant. */
4373 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4375 emit_set_insn (target
,
4376 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
/* Otherwise synthesize the constant in-line, emitting insns.  */
4382 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4386 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4387 ARM/THUMB2 immediates, and add up to VAL.
4388 Thr function return value gives the number of insns required. */
4390 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4391 struct four_ints
*return_sequence
)
/* NOTE(review): partial extraction — declarations of best_start, insns1,
   insns2 and the final return are among the missing lines.  */
4393 int best_consecutive_zeros
= 0;
4397 struct four_ints tmp_sequence
;
4399 /* If we aren't targeting ARM, the best place to start is always at
4400 the bottom, otherwise look more closely. */
/* ARM mode: scan for the largest 2-bit-aligned run of zero bits; the
   best starting rotation is just past that run.  */
4403 for (i
= 0; i
< 32; i
+= 2)
4405 int consecutive_zeros
= 0;
4407 if (!(val
& (3 << i
)))
4409 while ((i
< 32) && !(val
& (3 << i
)))
4411 consecutive_zeros
+= 2;
4414 if (consecutive_zeros
> best_consecutive_zeros
)
4416 best_consecutive_zeros
= consecutive_zeros
;
4417 best_start
= i
- consecutive_zeros
;
4424 /* So long as it won't require any more insns to do so, it's
4425 desirable to emit a small constant (in bits 0...9) in the last
4426 insn. This way there is more chance that it can be combined with
4427 a later addressing insn to form a pre-indexed load or store
4428 operation. Consider:
4430 *((volatile int *)0xe0000100) = 1;
4431 *((volatile int *)0xe0000110) = 2;
4433 We want this to wind up as:
4437 str rB, [rA, #0x100]
4439 str rB, [rA, #0x110]
4441 rather than having to synthesize both large constants from scratch.
4443 Therefore, we calculate how many insns would be required to emit
4444 the constant starting from `best_start', and also starting from
4445 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4446 yield a shorter sequence, we may as well use zero. */
4447 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4449 && ((HOST_WIDE_INT_1U
<< best_start
) < val
))
4451 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4452 if (insns2
<= insns1
)
4454 *return_sequence
= tmp_sequence
4462 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4464 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4465 struct four_ints
*return_sequence
, int i
)
/* NOTE(review): partial extraction — the outer do/while loop, the insns
   counter declaration, and several branch heads are missing here.  */
4467 int remainder
= val
& 0xffffffff;
4470 /* Try and find a way of doing the job in either two or three
4473 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4474 location. We start at position I. This may be the MSB, or
4475 optimial_immediate_sequence may have positioned it at the largest block
4476 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4477 wrapping around to the top of the word when we drop off the bottom.
4478 In the worst case this code should produce no more than four insns.
4480 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4481 constants, shifted to any arbitrary location. We should always start
4486 unsigned int b1
, b2
, b3
, b4
;
4487 unsigned HOST_WIDE_INT result
;
4490 gcc_assert (insns
< 4);
4495 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4496 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4499 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4500 /* We can use addw/subw for the last 12 bits. */
4504 /* Use an 8-bit shifted/rotated immediate. */
4508 result
= remainder
& ((0x0ff << end
)
4509 | ((i
< end
) ? (0xff >> (32 - end
))
4516 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4517 arbitrary shifts. */
4518 i
-= TARGET_ARM
? 2 : 1;
4522 /* Next, see if we can do a better job with a thumb2 replicated
4525 We do it this way around to catch the cases like 0x01F001E0 where
4526 two 8-bit immediates would work, but a replicated constant would
4529 TODO: 16-bit constants that don't clear all the bits, but still win.
4530 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
/* b1..b4 are the four bytes of the remaining value, MSB first.  */
4533 b1
= (remainder
& 0xff000000) >> 24;
4534 b2
= (remainder
& 0x00ff0000) >> 16;
4535 b3
= (remainder
& 0x0000ff00) >> 8;
4536 b4
= remainder
& 0xff;
4540 /* The 8-bit immediate already found clears b1 (and maybe b2),
4541 but must leave b3 and b4 alone. */
4543 /* First try to find a 32-bit replicated constant that clears
4544 almost everything. We can assume that we can't do it in one,
4545 or else we wouldn't be here. */
4546 unsigned int tmp
= b1
& b2
& b3
& b4
;
4547 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4549 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4550 + (tmp
== b3
) + (tmp
== b4
);
4552 && (matching_bytes
>= 3
4553 || (matching_bytes
== 2
4554 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4556 /* At least 3 of the bytes match, and the fourth has at
4557 least as many bits set, or two of the bytes match
4558 and it will only require one more insn to finish. */
4566 /* Second, try to find a 16-bit replicated constant that can
4567 leave three of the bytes clear. If b2 or b4 is already
4568 zero, then we can. If the 8-bit from above would not
4569 clear b2 anyway, then we still win. */
4570 else if (b1
== b3
&& (!b2
|| !b4
4571 || (remainder
& 0x00ff0000 & ~result
)))
4573 result
= remainder
& 0xff00ff00;
4579 /* The 8-bit immediate already found clears b2 (and maybe b3)
4580 and we don't get here unless b1 is alredy clear, but it will
4581 leave b4 unchanged. */
4583 /* If we can clear b2 and b4 at once, then we win, since the
4584 8-bits couldn't possibly reach that far. */
4587 result
= remainder
& 0x00ff00ff;
/* Record the chunk and strip it from the remaining work.  */
4593 return_sequence
->i
[insns
++] = result
;
4594 remainder
&= ~result
;
4596 if (code
== SET
|| code
== MINUS
)
4604 /* Emit an instruction with the indicated PATTERN. If COND is
4605 non-NULL, conditionalize the execution of the instruction on COND
4609 emit_constant_insn (rtx cond
, rtx pattern
)
/* Wraps PATTERN in COND_EXEC when a predicate is supplied, then emits.
   NOTE(review): the `if (cond)` guard line is missing from this
   extraction but is implied by the comment above.  */
4612 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
4613 emit_insn (pattern
);
4616 /* As above, but extra parameter GENERATE which, if clear, suppresses
4620 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
4621 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
4622 int subtargets
, int generate
)
/* NOTE(review): this large function is a partial extraction — the
   switch (code) skeleton, many `if (generate)` guards, `return`
   statements, and can_invert/can_negate declarations are missing.
   The surviving text shows the major synthesis strategies in order:
   degenerate cases, single-insn, UXTH/UBFX for AND masks, bit-run
   analysis, sign-extension/shift tricks, two-immediate differences,
   16-bit halves, IOR/XOR shortcuts, double-shift masking, and finally
   the optimal 2-4 immediate sequence chosen among positive, negated
   and inverted encodings.  */
4626 int final_invert
= 0;
4628 int set_sign_bit_copies
= 0;
4629 int clear_sign_bit_copies
= 0;
4630 int clear_zero_bit_copies
= 0;
4631 int set_zero_bit_copies
= 0;
4632 int insns
= 0, neg_insns
, inv_insns
;
4633 unsigned HOST_WIDE_INT temp1
, temp2
;
4634 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4635 struct four_ints
*immediates
;
4636 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4638 /* Find out which operations are safe for a given CODE. Also do a quick
4639 check for degenerate cases; these can occur when DImode operations
4652 if (remainder
== 0xffffffff)
4655 emit_constant_insn (cond
,
4656 gen_rtx_SET (target
,
4657 GEN_INT (ARM_SIGN_EXTEND (val
))));
4663 if (reload_completed
&& rtx_equal_p (target
, source
))
4667 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4676 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
4679 if (remainder
== 0xffffffff)
4681 if (reload_completed
&& rtx_equal_p (target
, source
))
4684 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4693 if (reload_completed
&& rtx_equal_p (target
, source
))
4696 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4700 if (remainder
== 0xffffffff)
4703 emit_constant_insn (cond
,
4704 gen_rtx_SET (target
,
4705 gen_rtx_NOT (mode
, source
)));
4712 /* We treat MINUS as (val - source), since (source - val) is always
4713 passed as (source + (-val)). */
4717 emit_constant_insn (cond
,
4718 gen_rtx_SET (target
,
4719 gen_rtx_NEG (mode
, source
)));
4722 if (const_ok_for_arm (val
))
4725 emit_constant_insn (cond
,
4726 gen_rtx_SET (target
,
4727 gen_rtx_MINUS (mode
, GEN_INT (val
),
4738 /* If we can do it in one insn get out quickly. */
4739 if (const_ok_for_op (val
, code
))
4742 emit_constant_insn (cond
,
4743 gen_rtx_SET (target
,
4745 ? gen_rtx_fmt_ee (code
, mode
, source
,
4751 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4753 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4754 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4758 if (mode
== SImode
&& i
== 16)
4759 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4761 emit_constant_insn (cond
,
4762 gen_zero_extendhisi2
4763 (target
, gen_lowpart (HImode
, source
)));
4765 /* Extz only supports SImode, but we can coerce the operands
4767 emit_constant_insn (cond
,
4768 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4769 gen_lowpart (SImode
, source
),
4770 GEN_INT (i
), const0_rtx
));
4776 /* Calculate a few attributes that may be useful for specific
4778 /* Count number of leading zeros. */
4779 for (i
= 31; i
>= 0; i
--)
4781 if ((remainder
& (1 << i
)) == 0)
4782 clear_sign_bit_copies
++;
4787 /* Count number of leading 1's. */
4788 for (i
= 31; i
>= 0; i
--)
4790 if ((remainder
& (1 << i
)) != 0)
4791 set_sign_bit_copies
++;
4796 /* Count number of trailing zero's. */
4797 for (i
= 0; i
<= 31; i
++)
4799 if ((remainder
& (1 << i
)) == 0)
4800 clear_zero_bit_copies
++;
4805 /* Count number of trailing 1's. */
4806 for (i
= 0; i
<= 31; i
++)
4808 if ((remainder
& (1 << i
)) != 0)
4809 set_zero_bit_copies
++;
4817 /* See if we can do this by sign_extending a constant that is known
4818 to be negative. This is a good, way of doing it, since the shift
4819 may well merge into a subsequent insn. */
4820 if (set_sign_bit_copies
> 1)
4822 if (const_ok_for_arm
4823 (temp1
= ARM_SIGN_EXTEND (remainder
4824 << (set_sign_bit_copies
- 1))))
4828 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4829 emit_constant_insn (cond
,
4830 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4831 emit_constant_insn (cond
,
4832 gen_ashrsi3 (target
, new_src
,
4833 GEN_INT (set_sign_bit_copies
- 1)));
4837 /* For an inverted constant, we will need to set the low bits,
4838 these will be shifted out of harm's way. */
4839 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4840 if (const_ok_for_arm (~temp1
))
4844 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4845 emit_constant_insn (cond
,
4846 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4847 emit_constant_insn (cond
,
4848 gen_ashrsi3 (target
, new_src
,
4849 GEN_INT (set_sign_bit_copies
- 1)));
4855 /* See if we can calculate the value as the difference between two
4856 valid immediates. */
4857 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4859 int topshift
= clear_sign_bit_copies
& ~1;
4861 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4862 & (0xff000000 >> topshift
));
4864 /* If temp1 is zero, then that means the 9 most significant
4865 bits of remainder were 1 and we've caused it to overflow.
4866 When topshift is 0 we don't need to do anything since we
4867 can borrow from 'bit 32'. */
4868 if (temp1
== 0 && topshift
!= 0)
4869 temp1
= 0x80000000 >> (topshift
- 1);
4871 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4873 if (const_ok_for_arm (temp2
))
4877 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4878 emit_constant_insn (cond
,
4879 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4880 emit_constant_insn (cond
,
4881 gen_addsi3 (target
, new_src
,
4889 /* See if we can generate this by setting the bottom (or the top)
4890 16 bits, and then shifting these into the other half of the
4891 word. We only look for the simplest cases, to do more would cost
4892 too much. Be careful, however, not to generate this when the
4893 alternative would take fewer insns. */
4894 if (val
& 0xffff0000)
4896 temp1
= remainder
& 0xffff0000;
4897 temp2
= remainder
& 0x0000ffff;
4899 /* Overlaps outside this range are best done using other methods. */
4900 for (i
= 9; i
< 24; i
++)
4902 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4903 && !const_ok_for_arm (temp2
))
4905 rtx new_src
= (subtargets
4906 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4908 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4909 source
, subtargets
, generate
);
4917 gen_rtx_ASHIFT (mode
, source
,
4924 /* Don't duplicate cases already considered. */
4925 for (i
= 17; i
< 24; i
++)
4927 if (((temp1
| (temp1
>> i
)) == remainder
)
4928 && !const_ok_for_arm (temp1
))
4930 rtx new_src
= (subtargets
4931 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4933 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4934 source
, subtargets
, generate
);
4939 gen_rtx_SET (target
,
4942 gen_rtx_LSHIFTRT (mode
, source
,
4953 /* If we have IOR or XOR, and the constant can be loaded in a
4954 single instruction, and we can find a temporary to put it in,
4955 then this can be done in two instructions instead of 3-4. */
4957 /* TARGET can't be NULL if SUBTARGETS is 0 */
4958 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4960 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4964 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4966 emit_constant_insn (cond
,
4967 gen_rtx_SET (sub
, GEN_INT (val
)));
4968 emit_constant_insn (cond
,
4969 gen_rtx_SET (target
,
4970 gen_rtx_fmt_ee (code
, mode
,
4981 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4982 and the remainder 0s for e.g. 0xfff00000)
4983 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4985 This can be done in 2 instructions by using shifts with mov or mvn.
4990 mvn r0, r0, lsr #12 */
4991 if (set_sign_bit_copies
> 8
4992 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
4996 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4997 rtx shift
= GEN_INT (set_sign_bit_copies
);
5003 gen_rtx_ASHIFT (mode
,
5008 gen_rtx_SET (target
,
5010 gen_rtx_LSHIFTRT (mode
, sub
,
5017 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5019 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5021 For eg. r0 = r0 | 0xfff
5026 if (set_zero_bit_copies
> 8
5027 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
5031 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5032 rtx shift
= GEN_INT (set_zero_bit_copies
);
5038 gen_rtx_LSHIFTRT (mode
,
5043 gen_rtx_SET (target
,
5045 gen_rtx_ASHIFT (mode
, sub
,
5051 /* This will never be reached for Thumb2 because orn is a valid
5052 instruction. This is for Thumb1 and the ARM 32 bit cases.
5054 x = y | constant (such that ~constant is a valid constant)
5056 x = ~(~y & ~constant).
5058 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
5062 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5063 emit_constant_insn (cond
,
5065 gen_rtx_NOT (mode
, source
)));
5068 sub
= gen_reg_rtx (mode
);
5069 emit_constant_insn (cond
,
5071 gen_rtx_AND (mode
, source
,
5073 emit_constant_insn (cond
,
5074 gen_rtx_SET (target
,
5075 gen_rtx_NOT (mode
, sub
)));
5082 /* See if two shifts will do 2 or more insn's worth of work. */
5083 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
5085 HOST_WIDE_INT shift_mask
= ((0xffffffff
5086 << (32 - clear_sign_bit_copies
))
5089 if ((remainder
| shift_mask
) != 0xffffffff)
5091 HOST_WIDE_INT new_val
5092 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5096 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5097 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
5098 new_src
, source
, subtargets
, 1);
5103 rtx targ
= subtargets
? NULL_RTX
: target
;
5104 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5105 targ
, source
, subtargets
, 0);
5111 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5112 rtx shift
= GEN_INT (clear_sign_bit_copies
);
5114 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
5115 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
5121 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
5123 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
5125 if ((remainder
| shift_mask
) != 0xffffffff)
5127 HOST_WIDE_INT new_val
5128 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5131 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5133 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5134 new_src
, source
, subtargets
, 1);
5139 rtx targ
= subtargets
? NULL_RTX
: target
;
5141 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5142 targ
, source
, subtargets
, 0);
5148 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5149 rtx shift
= GEN_INT (clear_zero_bit_copies
);
5151 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
5152 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
5164 /* Calculate what the instruction sequences would be if we generated it
5165 normally, negated, or inverted. */
5167 /* AND cannot be split into multiple insns, so invert and use BIC. */
5170 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
5173 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
5178 if (can_invert
|| final_invert
)
5179 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
5184 immediates
= &pos_immediates
;
5186 /* Is the negated immediate sequence more efficient? */
5187 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
5190 immediates
= &neg_immediates
;
5195 /* Is the inverted immediate sequence more efficient?
5196 We must allow for an extra NOT instruction for XOR operations, although
5197 there is some chance that the final 'mvn' will get optimized later. */
5198 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
5201 immediates
= &inv_immediates
;
5209 /* Now output the chosen sequence as instructions. */
5212 for (i
= 0; i
< insns
; i
++)
5214 rtx new_src
, temp1_rtx
;
5216 temp1
= immediates
->i
[i
];
5218 if (code
== SET
|| code
== MINUS
)
5219 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
5220 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
5221 new_src
= gen_reg_rtx (mode
);
5227 else if (can_negate
)
5230 temp1
= trunc_int_for_mode (temp1
, mode
);
5231 temp1_rtx
= GEN_INT (temp1
);
5235 else if (code
== MINUS
)
5236 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
5238 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
5240 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
5245 can_negate
= can_invert
;
5249 else if (code
== MINUS
)
5257 emit_constant_insn (cond
, gen_rtx_SET (target
,
5258 gen_rtx_NOT (mode
, source
)));
5265 /* Canonicalize a comparison so that we are more likely to recognize it.
5266 This can be done for a few constant compares, where we can make the
5267 immediate value easier to load. */
5270 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
5271 bool op0_preserve_value
)
/* NOTE(review): partial extraction — the mode declaration, the DImode
   dispatch, the switch on *code and its case labels/returns are missing.
   The visible fragments adjust GT/LE(U) to GE/LT(U) (and vice versa) by
   bumping the constant operand so the immediate becomes loadable.  */
5274 unsigned HOST_WIDE_INT i
, maxval
;
5276 mode
= GET_MODE (*op0
);
5277 if (mode
== VOIDmode
)
5278 mode
= GET_MODE (*op1
);
/* maxval is the largest signed value representable in MODE.  */
5280 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
5282 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5283 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5284 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5285 for GTU/LEU in Thumb mode. */
5289 if (*code
== GT
|| *code
== LE
5290 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
5292 /* Missing comparison. First try to use an available
5294 if (CONST_INT_P (*op1
))
5302 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5304 *op1
= GEN_INT (i
+ 1);
5305 *code
= *code
== GT
? GE
: LT
;
5311 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5312 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5314 *op1
= GEN_INT (i
+ 1);
5315 *code
= *code
== GTU
? GEU
: LTU
;
5324 /* If that did not work, reverse the condition. */
5325 if (!op0_preserve_value
)
5327 std::swap (*op0
, *op1
);
5328 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5334 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5335 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5336 to facilitate possible combining with a cmp into 'ands'. */
5338 && GET_CODE (*op0
) == ZERO_EXTEND
5339 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5340 && GET_MODE (XEXP (*op0
, 0)) == QImode
5341 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5342 && subreg_lowpart_p (XEXP (*op0
, 0))
5343 && *op1
== const0_rtx
)
5344 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5347 /* Comparisons smaller than DImode. Only adjust comparisons against
5348 an out-of-range constant. */
5349 if (!CONST_INT_P (*op1
)
5350 || const_ok_for_arm (INTVAL (*op1
))
5351 || const_ok_for_arm (- INTVAL (*op1
)))
5365 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5367 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5368 *code
= *code
== GT
? GE
: LT
;
5376 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5378 *op1
= GEN_INT (i
- 1);
5379 *code
= *code
== GE
? GT
: LE
;
5386 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5387 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5389 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5390 *code
= *code
== GTU
? GEU
: LTU
;
5398 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5400 *op1
= GEN_INT (i
- 1);
5401 *code
= *code
== GEU
? GTU
: LEU
;
5412 /* Define how to find the value returned by a function. */
5415 arm_function_value(const_tree type
, const_tree func
,
5416 bool outgoing ATTRIBUTE_UNUSED
)
5419 int unsignedp ATTRIBUTE_UNUSED
;
5420 rtx r ATTRIBUTE_UNUSED
;
5422 mode
= TYPE_MODE (type
);
5424 if (TARGET_AAPCS_BASED
)
5425 return aapcs_allocate_return_reg (mode
, type
, func
);
5427 /* Promote integer types. */
5428 if (INTEGRAL_TYPE_P (type
))
5429 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5431 /* Promotes small structs returned in a register to full-word size
5432 for big-endian AAPCS. */
5433 if (arm_return_in_msb (type
))
5435 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5436 if (size
% UNITS_PER_WORD
!= 0)
5438 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5439 mode
= int_mode_for_size (size
* BITS_PER_UNIT
, 0).require ();
5443 return arm_libcall_value_1 (mode
);
5446 /* libcall hashtable helpers. */
5448 struct libcall_hasher
: nofree_ptr_hash
<const rtx_def
>
5450 static inline hashval_t
hash (const rtx_def
*);
5451 static inline bool equal (const rtx_def
*, const rtx_def
*);
5452 static inline void remove (rtx_def
*);
5456 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5458 return rtx_equal_p (p1
, p2
);
5462 libcall_hasher::hash (const rtx_def
*p1
)
5464 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5467 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5470 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5472 *htab
->find_slot (libcall
, INSERT
) = libcall
;
5476 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5478 static bool init_done
= false;
5479 static libcall_table_type
*libcall_htab
= NULL
;
5485 libcall_htab
= new libcall_table_type (31);
5486 add_libcall (libcall_htab
,
5487 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5488 add_libcall (libcall_htab
,
5489 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5490 add_libcall (libcall_htab
,
5491 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5492 add_libcall (libcall_htab
,
5493 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5495 add_libcall (libcall_htab
,
5496 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5497 add_libcall (libcall_htab
,
5498 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5499 add_libcall (libcall_htab
,
5500 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5501 add_libcall (libcall_htab
,
5502 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5504 add_libcall (libcall_htab
,
5505 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5506 add_libcall (libcall_htab
,
5507 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5508 add_libcall (libcall_htab
,
5509 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5510 add_libcall (libcall_htab
,
5511 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5512 add_libcall (libcall_htab
,
5513 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5514 add_libcall (libcall_htab
,
5515 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5516 add_libcall (libcall_htab
,
5517 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5518 add_libcall (libcall_htab
,
5519 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5521 /* Values from double-precision helper functions are returned in core
5522 registers if the selected core only supports single-precision
5523 arithmetic, even if we are using the hard-float ABI. The same is
5524 true for single-precision helpers, but we will never be using the
5525 hard-float ABI on a CPU which doesn't support single-precision
5526 operations in hardware. */
5527 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5528 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5529 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5530 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5531 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5532 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5533 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5534 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5535 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5536 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5537 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5538 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5540 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5542 add_libcall (libcall_htab
,
5543 convert_optab_libfunc (trunc_optab
, HFmode
, DFmode
));
5546 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
5550 arm_libcall_value_1 (machine_mode mode
)
5552 if (TARGET_AAPCS_BASED
)
5553 return aapcs_libcall_value (mode
);
5554 else if (TARGET_IWMMXT_ABI
5555 && arm_vector_mode_supported_p (mode
))
5556 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
5558 return gen_rtx_REG (mode
, ARG_REGISTER (1));
5561 /* Define how to find the value returned by a library function
5562 assuming the value has mode MODE. */
5565 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
5567 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
5568 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5570 /* The following libcalls return their result in integer registers,
5571 even though they return a floating point value. */
5572 if (arm_libcall_uses_aapcs_base (libcall
))
5573 return gen_rtx_REG (mode
, ARG_REGISTER(1));
5577 return arm_libcall_value_1 (mode
);
5580 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5583 arm_function_value_regno_p (const unsigned int regno
)
5585 if (regno
== ARG_REGISTER (1)
5587 && TARGET_AAPCS_BASED
5588 && TARGET_HARD_FLOAT
5589 && regno
== FIRST_VFP_REGNUM
)
5590 || (TARGET_IWMMXT_ABI
5591 && regno
== FIRST_IWMMXT_REGNUM
))
5597 /* Determine the amount of memory needed to store the possible return
5598 registers of an untyped call. */
5600 arm_apply_result_size (void)
5606 if (TARGET_HARD_FLOAT_ABI
)
5608 if (TARGET_IWMMXT_ABI
)
5615 /* Decide whether TYPE should be returned in memory (true)
5616 or in a register (false). FNTYPE is the type of the function making
5619 arm_return_in_memory (const_tree type
, const_tree fntype
)
5623 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5625 if (TARGET_AAPCS_BASED
)
5627 /* Simple, non-aggregate types (ie not including vectors and
5628 complex) are always returned in a register (or registers).
5629 We don't care about which register here, so we can short-cut
5630 some of the detail. */
5631 if (!AGGREGATE_TYPE_P (type
)
5632 && TREE_CODE (type
) != VECTOR_TYPE
5633 && TREE_CODE (type
) != COMPLEX_TYPE
)
5636 /* Any return value that is no larger than one word can be
5638 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5641 /* Check any available co-processors to see if they accept the
5642 type as a register candidate (VFP, for example, can return
5643 some aggregates in consecutive registers). These aren't
5644 available if the call is variadic. */
5645 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5648 /* Vector values should be returned using ARM registers, not
5649 memory (unless they're over 16 bytes, which will break since
5650 we only have four call-clobbered registers to play with). */
5651 if (TREE_CODE (type
) == VECTOR_TYPE
)
5652 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5654 /* The rest go in memory. */
5658 if (TREE_CODE (type
) == VECTOR_TYPE
)
5659 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5661 if (!AGGREGATE_TYPE_P (type
) &&
5662 (TREE_CODE (type
) != VECTOR_TYPE
))
5663 /* All simple types are returned in registers. */
5666 if (arm_abi
!= ARM_ABI_APCS
)
5668 /* ATPCS and later return aggregate types in memory only if they are
5669 larger than a word (or are variable size). */
5670 return (size
< 0 || size
> UNITS_PER_WORD
);
5673 /* For the arm-wince targets we choose to be compatible with Microsoft's
5674 ARM and Thumb compilers, which always return aggregates in memory. */
5676 /* All structures/unions bigger than one word are returned in memory.
5677 Also catch the case where int_size_in_bytes returns -1. In this case
5678 the aggregate is either huge or of variable size, and in either case
5679 we will want to return it via memory and not in a register. */
5680 if (size
< 0 || size
> UNITS_PER_WORD
)
5683 if (TREE_CODE (type
) == RECORD_TYPE
)
5687 /* For a struct the APCS says that we only return in a register
5688 if the type is 'integer like' and every addressable element
5689 has an offset of zero. For practical purposes this means
5690 that the structure can have at most one non bit-field element
5691 and that this element must be the first one in the structure. */
5693 /* Find the first field, ignoring non FIELD_DECL things which will
5694 have been created by C++. */
5695 for (field
= TYPE_FIELDS (type
);
5696 field
&& TREE_CODE (field
) != FIELD_DECL
;
5697 field
= DECL_CHAIN (field
))
5701 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5703 /* Check that the first field is valid for returning in a register. */
5705 /* ... Floats are not allowed */
5706 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5709 /* ... Aggregates that are not themselves valid for returning in
5710 a register are not allowed. */
5711 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5714 /* Now check the remaining fields, if any. Only bitfields are allowed,
5715 since they are not addressable. */
5716 for (field
= DECL_CHAIN (field
);
5718 field
= DECL_CHAIN (field
))
5720 if (TREE_CODE (field
) != FIELD_DECL
)
5723 if (!DECL_BIT_FIELD_TYPE (field
))
5730 if (TREE_CODE (type
) == UNION_TYPE
)
5734 /* Unions can be returned in registers if every element is
5735 integral, or can be returned in an integer register. */
5736 for (field
= TYPE_FIELDS (type
);
5738 field
= DECL_CHAIN (field
))
5740 if (TREE_CODE (field
) != FIELD_DECL
)
5743 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5746 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5752 #endif /* not ARM_WINCE */
5754 /* Return all other types in memory. */
5758 const struct pcs_attribute_arg
5762 } pcs_attribute_args
[] =
5764 {"aapcs", ARM_PCS_AAPCS
},
5765 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5767 /* We could recognize these, but changes would be needed elsewhere
5768 * to implement them. */
5769 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5770 {"atpcs", ARM_PCS_ATPCS
},
5771 {"apcs", ARM_PCS_APCS
},
5773 {NULL
, ARM_PCS_UNKNOWN
}
5777 arm_pcs_from_attribute (tree attr
)
5779 const struct pcs_attribute_arg
*ptr
;
5782 /* Get the value of the argument. */
5783 if (TREE_VALUE (attr
) == NULL_TREE
5784 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5785 return ARM_PCS_UNKNOWN
;
5787 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5789 /* Check it against the list of known arguments. */
5790 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5791 if (streq (arg
, ptr
->arg
))
5794 /* An unrecognized interrupt type. */
5795 return ARM_PCS_UNKNOWN
;
5798 /* Get the PCS variant to use for this call. TYPE is the function's type
5799 specification, DECL is the specific declartion. DECL may be null if
5800 the call could be indirect or if this is a library call. */
5802 arm_get_pcs_model (const_tree type
, const_tree decl
)
5804 bool user_convention
= false;
5805 enum arm_pcs user_pcs
= arm_pcs_default
;
5810 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5813 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5814 user_convention
= true;
5817 if (TARGET_AAPCS_BASED
)
5819 /* Detect varargs functions. These always use the base rules
5820 (no argument is ever a candidate for a co-processor
5822 bool base_rules
= stdarg_p (type
);
5824 if (user_convention
)
5826 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5827 sorry ("non-AAPCS derived PCS variant");
5828 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5829 error ("variadic functions must use the base AAPCS variant");
5833 return ARM_PCS_AAPCS
;
5834 else if (user_convention
)
5836 else if (decl
&& flag_unit_at_a_time
)
5838 /* Local functions never leak outside this compilation unit,
5839 so we are free to use whatever conventions are
5841 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5842 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5844 return ARM_PCS_AAPCS_LOCAL
;
5847 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5848 sorry ("PCS variant");
5850 /* For everything else we use the target's default. */
5851 return arm_pcs_default
;
5856 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5857 const_tree fntype ATTRIBUTE_UNUSED
,
5858 rtx libcall ATTRIBUTE_UNUSED
,
5859 const_tree fndecl ATTRIBUTE_UNUSED
)
5861 /* Record the unallocated VFP registers. */
5862 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5863 pcum
->aapcs_vfp_reg_alloc
= 0;
5866 /* Walk down the type tree of TYPE counting consecutive base elements.
5867 If *MODEP is VOIDmode, then set it to the first valid floating point
5868 type. If a non-floating point type is found, or if a floating point
5869 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5870 otherwise return the count in the sub-tree. */
5872 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5877 switch (TREE_CODE (type
))
5880 mode
= TYPE_MODE (type
);
5881 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
)
5884 if (*modep
== VOIDmode
)
5893 mode
= TYPE_MODE (TREE_TYPE (type
));
5894 if (mode
!= DFmode
&& mode
!= SFmode
)
5897 if (*modep
== VOIDmode
)
5906 /* Use V2SImode and V4SImode as representatives of all 64-bit
5907 and 128-bit vector types, whether or not those modes are
5908 supported with the present options. */
5909 size
= int_size_in_bytes (type
);
5922 if (*modep
== VOIDmode
)
5925 /* Vector modes are considered to be opaque: two vectors are
5926 equivalent for the purposes of being homogeneous aggregates
5927 if they are the same size. */
5936 tree index
= TYPE_DOMAIN (type
);
5938 /* Can't handle incomplete types nor sizes that are not
5940 if (!COMPLETE_TYPE_P (type
)
5941 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5944 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5947 || !TYPE_MAX_VALUE (index
)
5948 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5949 || !TYPE_MIN_VALUE (index
)
5950 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5954 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5955 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5957 /* There must be no padding. */
5958 if (wi::to_wide (TYPE_SIZE (type
))
5959 != count
* GET_MODE_BITSIZE (*modep
))
5971 /* Can't handle incomplete types nor sizes that are not
5973 if (!COMPLETE_TYPE_P (type
)
5974 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5977 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5979 if (TREE_CODE (field
) != FIELD_DECL
)
5982 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5988 /* There must be no padding. */
5989 if (wi::to_wide (TYPE_SIZE (type
))
5990 != count
* GET_MODE_BITSIZE (*modep
))
5997 case QUAL_UNION_TYPE
:
5999 /* These aren't very interesting except in a degenerate case. */
6004 /* Can't handle incomplete types nor sizes that are not
6006 if (!COMPLETE_TYPE_P (type
)
6007 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6010 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6012 if (TREE_CODE (field
) != FIELD_DECL
)
6015 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
6018 count
= count
> sub_count
? count
: sub_count
;
6021 /* There must be no padding. */
6022 if (wi::to_wide (TYPE_SIZE (type
))
6023 != count
* GET_MODE_BITSIZE (*modep
))
6036 /* Return true if PCS_VARIANT should use VFP registers. */
6038 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
6040 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
6042 static bool seen_thumb1_vfp
= false;
6044 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
6046 sorry ("Thumb-1 hard-float VFP ABI");
6047 /* sorry() is not immediately fatal, so only display this once. */
6048 seen_thumb1_vfp
= true;
6054 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
6057 return (TARGET_32BIT
&& TARGET_HARD_FLOAT
&&
6058 (TARGET_VFP_DOUBLE
|| !is_double
));
6061 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6062 suitable for passing or returning in VFP registers for the PCS
6063 variant selected. If it is, then *BASE_MODE is updated to contain
6064 a machine mode describing each element of the argument's type and
6065 *COUNT to hold the number of such elements. */
6067 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
6068 machine_mode mode
, const_tree type
,
6069 machine_mode
*base_mode
, int *count
)
6071 machine_mode new_mode
= VOIDmode
;
6073 /* If we have the type information, prefer that to working things
6074 out from the mode. */
6077 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
6079 if (ag_count
> 0 && ag_count
<= 4)
6084 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
6085 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
6086 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
6091 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
6094 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
6100 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
6103 *base_mode
= new_mode
;
6108 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
6109 machine_mode mode
, const_tree type
)
6111 int count ATTRIBUTE_UNUSED
;
6112 machine_mode ag_mode ATTRIBUTE_UNUSED
;
6114 if (!use_vfp_abi (pcs_variant
, false))
6116 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6121 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6124 if (!use_vfp_abi (pcum
->pcs_variant
, false))
6127 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
6128 &pcum
->aapcs_vfp_rmode
,
6129 &pcum
->aapcs_vfp_rcount
);
6132 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6133 for the behaviour of this function. */
6136 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6137 const_tree type ATTRIBUTE_UNUSED
)
6140 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
6141 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
6142 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
6145 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
6146 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
6148 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
6150 || (mode
== TImode
&& ! TARGET_NEON
)
6151 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
6154 int rcount
= pcum
->aapcs_vfp_rcount
;
6156 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
6160 /* Avoid using unsupported vector modes. */
6161 if (rmode
== V2SImode
)
6163 else if (rmode
== V4SImode
)
6170 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
6171 for (i
= 0; i
< rcount
; i
++)
6173 rtx tmp
= gen_rtx_REG (rmode
,
6174 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
6175 tmp
= gen_rtx_EXPR_LIST
6177 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
6178 XVECEXP (par
, 0, i
) = tmp
;
6181 pcum
->aapcs_reg
= par
;
6184 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
6190 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6191 comment there for the behaviour of this function. */
6194 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
6196 const_tree type ATTRIBUTE_UNUSED
)
6198 if (!use_vfp_abi (pcs_variant
, false))
6202 || (GET_MODE_CLASS (mode
) == MODE_INT
6203 && GET_MODE_SIZE (mode
) >= GET_MODE_SIZE (TImode
)
6207 machine_mode ag_mode
;
6212 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6217 if (ag_mode
== V2SImode
)
6219 else if (ag_mode
== V4SImode
)
6225 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
6226 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
6227 for (i
= 0; i
< count
; i
++)
6229 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
6230 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
6231 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
6232 XVECEXP (par
, 0, i
) = tmp
;
6238 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
6242 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
6243 machine_mode mode ATTRIBUTE_UNUSED
,
6244 const_tree type ATTRIBUTE_UNUSED
)
6246 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
6247 pcum
->aapcs_vfp_reg_alloc
= 0;
6251 #define AAPCS_CP(X) \
6253 aapcs_ ## X ## _cum_init, \
6254 aapcs_ ## X ## _is_call_candidate, \
6255 aapcs_ ## X ## _allocate, \
6256 aapcs_ ## X ## _is_return_candidate, \
6257 aapcs_ ## X ## _allocate_return_reg, \
6258 aapcs_ ## X ## _advance \
6261 /* Table of co-processors that can be used to pass arguments in
6262 registers. Idealy no arugment should be a candidate for more than
6263 one co-processor table entry, but the table is processed in order
6264 and stops after the first match. If that entry then fails to put
6265 the argument into a co-processor register, the argument will go on
6269 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6270 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
6272 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6273 BLKmode) is a candidate for this co-processor's registers; this
6274 function should ignore any position-dependent state in
6275 CUMULATIVE_ARGS and only use call-type dependent information. */
6276 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6278 /* Return true if the argument does get a co-processor register; it
6279 should set aapcs_reg to an RTX of the register allocated as is
6280 required for a return from FUNCTION_ARG. */
6281 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6283 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6284 be returned in this co-processor's registers. */
6285 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
6287 /* Allocate and return an RTX element to hold the return type of a call. This
6288 routine must not fail and will only be called if is_return_candidate
6289 returned true with the same parameters. */
6290 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
6292 /* Finish processing this argument and prepare to start processing
6294 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6295 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
6303 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6308 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6309 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
6316 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
6318 /* We aren't passed a decl, so we can't check that a call is local.
6319 However, it isn't clear that that would be a win anyway, since it
6320 might limit some tail-calling opportunities. */
6321 enum arm_pcs pcs_variant
;
6325 const_tree fndecl
= NULL_TREE
;
6327 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6330 fntype
= TREE_TYPE (fntype
);
6333 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6336 pcs_variant
= arm_pcs_default
;
6338 if (pcs_variant
!= ARM_PCS_AAPCS
)
6342 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6343 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
6352 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
6355 /* We aren't passed a decl, so we can't check that a call is local.
6356 However, it isn't clear that that would be a win anyway, since it
6357 might limit some tail-calling opportunities. */
6358 enum arm_pcs pcs_variant
;
6359 int unsignedp ATTRIBUTE_UNUSED
;
6363 const_tree fndecl
= NULL_TREE
;
6365 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6368 fntype
= TREE_TYPE (fntype
);
6371 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6374 pcs_variant
= arm_pcs_default
;
6376 /* Promote integer types. */
6377 if (type
&& INTEGRAL_TYPE_P (type
))
6378 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
6380 if (pcs_variant
!= ARM_PCS_AAPCS
)
6384 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6385 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
6387 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
6391 /* Promotes small structs returned in a register to full-word size
6392 for big-endian AAPCS. */
6393 if (type
&& arm_return_in_msb (type
))
6395 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6396 if (size
% UNITS_PER_WORD
!= 0)
6398 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
6399 mode
= int_mode_for_size (size
* BITS_PER_UNIT
, 0).require ();
6403 return gen_rtx_REG (mode
, R0_REGNUM
);
6407 aapcs_libcall_value (machine_mode mode
)
6409 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
6410 && GET_MODE_SIZE (mode
) <= 4)
6413 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
6416 /* Lay out a function argument using the AAPCS rules. The rule
6417 numbers referred to here are those in the AAPCS. */
6419 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6420 const_tree type
, bool named
)
6425 /* We only need to do this once per argument. */
6426 if (pcum
->aapcs_arg_processed
)
6429 pcum
->aapcs_arg_processed
= true;
6431 /* Special case: if named is false then we are handling an incoming
6432 anonymous argument which is on the stack. */
6436 /* Is this a potential co-processor register candidate? */
6437 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6439 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
6440 pcum
->aapcs_cprc_slot
= slot
;
6442 /* We don't have to apply any of the rules from part B of the
6443 preparation phase, these are handled elsewhere in the
6448 /* A Co-processor register candidate goes either in its own
6449 class of registers or on the stack. */
6450 if (!pcum
->aapcs_cprc_failed
[slot
])
6452 /* C1.cp - Try to allocate the argument to co-processor
6454 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
6457 /* C2.cp - Put the argument on the stack and note that we
6458 can't assign any more candidates in this slot. We also
6459 need to note that we have allocated stack space, so that
6460 we won't later try to split a non-cprc candidate between
6461 core registers and the stack. */
6462 pcum
->aapcs_cprc_failed
[slot
] = true;
6463 pcum
->can_split
= false;
6466 /* We didn't get a register, so this argument goes on the
6468 gcc_assert (pcum
->can_split
== false);
6473 /* C3 - For double-word aligned arguments, round the NCRN up to the
6474 next even number. */
6475 ncrn
= pcum
->aapcs_ncrn
;
6478 int res
= arm_needs_doubleword_align (mode
, type
);
6479 /* Only warn during RTL expansion of call stmts, otherwise we would
6480 warn e.g. during gimplification even on functions that will be
6481 always inlined, and we'd warn multiple times. Don't warn when
6482 called in expand_function_start either, as we warn instead in
6483 arm_function_arg_boundary in that case. */
6484 if (res
< 0 && warn_psabi
&& currently_expanding_gimple_stmt
)
6485 inform (input_location
, "parameter passing for argument of type "
6486 "%qT changed in GCC 7.1", type
);
6491 nregs
= ARM_NUM_REGS2(mode
, type
);
6493 /* Sigh, this test should really assert that nregs > 0, but a GCC
6494 extension allows empty structs and then gives them empty size; it
6495 then allows such a structure to be passed by value. For some of
6496 the code below we have to pretend that such an argument has
6497 non-zero size so that we 'locate' it correctly either in
6498 registers or on the stack. */
6499 gcc_assert (nregs
>= 0);
6501 nregs2
= nregs
? nregs
: 1;
6503 /* C4 - Argument fits entirely in core registers. */
6504 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
6506 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6507 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
6511 /* C5 - Some core registers left and there are no arguments already
6512 on the stack: split this argument between the remaining core
6513 registers and the stack. */
6514 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
6516 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6517 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6518 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
6522 /* C6 - NCRN is set to 4. */
6523 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6525 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
6529 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6530 for a call to a function whose data type is FNTYPE.
6531 For a library call, FNTYPE is NULL. */
6533 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
6535 tree fndecl ATTRIBUTE_UNUSED
)
6537 /* Long call handling. */
6539 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6541 pcum
->pcs_variant
= arm_pcs_default
;
6543 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6545 if (arm_libcall_uses_aapcs_base (libname
))
6546 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
6548 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
6549 pcum
->aapcs_reg
= NULL_RTX
;
6550 pcum
->aapcs_partial
= 0;
6551 pcum
->aapcs_arg_processed
= false;
6552 pcum
->aapcs_cprc_slot
= -1;
6553 pcum
->can_split
= true;
6555 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6559 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6561 pcum
->aapcs_cprc_failed
[i
] = false;
6562 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
6570 /* On the ARM, the offset starts at 0. */
6572 pcum
->iwmmxt_nregs
= 0;
6573 pcum
->can_split
= true;
6575 /* Varargs vectors are treated the same as long long.
6576 named_count avoids having to change the way arm handles 'named' */
6577 pcum
->named_count
= 0;
6580 if (TARGET_REALLY_IWMMXT
&& fntype
)
6584 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
6586 fn_arg
= TREE_CHAIN (fn_arg
))
6587 pcum
->named_count
+= 1;
6589 if (! pcum
->named_count
)
6590 pcum
->named_count
= INT_MAX
;
6594 /* Return 1 if double word alignment is required for argument passing.
6595 Return -1 if double word alignment used to be required for argument
6596 passing before PR77728 ABI fix, but is not required anymore.
6597 Return 0 if double word alignment is not required and wasn't requried
6600 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
6603 return GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
;
6605 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6606 if (!AGGREGATE_TYPE_P (type
))
6607 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
6609 /* Array types: Use member alignment of element type. */
6610 if (TREE_CODE (type
) == ARRAY_TYPE
)
6611 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
6614 /* Record/aggregate types: Use greatest member alignment of any member. */
6615 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6616 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
6618 if (TREE_CODE (field
) == FIELD_DECL
)
6621 /* Before PR77728 fix, we were incorrectly considering also
6622 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6623 Make sure we can warn about that with -Wpsabi. */
6631 /* Determine where to put an argument to a function.
6632 Value is zero to push the argument on the stack,
6633 or a hard register in which to store the argument.
6635 MODE is the argument's machine mode.
6636 TYPE is the data type of the argument (as a tree).
6637 This is null for libcalls where that information may
6639 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6640 the preceding args and about the function being called.
6641 NAMED is nonzero if this argument is a named parameter
6642 (otherwise it is an extra parameter matching an ellipsis).
6644 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6645 other arguments are passed on the stack. If (NAMED == 0) (which happens
6646 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6647 defined), say it is passed in the stack (function_prologue will
6648 indeed make it pass in the stack if necessary). */
6651 arm_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
6652 const_tree type
, bool named
)
6654 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6657 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6658 a call insn (op3 of a call_value insn). */
6659 if (mode
== VOIDmode
)
6662 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6664 aapcs_layout_arg (pcum
, mode
, type
, named
);
6665 return pcum
->aapcs_reg
;
6668 /* Varargs vectors are treated the same as long long.
6669 named_count avoids having to change the way arm handles 'named' */
6670 if (TARGET_IWMMXT_ABI
6671 && arm_vector_mode_supported_p (mode
)
6672 && pcum
->named_count
> pcum
->nargs
+ 1)
6674 if (pcum
->iwmmxt_nregs
<= 9)
6675 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
6678 pcum
->can_split
= false;
6683 /* Put doubleword aligned quantities in even register pairs. */
6684 if ((pcum
->nregs
& 1) && ARM_DOUBLEWORD_ALIGN
)
6686 int res
= arm_needs_doubleword_align (mode
, type
);
6687 if (res
< 0 && warn_psabi
)
6688 inform (input_location
, "parameter passing for argument of type "
6689 "%qT changed in GCC 7.1", type
);
6694 /* Only allow splitting an arg between regs and memory if all preceding
6695 args were allocated to regs. For args passed by reference we only count
6696 the reference pointer. */
6697 if (pcum
->can_split
)
6700 nregs
= ARM_NUM_REGS2 (mode
, type
);
6702 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
6705 return gen_rtx_REG (mode
, pcum
->nregs
);
6709 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
6711 if (!ARM_DOUBLEWORD_ALIGN
)
6712 return PARM_BOUNDARY
;
6714 int res
= arm_needs_doubleword_align (mode
, type
);
6715 if (res
< 0 && warn_psabi
)
6716 inform (input_location
, "parameter passing for argument of type %qT "
6717 "changed in GCC 7.1", type
);
6719 return res
> 0 ? DOUBLEWORD_ALIGNMENT
: PARM_BOUNDARY
;
6723 arm_arg_partial_bytes (cumulative_args_t pcum_v
, machine_mode mode
,
6724 tree type
, bool named
)
6726 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6727 int nregs
= pcum
->nregs
;
6729 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6731 aapcs_layout_arg (pcum
, mode
, type
, named
);
6732 return pcum
->aapcs_partial
;
6735 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
6738 if (NUM_ARG_REGS
> nregs
6739 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
6741 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
6746 /* Update the data in PCUM to advance over an argument
6747 of mode MODE and data type TYPE.
6748 (TYPE is null for libcalls where that information may not be available.) */
6751 arm_function_arg_advance (cumulative_args_t pcum_v
, machine_mode mode
,
6752 const_tree type
, bool named
)
6754 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6756 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6758 aapcs_layout_arg (pcum
, mode
, type
, named
);
6760 if (pcum
->aapcs_cprc_slot
>= 0)
6762 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6764 pcum
->aapcs_cprc_slot
= -1;
6767 /* Generic stuff. */
6768 pcum
->aapcs_arg_processed
= false;
6769 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6770 pcum
->aapcs_reg
= NULL_RTX
;
6771 pcum
->aapcs_partial
= 0;
6776 if (arm_vector_mode_supported_p (mode
)
6777 && pcum
->named_count
> pcum
->nargs
6778 && TARGET_IWMMXT_ABI
)
6779 pcum
->iwmmxt_nregs
+= 1;
6781 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
6785 /* Variable sized types are passed by reference. This is a GCC
6786 extension to the ARM ABI. */
6789 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
6790 machine_mode mode ATTRIBUTE_UNUSED
,
6791 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6793 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

/* The pragma state currently in force for newly seen function types.  */
static arm_pragma_enum arm_pragma_long_calls = OFF;
6807 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6809 arm_pragma_long_calls
= LONG
;
6813 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6815 arm_pragma_long_calls
= SHORT
;
6819 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6821 arm_pragma_long_calls
= OFF
;
6824 /* Handle an attribute requiring a FUNCTION_DECL;
6825 arguments as in struct attribute_spec.handler. */
6827 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6828 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6830 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6832 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6834 *no_add_attrs
= true;
6840 /* Handle an "interrupt" or "isr" attribute;
6841 arguments as in struct attribute_spec.handler. */
6843 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6848 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6850 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6852 *no_add_attrs
= true;
6854 /* FIXME: the argument if any is checked for type attributes;
6855 should it be checked for decl ones? */
6859 if (TREE_CODE (*node
) == FUNCTION_TYPE
6860 || TREE_CODE (*node
) == METHOD_TYPE
)
6862 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6864 warning (OPT_Wattributes
, "%qE attribute ignored",
6866 *no_add_attrs
= true;
6869 else if (TREE_CODE (*node
) == POINTER_TYPE
6870 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6871 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6872 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6874 *node
= build_variant_type_copy (*node
);
6875 TREE_TYPE (*node
) = build_type_attribute_variant
6877 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6878 *no_add_attrs
= true;
6882 /* Possibly pass this attribute on from the type to a decl. */
6883 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6884 | (int) ATTR_FLAG_FUNCTION_NEXT
6885 | (int) ATTR_FLAG_ARRAY_NEXT
))
6887 *no_add_attrs
= true;
6888 return tree_cons (name
, args
, NULL_TREE
);
6892 warning (OPT_Wattributes
, "%qE attribute ignored",
6901 /* Handle a "pcs" attribute; arguments as in struct
6902 attribute_spec.handler. */
6904 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6905 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6907 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6909 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6910 *no_add_attrs
= true;
6915 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6916 /* Handle the "notshared" attribute. This attribute is another way of
6917 requesting hidden visibility. ARM's compiler supports
6918 "__declspec(notshared)"; we support the same thing via an
6922 arm_handle_notshared_attribute (tree
*node
,
6923 tree name ATTRIBUTE_UNUSED
,
6924 tree args ATTRIBUTE_UNUSED
,
6925 int flags ATTRIBUTE_UNUSED
,
6928 tree decl
= TYPE_NAME (*node
);
6932 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
6933 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
6934 *no_add_attrs
= false;
6940 /* This function returns true if a function with declaration FNDECL and type
6941 FNTYPE uses the stack to pass arguments or return variables and false
6942 otherwise. This is used for functions with the attributes
6943 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6944 diagnostic messages if the stack is used. NAME is the name of the attribute
6948 cmse_func_args_or_return_in_stack (tree fndecl
, tree name
, tree fntype
)
6950 function_args_iterator args_iter
;
6951 CUMULATIVE_ARGS args_so_far_v
;
6952 cumulative_args_t args_so_far
;
6953 bool first_param
= true;
6954 tree arg_type
, prev_arg_type
= NULL_TREE
, ret_type
;
6956 /* Error out if any argument is passed on the stack. */
6957 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
, fndecl
);
6958 args_so_far
= pack_cumulative_args (&args_so_far_v
);
6959 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
6962 machine_mode arg_mode
= TYPE_MODE (arg_type
);
6964 prev_arg_type
= arg_type
;
6965 if (VOID_TYPE_P (arg_type
))
6969 arm_function_arg_advance (args_so_far
, arg_mode
, arg_type
, true);
6970 arg_rtx
= arm_function_arg (args_so_far
, arg_mode
, arg_type
, true);
6972 || arm_arg_partial_bytes (args_so_far
, arg_mode
, arg_type
, true))
6974 error ("%qE attribute not available to functions with arguments "
6975 "passed on the stack", name
);
6978 first_param
= false;
6981 /* Error out for variadic functions since we cannot control how many
6982 arguments will be passed and thus stack could be used. stdarg_p () is not
6983 used for the checking to avoid browsing arguments twice. */
6984 if (prev_arg_type
!= NULL_TREE
&& !VOID_TYPE_P (prev_arg_type
))
6986 error ("%qE attribute not available to functions with variable number "
6987 "of arguments", name
);
6991 /* Error out if return value is passed on the stack. */
6992 ret_type
= TREE_TYPE (fntype
);
6993 if (arm_return_in_memory (ret_type
, fntype
))
6995 error ("%qE attribute not available to functions that return value on "
7002 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7003 function will check whether the attribute is allowed here and will add the
7004 attribute to the function declaration tree or otherwise issue a warning. */
7007 arm_handle_cmse_nonsecure_entry (tree
*node
, tree name
,
7016 *no_add_attrs
= true;
7017 warning (OPT_Wattributes
, "%qE attribute ignored without -mcmse option.",
7022 /* Ignore attribute for function types. */
7023 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7025 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
7027 *no_add_attrs
= true;
7033 /* Warn for static linkage functions. */
7034 if (!TREE_PUBLIC (fndecl
))
7036 warning (OPT_Wattributes
, "%qE attribute has no effect on functions "
7037 "with static linkage", name
);
7038 *no_add_attrs
= true;
7042 *no_add_attrs
|= cmse_func_args_or_return_in_stack (fndecl
, name
,
7043 TREE_TYPE (fndecl
));
7048 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7049 function will check whether the attribute is allowed here and will add the
7050 attribute to the function type tree or otherwise issue a diagnostic. The
7051 reason we check this at declaration time is to only allow the use of the
7052 attribute with declarations of function pointers and not function
7053 declarations. This function checks NODE is of the expected type and issues
7054 diagnostics otherwise using NAME. If it is not of the expected type
7055 *NO_ADD_ATTRS will be set to true. */
7058 arm_handle_cmse_nonsecure_call (tree
*node
, tree name
,
7063 tree decl
= NULL_TREE
, fntype
= NULL_TREE
;
7068 *no_add_attrs
= true;
7069 warning (OPT_Wattributes
, "%qE attribute ignored without -mcmse option.",
7074 if (TREE_CODE (*node
) == VAR_DECL
|| TREE_CODE (*node
) == TYPE_DECL
)
7077 fntype
= TREE_TYPE (decl
);
7080 while (fntype
!= NULL_TREE
&& TREE_CODE (fntype
) == POINTER_TYPE
)
7081 fntype
= TREE_TYPE (fntype
);
7083 if (!decl
|| TREE_CODE (fntype
) != FUNCTION_TYPE
)
7085 warning (OPT_Wattributes
, "%qE attribute only applies to base type of a "
7086 "function pointer", name
);
7087 *no_add_attrs
= true;
7091 *no_add_attrs
|= cmse_func_args_or_return_in_stack (NULL
, name
, fntype
);
7096 /* Prevent trees being shared among function types with and without
7097 cmse_nonsecure_call attribute. */
7098 type
= TREE_TYPE (decl
);
7100 type
= build_distinct_type_copy (type
);
7101 TREE_TYPE (decl
) = type
;
7104 while (TREE_CODE (fntype
) != FUNCTION_TYPE
)
7107 fntype
= TREE_TYPE (fntype
);
7108 fntype
= build_distinct_type_copy (fntype
);
7109 TREE_TYPE (type
) = fntype
;
7112 /* Construct a type attribute and add it to the function type. */
7113 tree attrs
= tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE
,
7114 TYPE_ATTRIBUTES (fntype
));
7115 TYPE_ATTRIBUTES (fntype
) = attrs
;
7119 /* Return 0 if the attributes for two types are incompatible, 1 if they
7120 are compatible, and 2 if they are nearly compatible (which causes a
7121 warning to be generated). */
7123 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
7127 /* Check for mismatch of non-default calling convention. */
7128 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
7131 /* Check for mismatched call attributes. */
7132 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7133 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7134 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7135 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7137 /* Only bother to check if an attribute is defined. */
7138 if (l1
| l2
| s1
| s2
)
7140 /* If one type has an attribute, the other must have the same attribute. */
7141 if ((l1
!= l2
) || (s1
!= s2
))
7144 /* Disallow mixed attributes. */
7145 if ((l1
& s2
) || (l2
& s1
))
7149 /* Check for mismatched ISR attribute. */
7150 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
7152 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
7153 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
7155 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
7159 l1
= lookup_attribute ("cmse_nonsecure_call",
7160 TYPE_ATTRIBUTES (type1
)) != NULL
;
7161 l2
= lookup_attribute ("cmse_nonsecure_call",
7162 TYPE_ATTRIBUTES (type2
)) != NULL
;
7170 /* Assigns default attributes to newly defined type. This is used to
7171 set short_call/long_call attributes for function types of
7172 functions defined inside corresponding #pragma scopes. */
7174 arm_set_default_type_attributes (tree type
)
7176 /* Add __attribute__ ((long_call)) to all functions, when
7177 inside #pragma long_calls or __attribute__ ((short_call)),
7178 when inside #pragma no_long_calls. */
7179 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
7181 tree type_attr_list
, attr_name
;
7182 type_attr_list
= TYPE_ATTRIBUTES (type
);
7184 if (arm_pragma_long_calls
== LONG
)
7185 attr_name
= get_identifier ("long_call");
7186 else if (arm_pragma_long_calls
== SHORT
)
7187 attr_name
= get_identifier ("short_call");
7191 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
7192 TYPE_ATTRIBUTES (type
) = type_attr_list
;
7196 /* Return true if DECL is known to be linked into section SECTION. */
7199 arm_function_in_section_p (tree decl
, section
*section
)
7201 /* We can only be certain about the prevailing symbol definition. */
7202 if (!decl_binds_to_current_def_p (decl
))
7205 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7206 if (!DECL_SECTION_NAME (decl
))
7208 /* Make sure that we will not create a unique section for DECL. */
7209 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
7213 return function_section (decl
) == section
;
7216 /* Return nonzero if a 32-bit "long_call" should be generated for
7217 a call from the current function to DECL. We generate a long_call
7220 a. has an __attribute__((long call))
7221 or b. is within the scope of a #pragma long_calls
7222 or c. the -mlong-calls command line switch has been specified
7224 However we do not generate a long call if the function:
7226 d. has an __attribute__ ((short_call))
7227 or e. is inside the scope of a #pragma no_long_calls
7228 or f. is defined in the same section as the current function. */
7231 arm_is_long_call_p (tree decl
)
7236 return TARGET_LONG_CALLS
;
7238 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
7239 if (lookup_attribute ("short_call", attrs
))
7242 /* For "f", be conservative, and only cater for cases in which the
7243 whole of the current function is placed in the same section. */
7244 if (!flag_reorder_blocks_and_partition
7245 && TREE_CODE (decl
) == FUNCTION_DECL
7246 && arm_function_in_section_p (decl
, current_function_section ()))
7249 if (lookup_attribute ("long_call", attrs
))
7252 return TARGET_LONG_CALLS
;
7255 /* Return nonzero if it is ok to make a tail-call to DECL. */
7257 arm_function_ok_for_sibcall (tree decl
, tree exp
)
7259 unsigned long func_type
;
7261 if (cfun
->machine
->sibcall_blocked
)
7264 /* Never tailcall something if we are generating code for Thumb-1. */
7268 /* The PIC register is live on entry to VxWorks PLT entries, so we
7269 must make the call before restoring the PIC register. */
7270 if (TARGET_VXWORKS_RTP
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
7273 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7274 may be used both as target of the call and base register for restoring
7275 the VFP registers */
7276 if (TARGET_APCS_FRAME
&& TARGET_ARM
7277 && TARGET_HARD_FLOAT
7278 && decl
&& arm_is_long_call_p (decl
))
7281 /* If we are interworking and the function is not declared static
7282 then we can't tail-call it unless we know that it exists in this
7283 compilation unit (since it might be a Thumb routine). */
7284 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
7285 && !TREE_ASM_WRITTEN (decl
))
7288 func_type
= arm_current_func_type ();
7289 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7290 if (IS_INTERRUPT (func_type
))
7293 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7294 generated for entry functions themselves. */
7295 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7298 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7299 this would complicate matters for later code generation. */
7300 if (TREE_CODE (exp
) == CALL_EXPR
)
7302 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7303 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype
)))
7307 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
7309 /* Check that the return value locations are the same. For
7310 example that we aren't returning a value from the sibling in
7311 a VFP register but then need to transfer it to a core
7314 tree decl_or_type
= decl
;
7316 /* If it is an indirect function pointer, get the function type. */
7318 decl_or_type
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7320 a
= arm_function_value (TREE_TYPE (exp
), decl_or_type
, false);
7321 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
7323 if (!rtx_equal_p (a
, b
))
7327 /* Never tailcall if function may be called with a misaligned SP. */
7328 if (IS_STACKALIGN (func_type
))
7331 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7332 references should become a NOP. Don't convert such calls into
7334 if (TARGET_AAPCS_BASED
7335 && arm_abi
== ARM_ABI_AAPCS
7337 && DECL_WEAK (decl
))
7340 /* We cannot do a tailcall for an indirect call by descriptor if all the
7341 argument registers are used because the only register left to load the
7342 address is IP and it will already contain the static chain. */
7343 if (!decl
&& CALL_EXPR_BY_DESCRIPTOR (exp
) && !flag_trampolines
)
7345 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7346 CUMULATIVE_ARGS cum
;
7347 cumulative_args_t cum_v
;
7349 arm_init_cumulative_args (&cum
, fntype
, NULL_RTX
, NULL_TREE
);
7350 cum_v
= pack_cumulative_args (&cum
);
7352 for (tree t
= TYPE_ARG_TYPES (fntype
); t
; t
= TREE_CHAIN (t
))
7354 tree type
= TREE_VALUE (t
);
7355 if (!VOID_TYPE_P (type
))
7356 arm_function_arg_advance (cum_v
, TYPE_MODE (type
), type
, true);
7359 if (!arm_function_arg (cum_v
, SImode
, integer_type_node
, true))
7363 /* Everything else is ok. */
7368 /* Addressing mode support functions. */
7370 /* Return nonzero if X is a legitimate immediate operand when compiling
7371 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7373 legitimate_pic_operand_p (rtx x
)
7375 if (GET_CODE (x
) == SYMBOL_REF
7376 || (GET_CODE (x
) == CONST
7377 && GET_CODE (XEXP (x
, 0)) == PLUS
7378 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
7384 /* Record that the current function needs a PIC register. Initialize
7385 cfun->machine->pic_reg if we have not already done so. */
7388 require_pic_register (void)
7390 /* A lot of the logic here is made obscure by the fact that this
7391 routine gets called as part of the rtx cost estimation process.
7392 We don't want those calls to affect any assumptions about the real
7393 function; and further, we can't call entry_of_function() until we
7394 start the real expansion process. */
7395 if (!crtl
->uses_pic_offset_table
)
7397 gcc_assert (can_create_pseudo_p ());
7398 if (arm_pic_register
!= INVALID_REGNUM
7399 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
7401 if (!cfun
->machine
->pic_reg
)
7402 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
7404 /* Play games to avoid marking the function as needing pic
7405 if we are being called as part of the cost-estimation
7407 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7408 crtl
->uses_pic_offset_table
= 1;
7412 rtx_insn
*seq
, *insn
;
7414 if (!cfun
->machine
->pic_reg
)
7415 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
7417 /* Play games to avoid marking the function as needing pic
7418 if we are being called as part of the cost-estimation
7420 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7422 crtl
->uses_pic_offset_table
= 1;
7425 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
7426 && arm_pic_register
> LAST_LO_REGNUM
)
7427 emit_move_insn (cfun
->machine
->pic_reg
,
7428 gen_rtx_REG (Pmode
, arm_pic_register
));
7430 arm_load_pic_register (0UL);
7435 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
7437 INSN_LOCATION (insn
) = prologue_location
;
7439 /* We can be called during expansion of PHI nodes, where
7440 we can't yet emit instructions directly in the final
7441 insn stream. Queue the insns on the entry edge, they will
7442 be committed after everything else is expanded. */
7443 insert_insn_on_edge (seq
,
7444 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
7451 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
7453 if (GET_CODE (orig
) == SYMBOL_REF
7454 || GET_CODE (orig
) == LABEL_REF
)
7458 gcc_assert (can_create_pseudo_p ());
7459 reg
= gen_reg_rtx (Pmode
);
7462 /* VxWorks does not impose a fixed gap between segments; the run-time
7463 gap can be different from the object-file gap. We therefore can't
7464 use GOTOFF unless we are absolutely sure that the symbol is in the
7465 same segment as the GOT. Unfortunately, the flexibility of linker
7466 scripts means that we can't be sure of that in general, so assume
7467 that GOTOFF is never valid on VxWorks. */
7468 /* References to weak symbols cannot be resolved locally: they
7469 may be overridden by a non-weak definition at link time. */
7471 if ((GET_CODE (orig
) == LABEL_REF
7472 || (GET_CODE (orig
) == SYMBOL_REF
7473 && SYMBOL_REF_LOCAL_P (orig
)
7474 && (SYMBOL_REF_DECL (orig
)
7475 ? !DECL_WEAK (SYMBOL_REF_DECL (orig
)) : 1)))
7477 && arm_pic_data_is_text_relative
)
7478 insn
= arm_pic_static_addr (orig
, reg
);
7484 /* If this function doesn't have a pic register, create one now. */
7485 require_pic_register ();
7487 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
7489 /* Make the MEM as close to a constant as possible. */
7490 mem
= SET_SRC (pat
);
7491 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
7492 MEM_READONLY_P (mem
) = 1;
7493 MEM_NOTRAP_P (mem
) = 1;
7495 insn
= emit_insn (pat
);
7498 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7500 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
7504 else if (GET_CODE (orig
) == CONST
)
7508 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7509 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
7512 /* Handle the case where we have: const (UNSPEC_TLS). */
7513 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
7514 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
7517 /* Handle the case where we have:
7518 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7520 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7521 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
7522 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
7524 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
7530 gcc_assert (can_create_pseudo_p ());
7531 reg
= gen_reg_rtx (Pmode
);
7534 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
7536 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
7537 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
7538 base
== reg
? 0 : reg
);
7540 if (CONST_INT_P (offset
))
7542 /* The base register doesn't really matter, we only want to
7543 test the index for the appropriate mode. */
7544 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
7546 gcc_assert (can_create_pseudo_p ());
7547 offset
= force_reg (Pmode
, offset
);
7550 if (CONST_INT_P (offset
))
7551 return plus_constant (Pmode
, base
, INTVAL (offset
));
7554 if (GET_MODE_SIZE (mode
) > 4
7555 && (GET_MODE_CLASS (mode
) == MODE_INT
7556 || TARGET_SOFT_FLOAT
))
7558 emit_insn (gen_addsi3 (reg
, base
, offset
));
7562 return gen_rtx_PLUS (Pmode
, base
, offset
);
7569 /* Find a spare register to use during the prolog of a function. */
7572 thumb_find_work_register (unsigned long pushed_regs_mask
)
7576 /* Check the argument registers first as these are call-used. The
7577 register allocation order means that sometimes r3 might be used
7578 but earlier argument registers might not, so check them all. */
7579 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
7580 if (!df_regs_ever_live_p (reg
))
7583 /* Before going on to check the call-saved registers we can try a couple
7584 more ways of deducing that r3 is available. The first is when we are
7585 pushing anonymous arguments onto the stack and we have less than 4
7586 registers worth of fixed arguments(*). In this case r3 will be part of
7587 the variable argument list and so we can be sure that it will be
7588 pushed right at the start of the function. Hence it will be available
7589 for the rest of the prologue.
7590 (*): ie crtl->args.pretend_args_size is greater than 0. */
7591 if (cfun
->machine
->uses_anonymous_args
7592 && crtl
->args
.pretend_args_size
> 0)
7593 return LAST_ARG_REGNUM
;
7595 /* The other case is when we have fixed arguments but less than 4 registers
7596 worth. In this case r3 might be used in the body of the function, but
7597 it is not being used to convey an argument into the function. In theory
7598 we could just check crtl->args.size to see how many bytes are
7599 being passed in argument registers, but it seems that it is unreliable.
7600 Sometimes it will have the value 0 when in fact arguments are being
7601 passed. (See testcase execute/20021111-1.c for an example). So we also
7602 check the args_info.nregs field as well. The problem with this field is
7603 that it makes no allowances for arguments that are passed to the
7604 function but which are not used. Hence we could miss an opportunity
7605 when a function has an unused argument in r3. But it is better to be
7606 safe than to be sorry. */
7607 if (! cfun
->machine
->uses_anonymous_args
7608 && crtl
->args
.size
>= 0
7609 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
7610 && (TARGET_AAPCS_BASED
7611 ? crtl
->args
.info
.aapcs_ncrn
< 4
7612 : crtl
->args
.info
.nregs
< 4))
7613 return LAST_ARG_REGNUM
;
7615 /* Otherwise look for a call-saved register that is going to be pushed. */
7616 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
7617 if (pushed_regs_mask
& (1 << reg
))
7622 /* Thumb-2 can use high regs. */
7623 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
7624 if (pushed_regs_mask
& (1 << reg
))
7627 /* Something went wrong - thumb_compute_save_reg_mask()
7628 should have arranged for a suitable register to be pushed. */
/* Counter used to generate a unique label for each PIC address load
   (consumed by arm_load_pic_register and arm_pic_static_addr).  */
7632 static GTY(()) int pic_labelno
;
7634 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7638 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
7640 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
7642 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
7645 gcc_assert (flag_pic
);
7647 pic_reg
= cfun
->machine
->pic_reg
;
7648 if (TARGET_VXWORKS_RTP
)
7650 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
7651 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7652 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
7654 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
7656 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
7657 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
7661 /* We use an UNSPEC rather than a LABEL_REF because this label
7662 never appears in the code stream. */
7664 labelno
= GEN_INT (pic_labelno
++);
7665 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7666 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7668 /* On the ARM the PC register contains 'dot + 8' at the time of the
7669 addition, on the Thumb it is 'dot + 4'. */
7670 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7671 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
7673 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7677 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7679 else /* TARGET_THUMB1 */
7681 if (arm_pic_register
!= INVALID_REGNUM
7682 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
7684 /* We will have pushed the pic register, so we should always be
7685 able to find a work register. */
7686 pic_tmp
= gen_rtx_REG (SImode
,
7687 thumb_find_work_register (saved_regs
));
7688 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
7689 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
7690 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
7692 else if (arm_pic_register
!= INVALID_REGNUM
7693 && arm_pic_register
> LAST_LO_REGNUM
7694 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
7696 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7697 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
7698 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
7701 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7705 /* Need to emit this whether or not we obey regdecls,
7706 since setjmp/longjmp can cause life info to screw up. */
7710 /* Generate code to load the address of a static var when flag_pic is set. */
7712 arm_pic_static_addr (rtx orig
, rtx reg
)
7714 rtx l1
, labelno
, offset_rtx
;
7716 gcc_assert (flag_pic
);
7718 /* We use an UNSPEC rather than a LABEL_REF because this label
7719 never appears in the code stream. */
7720 labelno
= GEN_INT (pic_labelno
++);
7721 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7722 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7724 /* On the ARM the PC register contains 'dot + 8' at the time of the
7725 addition, on the Thumb it is 'dot + 4'. */
7726 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7727 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
7728 UNSPEC_SYMBOL_OFFSET
);
7729 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
7731 return emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
7734 /* Return nonzero if X is valid as an ARM state addressing register. */
7736 arm_address_register_rtx_p (rtx x
, int strict_p
)
7746 return ARM_REGNO_OK_FOR_BASE_P (regno
);
7748 return (regno
<= LAST_ARM_REGNUM
7749 || regno
>= FIRST_PSEUDO_REGISTER
7750 || regno
== FRAME_POINTER_REGNUM
7751 || regno
== ARG_POINTER_REGNUM
);
7754 /* Return TRUE if this rtx is the difference of a symbol and a label,
7755 and will reduce to a PC-relative relocation in the object file.
7756 Expressions like this can be left alone when generating PIC, rather
7757 than forced through the GOT. */
7759 pcrel_constant_p (rtx x
)
7761 if (GET_CODE (x
) == MINUS
)
7762 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
7767 /* Return true if X will surely end up in an index register after next
7770 will_be_in_index_register (const_rtx x
)
7772 /* arm.md: calculate_pic_address will split this into a register. */
7773 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
7776 /* Return nonzero if X is a valid ARM state address operand. */
7778 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
7782 enum rtx_code code
= GET_CODE (x
);
7784 if (arm_address_register_rtx_p (x
, strict_p
))
7787 use_ldrd
= (TARGET_LDRD
7788 && (mode
== DImode
|| mode
== DFmode
));
7790 if (code
== POST_INC
|| code
== PRE_DEC
7791 || ((code
== PRE_INC
|| code
== POST_DEC
)
7792 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7793 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7795 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7796 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7797 && GET_CODE (XEXP (x
, 1)) == PLUS
7798 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7800 rtx addend
= XEXP (XEXP (x
, 1), 1);
7802 /* Don't allow ldrd post increment by register because it's hard
7803 to fixup invalid register choices. */
7805 && GET_CODE (x
) == POST_MODIFY
7809 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
7810 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
7813 /* After reload constants split into minipools will have addresses
7814 from a LABEL_REF. */
7815 else if (reload_completed
7816 && (code
== LABEL_REF
7818 && GET_CODE (XEXP (x
, 0)) == PLUS
7819 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7820 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7823 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7826 else if (code
== PLUS
)
7828 rtx xop0
= XEXP (x
, 0);
7829 rtx xop1
= XEXP (x
, 1);
7831 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7832 && ((CONST_INT_P (xop1
)
7833 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
7834 || (!strict_p
&& will_be_in_index_register (xop1
))))
7835 || (arm_address_register_rtx_p (xop1
, strict_p
)
7836 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
7840 /* Reload currently can't handle MINUS, so disable this for now */
7841 else if (GET_CODE (x
) == MINUS
)
7843 rtx xop0
= XEXP (x
, 0);
7844 rtx xop1
= XEXP (x
, 1);
7846 return (arm_address_register_rtx_p (xop0
, strict_p
)
7847 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
7851 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7852 && code
== SYMBOL_REF
7853 && CONSTANT_POOL_ADDRESS_P (x
)
7855 && symbol_mentioned_p (get_pool_constant (x
))
7856 && ! pcrel_constant_p (get_pool_constant (x
))))
7862 /* Return true if we can avoid creating a constant pool entry for x. */
7864 can_avoid_literal_pool_for_label_p (rtx x
)
7866 /* Normally we can assign constant values to target registers without
7867 the help of constant pool. But there are cases we have to use constant
7869 1) assign a label to register.
7870 2) sign-extend a 8bit value to 32bit and then assign to register.
7872 Constant pool access in format:
7873 (set (reg r0) (mem (symbol_ref (".LC0"))))
7874 will cause the use of literal pool (later in function arm_reorg).
7875 So here we mark such format as an invalid format, then the compiler
7876 will adjust it into:
7877 (set (reg r0) (symbol_ref (".LC0")))
7878 (set (reg r0) (mem (reg r0))).
7879 No extra register is required, and (mem (reg r0)) won't cause the use
7880 of literal pools. */
7881 if (arm_disable_literal_pool
&& GET_CODE (x
) == SYMBOL_REF
7882 && CONSTANT_POOL_ADDRESS_P (x
))
7888 /* Return nonzero if X is a valid Thumb-2 address operand. */
7890 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7893 enum rtx_code code
= GET_CODE (x
);
7895 if (arm_address_register_rtx_p (x
, strict_p
))
7898 use_ldrd
= (TARGET_LDRD
7899 && (mode
== DImode
|| mode
== DFmode
));
7901 if (code
== POST_INC
|| code
== PRE_DEC
7902 || ((code
== PRE_INC
|| code
== POST_DEC
)
7903 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7904 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7906 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7907 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7908 && GET_CODE (XEXP (x
, 1)) == PLUS
7909 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7911 /* Thumb-2 only has autoincrement by constant. */
7912 rtx addend
= XEXP (XEXP (x
, 1), 1);
7913 HOST_WIDE_INT offset
;
7915 if (!CONST_INT_P (addend
))
7918 offset
= INTVAL(addend
);
7919 if (GET_MODE_SIZE (mode
) <= 4)
7920 return (offset
> -256 && offset
< 256);
7922 return (use_ldrd
&& offset
> -1024 && offset
< 1024
7923 && (offset
& 3) == 0);
7926 /* After reload constants split into minipools will have addresses
7927 from a LABEL_REF. */
7928 else if (reload_completed
7929 && (code
== LABEL_REF
7931 && GET_CODE (XEXP (x
, 0)) == PLUS
7932 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7933 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7936 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7939 else if (code
== PLUS
)
7941 rtx xop0
= XEXP (x
, 0);
7942 rtx xop1
= XEXP (x
, 1);
7944 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7945 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
7946 || (!strict_p
&& will_be_in_index_register (xop1
))))
7947 || (arm_address_register_rtx_p (xop1
, strict_p
)
7948 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
7951 else if (can_avoid_literal_pool_for_label_p (x
))
7954 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7955 && code
== SYMBOL_REF
7956 && CONSTANT_POOL_ADDRESS_P (x
)
7958 && symbol_mentioned_p (get_pool_constant (x
))
7959 && ! pcrel_constant_p (get_pool_constant (x
))))
7965 /* Return nonzero if INDEX is valid for an address index operand in
7968 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
7971 HOST_WIDE_INT range
;
7972 enum rtx_code code
= GET_CODE (index
);
7974 /* Standard coprocessor addressing modes. */
7975 if (TARGET_HARD_FLOAT
7976 && (mode
== SFmode
|| mode
== DFmode
))
7977 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7978 && INTVAL (index
) > -1024
7979 && (INTVAL (index
) & 3) == 0);
7981 /* For quad modes, we restrict the constant offset to be slightly less
7982 than what the instruction format permits. We do this because for
7983 quad mode moves, we will actually decompose them into two separate
7984 double-mode reads or writes. INDEX must therefore be a valid
7985 (double-mode) offset and so should INDEX+8. */
7986 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7987 return (code
== CONST_INT
7988 && INTVAL (index
) < 1016
7989 && INTVAL (index
) > -1024
7990 && (INTVAL (index
) & 3) == 0);
7992 /* We have no such constraint on double mode offsets, so we permit the
7993 full range of the instruction format. */
7994 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7995 return (code
== CONST_INT
7996 && INTVAL (index
) < 1024
7997 && INTVAL (index
) > -1024
7998 && (INTVAL (index
) & 3) == 0);
8000 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
8001 return (code
== CONST_INT
8002 && INTVAL (index
) < 1024
8003 && INTVAL (index
) > -1024
8004 && (INTVAL (index
) & 3) == 0);
8006 if (arm_address_register_rtx_p (index
, strict_p
)
8007 && (GET_MODE_SIZE (mode
) <= 4))
8010 if (mode
== DImode
|| mode
== DFmode
)
8012 if (code
== CONST_INT
)
8014 HOST_WIDE_INT val
= INTVAL (index
);
8016 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8017 If vldr is selected it uses arm_coproc_mem_operand. */
8019 return val
> -256 && val
< 256;
8021 return val
> -4096 && val
< 4092;
8024 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
8027 if (GET_MODE_SIZE (mode
) <= 4
8031 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
8035 rtx xiop0
= XEXP (index
, 0);
8036 rtx xiop1
= XEXP (index
, 1);
8038 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
8039 && power_of_two_operand (xiop1
, SImode
))
8040 || (arm_address_register_rtx_p (xiop1
, strict_p
)
8041 && power_of_two_operand (xiop0
, SImode
)));
8043 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
8044 || code
== ASHIFT
|| code
== ROTATERT
)
8046 rtx op
= XEXP (index
, 1);
8048 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
8051 && INTVAL (op
) <= 31);
8055 /* For ARM v4 we may be doing a sign-extend operation during the
8061 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
8067 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
8069 return (code
== CONST_INT
8070 && INTVAL (index
) < range
8071 && INTVAL (index
) > -range
);
8074 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8075 index operand. i.e. 1, 2, 4 or 8. */
8077 thumb2_index_mul_operand (rtx op
)
8081 if (!CONST_INT_P (op
))
8085 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
8088 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8090 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
8092 enum rtx_code code
= GET_CODE (index
);
8094 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8095 /* Standard coprocessor addressing modes. */
8096 if (TARGET_HARD_FLOAT
8097 && (mode
== SFmode
|| mode
== DFmode
))
8098 return (code
== CONST_INT
&& INTVAL (index
) < 1024
8099 /* Thumb-2 allows only > -256 index range for it's core register
8100 load/stores. Since we allow SF/DF in core registers, we have
8101 to use the intersection between -256~4096 (core) and -1024~1024
8103 && INTVAL (index
) > -256
8104 && (INTVAL (index
) & 3) == 0);
8106 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
8108 /* For DImode assume values will usually live in core regs
8109 and only allow LDRD addressing modes. */
8110 if (!TARGET_LDRD
|| mode
!= DImode
)
8111 return (code
== CONST_INT
8112 && INTVAL (index
) < 1024
8113 && INTVAL (index
) > -1024
8114 && (INTVAL (index
) & 3) == 0);
8117 /* For quad modes, we restrict the constant offset to be slightly less
8118 than what the instruction format permits. We do this because for
8119 quad mode moves, we will actually decompose them into two separate
8120 double-mode reads or writes. INDEX must therefore be a valid
8121 (double-mode) offset and so should INDEX+8. */
8122 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
8123 return (code
== CONST_INT
8124 && INTVAL (index
) < 1016
8125 && INTVAL (index
) > -1024
8126 && (INTVAL (index
) & 3) == 0);
8128 /* We have no such constraint on double mode offsets, so we permit the
8129 full range of the instruction format. */
8130 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
8131 return (code
== CONST_INT
8132 && INTVAL (index
) < 1024
8133 && INTVAL (index
) > -1024
8134 && (INTVAL (index
) & 3) == 0);
8136 if (arm_address_register_rtx_p (index
, strict_p
)
8137 && (GET_MODE_SIZE (mode
) <= 4))
8140 if (mode
== DImode
|| mode
== DFmode
)
8142 if (code
== CONST_INT
)
8144 HOST_WIDE_INT val
= INTVAL (index
);
8145 /* Thumb-2 ldrd only has reg+const addressing modes.
8146 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8147 If vldr is selected it uses arm_coproc_mem_operand. */
8149 return IN_RANGE (val
, -1020, 1020) && (val
& 3) == 0;
8151 return IN_RANGE (val
, -255, 4095 - 4);
8159 rtx xiop0
= XEXP (index
, 0);
8160 rtx xiop1
= XEXP (index
, 1);
8162 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
8163 && thumb2_index_mul_operand (xiop1
))
8164 || (arm_address_register_rtx_p (xiop1
, strict_p
)
8165 && thumb2_index_mul_operand (xiop0
)));
8167 else if (code
== ASHIFT
)
8169 rtx op
= XEXP (index
, 1);
8171 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
8174 && INTVAL (op
) <= 3);
8177 return (code
== CONST_INT
8178 && INTVAL (index
) < 4096
8179 && INTVAL (index
) > -256);
8182 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8184 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
8194 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
8196 return (regno
<= LAST_LO_REGNUM
8197 || regno
> LAST_VIRTUAL_REGISTER
8198 || regno
== FRAME_POINTER_REGNUM
8199 || (GET_MODE_SIZE (mode
) >= 4
8200 && (regno
== STACK_POINTER_REGNUM
8201 || regno
>= FIRST_PSEUDO_REGISTER
8202 || x
== hard_frame_pointer_rtx
8203 || x
== arg_pointer_rtx
)));
8206 /* Return nonzero if x is a legitimate index register. This is the case
8207 for any base register that can access a QImode object. */
8209 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
8211 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
8214 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8216 The AP may be eliminated to either the SP or the FP, so we use the
8217 least common denominator, e.g. SImode, and offsets from 0 to 64.
8219 ??? Verify whether the above is the right approach.
8221 ??? Also, the FP may be eliminated to the SP, so perhaps that
8222 needs special handling also.
8224 ??? Look at how the mips16 port solves this problem. It probably uses
8225 better ways to solve some of these problems.
8227 Although it is not incorrect, we don't accept QImode and HImode
8228 addresses based on the frame pointer or arg pointer until the
8229 reload pass starts. This is so that eliminating such addresses
8230 into stack based ones won't produce impossible code. */
8232 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
8234 if (TARGET_HAVE_MOVT
&& can_avoid_literal_pool_for_label_p (x
))
8237 /* ??? Not clear if this is right. Experiment. */
8238 if (GET_MODE_SIZE (mode
) < 4
8239 && !(reload_in_progress
|| reload_completed
)
8240 && (reg_mentioned_p (frame_pointer_rtx
, x
)
8241 || reg_mentioned_p (arg_pointer_rtx
, x
)
8242 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
8243 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
8244 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
8245 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
8248 /* Accept any base register. SP only in SImode or larger. */
8249 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
8252 /* This is PC relative data before arm_reorg runs. */
8253 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
8254 && GET_CODE (x
) == SYMBOL_REF
8255 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
8258 /* This is PC relative data after arm_reorg runs. */
8259 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
8261 && (GET_CODE (x
) == LABEL_REF
8262 || (GET_CODE (x
) == CONST
8263 && GET_CODE (XEXP (x
, 0)) == PLUS
8264 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8265 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8268 /* Post-inc indexing only supported for SImode and larger. */
8269 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
8270 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
8273 else if (GET_CODE (x
) == PLUS
)
8275 /* REG+REG address can be any two index registers. */
8276 /* We disallow FRAME+REG addressing since we know that FRAME
8277 will be replaced with STACK, and SP relative addressing only
8278 permits SP+OFFSET. */
8279 if (GET_MODE_SIZE (mode
) <= 4
8280 && XEXP (x
, 0) != frame_pointer_rtx
8281 && XEXP (x
, 1) != frame_pointer_rtx
8282 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8283 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
8284 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
8287 /* REG+const has 5-7 bit offset for non-SP registers. */
8288 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8289 || XEXP (x
, 0) == arg_pointer_rtx
)
8290 && CONST_INT_P (XEXP (x
, 1))
8291 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
8294 /* REG+const has 10-bit offset for SP, but only SImode and
8295 larger is supported. */
8296 /* ??? Should probably check for DI/DFmode overflow here
8297 just like GO_IF_LEGITIMATE_OFFSET does. */
8298 else if (REG_P (XEXP (x
, 0))
8299 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
8300 && GET_MODE_SIZE (mode
) >= 4
8301 && CONST_INT_P (XEXP (x
, 1))
8302 && INTVAL (XEXP (x
, 1)) >= 0
8303 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
8304 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8307 else if (REG_P (XEXP (x
, 0))
8308 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
8309 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
8310 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
8311 && REGNO (XEXP (x
, 0))
8312 <= LAST_VIRTUAL_POINTER_REGISTER
))
8313 && GET_MODE_SIZE (mode
) >= 4
8314 && CONST_INT_P (XEXP (x
, 1))
8315 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8319 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8320 && GET_MODE_SIZE (mode
) == 4
8321 && GET_CODE (x
) == SYMBOL_REF
8322 && CONSTANT_POOL_ADDRESS_P (x
)
8324 && symbol_mentioned_p (get_pool_constant (x
))
8325 && ! pcrel_constant_p (get_pool_constant (x
))))
8331 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8332 instruction of mode MODE. */
8334 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
8336 switch (GET_MODE_SIZE (mode
))
8339 return val
>= 0 && val
< 32;
8342 return val
>= 0 && val
< 64 && (val
& 1) == 0;
8346 && (val
+ GET_MODE_SIZE (mode
)) <= 128
8352 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
8355 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
8356 else if (TARGET_THUMB2
)
8357 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
8358 else /* if (TARGET_THUMB1) */
8359 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
8362 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8364 Given an rtx X being reloaded into a reg required to be
8365 in class CLASS, return the class of reg to actually use.
8366 In general this is just CLASS, but for the Thumb core registers and
8367 immediate constants we prefer a LO_REGS class or a subset. */
8370 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
8376 if (rclass
== GENERAL_REGS
)
8383 /* Build the SYMBOL_REF for __tls_get_addr. */
8385 static GTY(()) rtx tls_get_addr_libfunc
;
8388 get_tls_get_addr (void)
8390 if (!tls_get_addr_libfunc
)
8391 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
8392 return tls_get_addr_libfunc
;
8396 arm_load_tp (rtx target
)
8399 target
= gen_reg_rtx (SImode
);
8403 /* Can return in any reg. */
8404 emit_insn (gen_load_tp_hard (target
));
8408 /* Always returned in r0. Immediately copy the result into a pseudo,
8409 otherwise other uses of r0 (e.g. setting up function arguments) may
8410 clobber the value. */
8414 emit_insn (gen_load_tp_soft ());
8416 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
8417 emit_move_insn (target
, tmp
);
8423 load_tls_operand (rtx x
, rtx reg
)
8427 if (reg
== NULL_RTX
)
8428 reg
= gen_reg_rtx (SImode
);
8430 tmp
= gen_rtx_CONST (SImode
, x
);
8432 emit_move_insn (reg
, tmp
);
8438 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
8440 rtx label
, labelno
, sum
;
8442 gcc_assert (reloc
!= TLS_DESCSEQ
);
8445 labelno
= GEN_INT (pic_labelno
++);
8446 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8447 label
= gen_rtx_CONST (VOIDmode
, label
);
8449 sum
= gen_rtx_UNSPEC (Pmode
,
8450 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
8451 GEN_INT (TARGET_ARM
? 8 : 4)),
8453 reg
= load_tls_operand (sum
, reg
);
8456 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
8458 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8460 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
8461 LCT_PURE
, /* LCT_CONST? */
8464 rtx_insn
*insns
= get_insns ();
8471 arm_tls_descseq_addr (rtx x
, rtx reg
)
8473 rtx labelno
= GEN_INT (pic_labelno
++);
8474 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8475 rtx sum
= gen_rtx_UNSPEC (Pmode
,
8476 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
8477 gen_rtx_CONST (VOIDmode
, label
),
8478 GEN_INT (!TARGET_ARM
)),
8480 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
8482 emit_insn (gen_tlscall (x
, labelno
));
8484 reg
= gen_reg_rtx (SImode
);
8486 gcc_assert (REGNO (reg
) != R0_REGNUM
);
8488 emit_move_insn (reg
, reg0
);
8494 legitimize_tls_address (rtx x
, rtx reg
)
8496 rtx dest
, tp
, label
, labelno
, sum
, ret
, eqv
, addend
;
8498 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
8502 case TLS_MODEL_GLOBAL_DYNAMIC
:
8503 if (TARGET_GNU2_TLS
)
8505 reg
= arm_tls_descseq_addr (x
, reg
);
8507 tp
= arm_load_tp (NULL_RTX
);
8509 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8513 /* Original scheme */
8514 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
8515 dest
= gen_reg_rtx (Pmode
);
8516 emit_libcall_block (insns
, dest
, ret
, x
);
8520 case TLS_MODEL_LOCAL_DYNAMIC
:
8521 if (TARGET_GNU2_TLS
)
8523 reg
= arm_tls_descseq_addr (x
, reg
);
8525 tp
= arm_load_tp (NULL_RTX
);
8527 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8531 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
8533 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8534 share the LDM result with other LD model accesses. */
8535 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
8537 dest
= gen_reg_rtx (Pmode
);
8538 emit_libcall_block (insns
, dest
, ret
, eqv
);
8540 /* Load the addend. */
8541 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
8542 GEN_INT (TLS_LDO32
)),
8544 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
8545 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
8549 case TLS_MODEL_INITIAL_EXEC
:
8550 labelno
= GEN_INT (pic_labelno
++);
8551 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8552 label
= gen_rtx_CONST (VOIDmode
, label
);
8553 sum
= gen_rtx_UNSPEC (Pmode
,
8554 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
8555 GEN_INT (TARGET_ARM
? 8 : 4)),
8557 reg
= load_tls_operand (sum
, reg
);
8560 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
8561 else if (TARGET_THUMB2
)
8562 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
8565 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8566 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
8569 tp
= arm_load_tp (NULL_RTX
);
8571 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8573 case TLS_MODEL_LOCAL_EXEC
:
8574 tp
= arm_load_tp (NULL_RTX
);
8576 reg
= gen_rtx_UNSPEC (Pmode
,
8577 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
8579 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
8581 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8588 /* Try machine-dependent ways of modifying an illegitimate address
8589 to be legitimate. If we find one, return the new, valid address. */
8591 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8593 if (arm_tls_referenced_p (x
))
8597 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
8599 addend
= XEXP (XEXP (x
, 0), 1);
8600 x
= XEXP (XEXP (x
, 0), 0);
8603 if (GET_CODE (x
) != SYMBOL_REF
)
8606 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
8608 x
= legitimize_tls_address (x
, NULL_RTX
);
8612 x
= gen_rtx_PLUS (SImode
, x
, addend
);
8621 /* TODO: legitimize_address for Thumb2. */
8624 return thumb_legitimize_address (x
, orig_x
, mode
);
8627 if (GET_CODE (x
) == PLUS
)
8629 rtx xop0
= XEXP (x
, 0);
8630 rtx xop1
= XEXP (x
, 1);
8632 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
8633 xop0
= force_reg (SImode
, xop0
);
8635 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
8636 && !symbol_mentioned_p (xop1
))
8637 xop1
= force_reg (SImode
, xop1
);
8639 if (ARM_BASE_REGISTER_RTX_P (xop0
)
8640 && CONST_INT_P (xop1
))
8642 HOST_WIDE_INT n
, low_n
;
8646 /* VFP addressing modes actually allow greater offsets, but for
8647 now we just stick with the lowest common denominator. */
8648 if (mode
== DImode
|| mode
== DFmode
)
8660 low_n
= ((mode
) == TImode
? 0
8661 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
8665 base_reg
= gen_reg_rtx (SImode
);
8666 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
8667 emit_move_insn (base_reg
, val
);
8668 x
= plus_constant (Pmode
, base_reg
, low_n
);
8670 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8671 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8674 /* XXX We don't allow MINUS any more -- see comment in
8675 arm_legitimate_address_outer_p (). */
8676 else if (GET_CODE (x
) == MINUS
)
8678 rtx xop0
= XEXP (x
, 0);
8679 rtx xop1
= XEXP (x
, 1);
8681 if (CONSTANT_P (xop0
))
8682 xop0
= force_reg (SImode
, xop0
);
8684 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
8685 xop1
= force_reg (SImode
, xop1
);
8687 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8688 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
8691 /* Make sure to take full advantage of the pre-indexed addressing mode
8692 with absolute addresses which often allows for the base register to
8693 be factorized for multiple adjacent memory references, and it might
8694 even allows for the mini pool to be avoided entirely. */
8695 else if (CONST_INT_P (x
) && optimize
> 0)
8698 HOST_WIDE_INT mask
, base
, index
;
8701 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8702 use a 8-bit index. So let's use a 12-bit index for SImode only and
8703 hope that arm_gen_constant will enable ldrb to use more bits. */
8704 bits
= (mode
== SImode
) ? 12 : 8;
8705 mask
= (1 << bits
) - 1;
8706 base
= INTVAL (x
) & ~mask
;
8707 index
= INTVAL (x
) & mask
;
8708 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
8710 /* It'll most probably be more efficient to generate the base
8711 with more bits set and use a negative index instead. */
8715 base_reg
= force_reg (SImode
, GEN_INT (base
));
8716 x
= plus_constant (Pmode
, base_reg
, index
);
8721 /* We need to find and carefully transform any SYMBOL and LABEL
8722 references; so go back to the original address expression. */
8723 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8725 if (new_x
!= orig_x
)
8733 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8734 to be legitimate. If we find one, return the new, valid address. */
8736 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8738 if (GET_CODE (x
) == PLUS
8739 && CONST_INT_P (XEXP (x
, 1))
8740 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
8741 || INTVAL (XEXP (x
, 1)) < 0))
8743 rtx xop0
= XEXP (x
, 0);
8744 rtx xop1
= XEXP (x
, 1);
8745 HOST_WIDE_INT offset
= INTVAL (xop1
);
8747 /* Try and fold the offset into a biasing of the base register and
8748 then offsetting that. Don't do this when optimizing for space
8749 since it can cause too many CSEs. */
8750 if (optimize_size
&& offset
>= 0
8751 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
8753 HOST_WIDE_INT delta
;
8756 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
8757 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
8758 delta
= 31 * GET_MODE_SIZE (mode
);
8760 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
8762 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
8764 x
= plus_constant (Pmode
, xop0
, delta
);
8766 else if (offset
< 0 && offset
> -256)
8767 /* Small negative offsets are best done with a subtract before the
8768 dereference, forcing these into a register normally takes two
8770 x
= force_operand (x
, NULL_RTX
);
8773 /* For the remaining cases, force the constant into a register. */
8774 xop1
= force_reg (SImode
, xop1
);
8775 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8778 else if (GET_CODE (x
) == PLUS
8779 && s_register_operand (XEXP (x
, 1), SImode
)
8780 && !s_register_operand (XEXP (x
, 0), SImode
))
8782 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
8784 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
8789 /* We need to find and carefully transform any SYMBOL and LABEL
8790 references; so go back to the original address expression. */
8791 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8793 if (new_x
!= orig_x
)
8800 /* Return TRUE if X contains any TLS symbol references. */
8803 arm_tls_referenced_p (rtx x
)
8805 if (! TARGET_HAVE_TLS
)
8808 subrtx_iterator::array_type array
;
8809 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
8811 const_rtx x
= *iter
;
8812 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
8814 /* ARM currently does not provide relocations to encode TLS variables
8815 into AArch32 instructions, only data, so there is no way to
8816 currently implement these if a literal pool is disabled. */
8817 if (arm_disable_literal_pool
)
8818 sorry ("accessing thread-local storage is not currently supported "
8819 "with -mpure-code or -mslow-flash-data");
8824 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8825 TLS offsets, not real symbol references. */
8826 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
8827 iter
.skip_subrtxes ();
8832 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8834 On the ARM, allow any integer (invalid ones are removed later by insn
8835 patterns), nice doubles and symbol_refs which refer to the function's
8838 When generating pic allow anything. */
8841 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
8843 return flag_pic
|| !label_mentioned_p (x
);
8847 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8849 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8850 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8851 for ARMv8-M Baseline or later the result is valid. */
8852 if (TARGET_HAVE_MOVT
&& GET_CODE (x
) == HIGH
)
8855 return (CONST_INT_P (x
)
8856 || CONST_DOUBLE_P (x
)
8857 || CONSTANT_ADDRESS_P (x
)
8858 || (TARGET_HAVE_MOVT
&& GET_CODE (x
) == SYMBOL_REF
)
8863 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
8865 return (!arm_cannot_force_const_mem (mode
, x
)
8867 ? arm_legitimate_constant_p_1 (mode
, x
)
8868 : thumb_legitimate_constant_p (mode
, x
)));
8871 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8874 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8878 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
8880 split_const (x
, &base
, &offset
);
8881 if (GET_CODE (base
) == SYMBOL_REF
8882 && !offset_within_block_p (base
, INTVAL (offset
)))
8885 return arm_tls_referenced_p (x
);
8888 #define REG_OR_SUBREG_REG(X) \
8890 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8892 #define REG_OR_SUBREG_RTX(X) \
8893 (REG_P (X) ? (X) : SUBREG_REG (X))
8896 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8898 machine_mode mode
= GET_MODE (x
);
8907 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8914 return COSTS_N_INSNS (1);
8917 if (arm_arch6m
&& arm_m_profile_small_mul
)
8918 return COSTS_N_INSNS (32);
8920 if (CONST_INT_P (XEXP (x
, 1)))
8923 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8930 return COSTS_N_INSNS (2) + cycles
;
8932 return COSTS_N_INSNS (1) + 16;
8935 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8937 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8938 return (COSTS_N_INSNS (words
)
8939 + 4 * ((MEM_P (SET_SRC (x
)))
8940 + MEM_P (SET_DEST (x
))));
8945 if (UINTVAL (x
) < 256
8946 /* 16-bit constant. */
8947 || (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000)))
8949 if (thumb_shiftable_const (INTVAL (x
)))
8950 return COSTS_N_INSNS (2);
8951 return COSTS_N_INSNS (3);
8953 else if ((outer
== PLUS
|| outer
== COMPARE
)
8954 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8956 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8957 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8958 return COSTS_N_INSNS (1);
8959 else if (outer
== AND
)
8962 /* This duplicates the tests in the andsi3 expander. */
8963 for (i
= 9; i
<= 31; i
++)
8964 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8965 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8966 return COSTS_N_INSNS (2);
8968 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8969 || outer
== LSHIFTRT
)
8971 return COSTS_N_INSNS (2);
8977 return COSTS_N_INSNS (3);
8995 /* XXX another guess. */
8996 /* Memory costs quite a lot for the first word, but subsequent words
8997 load at the equivalent of a single insn each. */
8998 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8999 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9004 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9010 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
9011 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
9017 return total
+ COSTS_N_INSNS (1);
9019 /* Assume a two-shift sequence. Increase the cost slightly so
9020 we prefer actual shifts over an extend operation. */
9021 return total
+ 1 + COSTS_N_INSNS (2);
9028 /* Estimates the size cost of thumb1 instructions.
9029 For now most of the code is copied from thumb1_rtx_costs. We need more
9030 fine grain tuning when we have more related test cases. */
9032 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
9034 machine_mode mode
= GET_MODE (x
);
9043 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9047 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9048 defined by RTL expansion, especially for the expansion of
9050 if ((GET_CODE (XEXP (x
, 0)) == MULT
9051 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
9052 || (GET_CODE (XEXP (x
, 1)) == MULT
9053 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
9054 return COSTS_N_INSNS (2);
9059 return COSTS_N_INSNS (1);
9062 if (CONST_INT_P (XEXP (x
, 1)))
9064 /* Thumb1 mul instruction can't operate on const. We must Load it
9065 into a register first. */
9066 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
9067 /* For the targets which have a very small and high-latency multiply
9068 unit, we prefer to synthesize the mult with up to 5 instructions,
9069 giving a good balance between size and performance. */
9070 if (arm_arch6m
&& arm_m_profile_small_mul
)
9071 return COSTS_N_INSNS (5);
9073 return COSTS_N_INSNS (1) + const_size
;
9075 return COSTS_N_INSNS (1);
9078 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9080 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
9081 cost
= COSTS_N_INSNS (words
);
9082 if (satisfies_constraint_J (SET_SRC (x
))
9083 || satisfies_constraint_K (SET_SRC (x
))
9084 /* Too big an immediate for a 2-byte mov, using MOVT. */
9085 || (CONST_INT_P (SET_SRC (x
))
9086 && UINTVAL (SET_SRC (x
)) >= 256
9088 && satisfies_constraint_j (SET_SRC (x
)))
9089 /* thumb1_movdi_insn. */
9090 || ((words
> 1) && MEM_P (SET_SRC (x
))))
9091 cost
+= COSTS_N_INSNS (1);
9097 if (UINTVAL (x
) < 256)
9098 return COSTS_N_INSNS (1);
9099 /* movw is 4byte long. */
9100 if (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000))
9101 return COSTS_N_INSNS (2);
9102 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9103 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
9104 return COSTS_N_INSNS (2);
9105 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9106 if (thumb_shiftable_const (INTVAL (x
)))
9107 return COSTS_N_INSNS (2);
9108 return COSTS_N_INSNS (3);
9110 else if ((outer
== PLUS
|| outer
== COMPARE
)
9111 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
9113 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
9114 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
9115 return COSTS_N_INSNS (1);
9116 else if (outer
== AND
)
9119 /* This duplicates the tests in the andsi3 expander. */
9120 for (i
= 9; i
<= 31; i
++)
9121 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
9122 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
9123 return COSTS_N_INSNS (2);
9125 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
9126 || outer
== LSHIFTRT
)
9128 return COSTS_N_INSNS (2);
9134 return COSTS_N_INSNS (3);
9148 return COSTS_N_INSNS (1);
9151 return (COSTS_N_INSNS (1)
9153 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9154 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9155 ? COSTS_N_INSNS (1) : 0));
9159 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9164 /* XXX still guessing. */
9165 switch (GET_MODE (XEXP (x
, 0)))
9168 return (1 + (mode
== DImode
? 4 : 0)
9169 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9172 return (4 + (mode
== DImode
? 4 : 0)
9173 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9176 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9187 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9188 operand, then return the operand that is being shifted. If the shift
9189 is not by a constant, then set SHIFT_REG to point to the operand.
9190 Return NULL if OP is not a shifter operand. */
9192 shifter_op_p (rtx op
, rtx
*shift_reg
)
9194 enum rtx_code code
= GET_CODE (op
);
9196 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9197 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9198 return XEXP (op
, 0);
9199 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9200 return XEXP (op
, 0);
9201 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9202 || code
== ASHIFTRT
)
9204 if (!CONST_INT_P (XEXP (op
, 1)))
9205 *shift_reg
= XEXP (op
, 1);
9206 return XEXP (op
, 0);
9213 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9215 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9216 rtx_code code
= GET_CODE (x
);
9217 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
9219 switch (XINT (x
, 1))
9221 case UNSPEC_UNALIGNED_LOAD
:
9222 /* We can only do unaligned loads into the integer unit, and we can't
9224 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9226 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9227 + extra_cost
->ldst
.load_unaligned
);
9230 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9231 ADDR_SPACE_GENERIC
, speed_p
);
9235 case UNSPEC_UNALIGNED_STORE
:
9236 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9238 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9239 + extra_cost
->ldst
.store_unaligned
);
9241 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
9243 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9244 ADDR_SPACE_GENERIC
, speed_p
);
9255 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9259 *cost
= COSTS_N_INSNS (2);
9265 /* Cost of a libcall. We assume one insn per argument, an amount for the
9266 call (one insn for -Os) and then one for processing the result. */
9267 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9269 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9272 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9273 if (shift_op != NULL \
9274 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9279 *cost += extra_cost->alu.arith_shift_reg; \
9280 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9281 ASHIFT, 1, speed_p); \
9284 *cost += extra_cost->alu.arith_shift; \
9286 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9287 ASHIFT, 0, speed_p) \
9288 + rtx_cost (XEXP (x, 1 - IDX), \
9289 GET_MODE (shift_op), \
9296 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9297 considering the costs of the addressing mode and memory access
9300 arm_mem_costs (rtx x
, const struct cpu_cost_table
*extra_cost
,
9301 int *cost
, bool speed_p
)
9303 machine_mode mode
= GET_MODE (x
);
9305 *cost
= COSTS_N_INSNS (1);
9308 && GET_CODE (XEXP (x
, 0)) == PLUS
9309 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9310 /* This will be split into two instructions. Add the cost of the
9311 additional instruction here. The cost of the memory access is computed
9312 below. See arm.md:calculate_pic_address. */
9313 *cost
+= COSTS_N_INSNS (1);
9315 /* Calculate cost of the addressing mode. */
9318 arm_addr_mode_op op_type
;
9319 switch (GET_CODE (XEXP (x
, 0)))
9323 op_type
= AMO_DEFAULT
;
9326 /* MINUS does not appear in RTL, but the architecture supports it,
9327 so handle this case defensively. */
9330 op_type
= AMO_NO_WB
;
9342 if (VECTOR_MODE_P (mode
))
9343 *cost
+= current_tune
->addr_mode_costs
->vector
[op_type
];
9344 else if (FLOAT_MODE_P (mode
))
9345 *cost
+= current_tune
->addr_mode_costs
->fp
[op_type
];
9347 *cost
+= current_tune
->addr_mode_costs
->integer
[op_type
];
9350 /* Calculate cost of memory access. */
9353 if (FLOAT_MODE_P (mode
))
9355 if (GET_MODE_SIZE (mode
) == 8)
9356 *cost
+= extra_cost
->ldst
.loadd
;
9358 *cost
+= extra_cost
->ldst
.loadf
;
9360 else if (VECTOR_MODE_P (mode
))
9361 *cost
+= extra_cost
->ldst
.loadv
;
9365 if (GET_MODE_SIZE (mode
) == 8)
9366 *cost
+= extra_cost
->ldst
.ldrd
;
9368 *cost
+= extra_cost
->ldst
.load
;
9375 /* RTX costs. Make an estimate of the cost of executing the operation
9376 X, which is contained within an operation with code OUTER_CODE.
9377 SPEED_P indicates whether the cost desired is the performance cost,
9378 or the size cost. The estimate is stored in COST and the return
9379 value is TRUE if the cost calculation is final, or FALSE if the
9380 caller should recurse through the operands of X to add additional
9383 We currently make no attempt to model the size savings of Thumb-2
9384 16-bit instructions. At the normal points in compilation where
9385 this code is called we have no measure of whether the condition
9386 flags are live or not, and thus no realistic way to determine what
9387 the size will eventually be. */
9389 arm_rtx_costs_internal (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9390 const struct cpu_cost_table
*extra_cost
,
9391 int *cost
, bool speed_p
)
9393 machine_mode mode
= GET_MODE (x
);
9395 *cost
= COSTS_N_INSNS (1);
9400 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9402 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9410 /* SET RTXs don't have a mode so we get it from the destination. */
9411 mode
= GET_MODE (SET_DEST (x
));
9413 if (REG_P (SET_SRC (x
))
9414 && REG_P (SET_DEST (x
)))
9416 /* Assume that most copies can be done with a single insn,
9417 unless we don't have HW FP, in which case everything
9418 larger than word mode will require two insns. */
9419 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9420 && GET_MODE_SIZE (mode
) > 4)
9423 /* Conditional register moves can be encoded
9424 in 16 bits in Thumb mode. */
9425 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9431 if (CONST_INT_P (SET_SRC (x
)))
9433 /* Handle CONST_INT here, since the value doesn't have a mode
9434 and we would otherwise be unable to work out the true cost. */
9435 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
9438 /* Slightly lower the cost of setting a core reg to a constant.
9439 This helps break up chains and allows for better scheduling. */
9440 if (REG_P (SET_DEST (x
))
9441 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9444 /* Immediate moves with an immediate in the range [0, 255] can be
9445 encoded in 16 bits in Thumb mode. */
9446 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9447 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9449 goto const_int_cost
;
9455 return arm_mem_costs (x
, extra_cost
, cost
, speed_p
);
9459 /* Calculations of LDM costs are complex. We assume an initial cost
9460 (ldm_1st) which will load the number of registers mentioned in
9461 ldm_regs_per_insn_1st registers; then each additional
9462 ldm_regs_per_insn_subsequent registers cost one more insn. The
9463 formula for N regs is thus:
9465 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9466 + ldm_regs_per_insn_subsequent - 1)
9467 / ldm_regs_per_insn_subsequent).
9469 Additional costs may also be added for addressing. A similar
9470 formula is used for STM. */
9472 bool is_ldm
= load_multiple_operation (x
, SImode
);
9473 bool is_stm
= store_multiple_operation (x
, SImode
);
9475 if (is_ldm
|| is_stm
)
9479 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9480 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9481 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9482 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9483 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9484 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9485 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9487 *cost
+= regs_per_insn_1st
9488 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9489 + regs_per_insn_sub
- 1)
9490 / regs_per_insn_sub
);
9499 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9500 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9501 *cost
+= COSTS_N_INSNS (speed_p
9502 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
9503 else if (mode
== SImode
&& TARGET_IDIV
)
9504 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
9506 *cost
= LIBCALL_COST (2);
9508 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9509 possible udiv is prefered. */
9510 *cost
+= (code
== DIV
? COSTS_N_INSNS (1) : 0);
9511 return false; /* All arguments must be in registers. */
9514 /* MOD by a power of 2 can be expanded as:
9516 and r0, r0, #(n - 1)
9517 and r1, r1, #(n - 1)
9518 rsbpl r0, r1, #0. */
9519 if (CONST_INT_P (XEXP (x
, 1))
9520 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
9523 *cost
+= COSTS_N_INSNS (3);
9526 *cost
+= 2 * extra_cost
->alu
.logical
9527 + extra_cost
->alu
.arith
;
9533 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9534 possible udiv is prefered. */
9535 *cost
= LIBCALL_COST (2) + (code
== MOD
? COSTS_N_INSNS (1) : 0);
9536 return false; /* All arguments must be in registers. */
9539 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9541 *cost
+= (COSTS_N_INSNS (1)
9542 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9544 *cost
+= extra_cost
->alu
.shift_reg
;
9552 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9554 *cost
+= (COSTS_N_INSNS (2)
9555 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9557 *cost
+= 2 * extra_cost
->alu
.shift
;
9558 /* Slightly disparage left shift by 1 at so we prefer adddi3. */
9559 if (code
== ASHIFT
&& XEXP (x
, 1) == CONST1_RTX (SImode
))
9563 else if (mode
== SImode
)
9565 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9566 /* Slightly disparage register shifts at -Os, but not by much. */
9567 if (!CONST_INT_P (XEXP (x
, 1)))
9568 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9569 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9572 else if (GET_MODE_CLASS (mode
) == MODE_INT
9573 && GET_MODE_SIZE (mode
) < 4)
9577 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9578 /* Slightly disparage register shifts at -Os, but not by
9580 if (!CONST_INT_P (XEXP (x
, 1)))
9581 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9582 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9584 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9586 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9588 /* Can use SBFX/UBFX. */
9590 *cost
+= extra_cost
->alu
.bfx
;
9591 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9595 *cost
+= COSTS_N_INSNS (1);
9596 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9599 if (CONST_INT_P (XEXP (x
, 1)))
9600 *cost
+= 2 * extra_cost
->alu
.shift
;
9602 *cost
+= (extra_cost
->alu
.shift
9603 + extra_cost
->alu
.shift_reg
);
9606 /* Slightly disparage register shifts. */
9607 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9612 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
9613 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9616 if (CONST_INT_P (XEXP (x
, 1)))
9617 *cost
+= (2 * extra_cost
->alu
.shift
9618 + extra_cost
->alu
.log_shift
);
9620 *cost
+= (extra_cost
->alu
.shift
9621 + extra_cost
->alu
.shift_reg
9622 + extra_cost
->alu
.log_shift_reg
);
9628 *cost
= LIBCALL_COST (2);
9637 *cost
+= extra_cost
->alu
.rev
;
9644 /* No rev instruction available. Look at arm_legacy_rev
9645 and thumb_legacy_rev for the form of RTL used then. */
9648 *cost
+= COSTS_N_INSNS (9);
9652 *cost
+= 6 * extra_cost
->alu
.shift
;
9653 *cost
+= 3 * extra_cost
->alu
.logical
;
9658 *cost
+= COSTS_N_INSNS (4);
9662 *cost
+= 2 * extra_cost
->alu
.shift
;
9663 *cost
+= extra_cost
->alu
.arith_shift
;
9664 *cost
+= 2 * extra_cost
->alu
.logical
;
9672 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9673 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9675 if (GET_CODE (XEXP (x
, 0)) == MULT
9676 || GET_CODE (XEXP (x
, 1)) == MULT
)
9678 rtx mul_op0
, mul_op1
, sub_op
;
9681 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9683 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9685 mul_op0
= XEXP (XEXP (x
, 0), 0);
9686 mul_op1
= XEXP (XEXP (x
, 0), 1);
9687 sub_op
= XEXP (x
, 1);
9691 mul_op0
= XEXP (XEXP (x
, 1), 0);
9692 mul_op1
= XEXP (XEXP (x
, 1), 1);
9693 sub_op
= XEXP (x
, 0);
9696 /* The first operand of the multiply may be optionally
9698 if (GET_CODE (mul_op0
) == NEG
)
9699 mul_op0
= XEXP (mul_op0
, 0);
9701 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9702 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9703 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
9709 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9715 rtx shift_by_reg
= NULL
;
9719 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9720 if (shift_op
== NULL
)
9722 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9723 non_shift_op
= XEXP (x
, 0);
9726 non_shift_op
= XEXP (x
, 1);
9728 if (shift_op
!= NULL
)
9730 if (shift_by_reg
!= NULL
)
9733 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9734 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
9737 *cost
+= extra_cost
->alu
.arith_shift
;
9739 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
9740 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
9745 && GET_CODE (XEXP (x
, 1)) == MULT
)
9749 *cost
+= extra_cost
->mult
[0].add
;
9750 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
9751 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
9752 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
9756 if (CONST_INT_P (XEXP (x
, 0)))
9758 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9759 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9761 *cost
= COSTS_N_INSNS (insns
);
9763 *cost
+= insns
* extra_cost
->alu
.arith
;
9764 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9768 *cost
+= extra_cost
->alu
.arith
;
9773 if (GET_MODE_CLASS (mode
) == MODE_INT
9774 && GET_MODE_SIZE (mode
) < 4)
9776 rtx shift_op
, shift_reg
;
9779 /* We check both sides of the MINUS for shifter operands since,
9780 unlike PLUS, it's not commutative. */
9782 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0);
9783 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1);
9785 /* Slightly disparage, as we might need to widen the result. */
9788 *cost
+= extra_cost
->alu
.arith
;
9790 if (CONST_INT_P (XEXP (x
, 0)))
9792 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9801 *cost
+= COSTS_N_INSNS (1);
9803 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9805 rtx op1
= XEXP (x
, 1);
9808 *cost
+= 2 * extra_cost
->alu
.arith
;
9810 if (GET_CODE (op1
) == ZERO_EXTEND
)
9811 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
9814 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
9815 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9819 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9822 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9823 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
9825 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
9828 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9829 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9832 *cost
+= (extra_cost
->alu
.arith
9833 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9834 ? extra_cost
->alu
.arith
9835 : extra_cost
->alu
.arith_shift
));
9836 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
9837 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9838 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9843 *cost
+= 2 * extra_cost
->alu
.arith
;
9849 *cost
= LIBCALL_COST (2);
9853 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9854 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9856 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9858 rtx mul_op0
, mul_op1
, add_op
;
9861 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9863 mul_op0
= XEXP (XEXP (x
, 0), 0);
9864 mul_op1
= XEXP (XEXP (x
, 0), 1);
9865 add_op
= XEXP (x
, 1);
9867 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9868 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9869 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
9875 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9878 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9880 *cost
= LIBCALL_COST (2);
9884 /* Narrow modes can be synthesized in SImode, but the range
9885 of useful sub-operations is limited. Check for shift operations
9886 on one of the operands. Only left shifts can be used in the
9888 if (GET_MODE_CLASS (mode
) == MODE_INT
9889 && GET_MODE_SIZE (mode
) < 4)
9891 rtx shift_op
, shift_reg
;
9894 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0);
9896 if (CONST_INT_P (XEXP (x
, 1)))
9898 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9899 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9901 *cost
= COSTS_N_INSNS (insns
);
9903 *cost
+= insns
* extra_cost
->alu
.arith
;
9904 /* Slightly penalize a narrow operation as the result may
9906 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9910 /* Slightly penalize a narrow operation as the result may
9914 *cost
+= extra_cost
->alu
.arith
;
9921 rtx shift_op
, shift_reg
;
9924 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9925 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9927 /* UXTA[BH] or SXTA[BH]. */
9929 *cost
+= extra_cost
->alu
.extend_arith
;
9930 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9932 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
9937 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9938 if (shift_op
!= NULL
)
9943 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9944 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9947 *cost
+= extra_cost
->alu
.arith_shift
;
9949 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9950 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9953 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9955 rtx mul_op
= XEXP (x
, 0);
9957 if (TARGET_DSP_MULTIPLY
9958 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9959 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9960 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9961 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9962 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9963 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9964 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9965 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9966 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9967 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9968 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9969 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9974 *cost
+= extra_cost
->mult
[0].extend_add
;
9975 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
9976 SIGN_EXTEND
, 0, speed_p
)
9977 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
9978 SIGN_EXTEND
, 0, speed_p
)
9979 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9984 *cost
+= extra_cost
->mult
[0].add
;
9985 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
9986 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
9987 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9990 if (CONST_INT_P (XEXP (x
, 1)))
9992 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9993 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9995 *cost
= COSTS_N_INSNS (insns
);
9997 *cost
+= insns
* extra_cost
->alu
.arith
;
9998 *cost
+= rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
10002 *cost
+= extra_cost
->alu
.arith
;
10007 if (mode
== DImode
)
10010 && GET_CODE (XEXP (x
, 0)) == MULT
10011 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
10012 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
10013 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
10014 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
10017 *cost
+= extra_cost
->mult
[1].extend_add
;
10018 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10019 ZERO_EXTEND
, 0, speed_p
)
10020 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
10021 ZERO_EXTEND
, 0, speed_p
)
10022 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10026 *cost
+= COSTS_N_INSNS (1);
10028 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10029 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10032 *cost
+= (extra_cost
->alu
.arith
10033 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10034 ? extra_cost
->alu
.arith
10035 : extra_cost
->alu
.arith_shift
));
10037 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10039 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10044 *cost
+= 2 * extra_cost
->alu
.arith
;
10049 *cost
= LIBCALL_COST (2);
10052 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
10055 *cost
+= extra_cost
->alu
.rev
;
10059 /* Fall through. */
10060 case AND
: case XOR
:
10061 if (mode
== SImode
)
10063 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
10064 rtx op0
= XEXP (x
, 0);
10065 rtx shift_op
, shift_reg
;
10069 || (code
== IOR
&& TARGET_THUMB2
)))
10070 op0
= XEXP (op0
, 0);
10073 shift_op
= shifter_op_p (op0
, &shift_reg
);
10074 if (shift_op
!= NULL
)
10079 *cost
+= extra_cost
->alu
.log_shift_reg
;
10080 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10083 *cost
+= extra_cost
->alu
.log_shift
;
10085 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
10086 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10090 if (CONST_INT_P (XEXP (x
, 1)))
10092 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
10093 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10096 *cost
= COSTS_N_INSNS (insns
);
10098 *cost
+= insns
* extra_cost
->alu
.logical
;
10099 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
10104 *cost
+= extra_cost
->alu
.logical
;
10105 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
10106 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10110 if (mode
== DImode
)
10112 rtx op0
= XEXP (x
, 0);
10113 enum rtx_code subcode
= GET_CODE (op0
);
10115 *cost
+= COSTS_N_INSNS (1);
10119 || (code
== IOR
&& TARGET_THUMB2
)))
10120 op0
= XEXP (op0
, 0);
10122 if (GET_CODE (op0
) == ZERO_EXTEND
)
10125 *cost
+= 2 * extra_cost
->alu
.logical
;
10127 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
10129 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
10132 else if (GET_CODE (op0
) == SIGN_EXTEND
)
10135 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
10137 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
10139 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
10144 *cost
+= 2 * extra_cost
->alu
.logical
;
10150 *cost
= LIBCALL_COST (2);
10154 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10155 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10157 rtx op0
= XEXP (x
, 0);
10159 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
10160 op0
= XEXP (op0
, 0);
10163 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10165 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
10166 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
10169 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10171 *cost
= LIBCALL_COST (2);
10175 if (mode
== SImode
)
10177 if (TARGET_DSP_MULTIPLY
10178 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10179 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10180 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10181 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10182 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10183 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10184 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10185 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10186 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10187 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10188 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10189 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10192 /* SMUL[TB][TB]. */
10194 *cost
+= extra_cost
->mult
[0].extend
;
10195 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
10196 SIGN_EXTEND
, 0, speed_p
);
10197 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
10198 SIGN_EXTEND
, 1, speed_p
);
10202 *cost
+= extra_cost
->mult
[0].simple
;
10206 if (mode
== DImode
)
10209 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10210 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10211 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10212 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10215 *cost
+= extra_cost
->mult
[1].extend
;
10216 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
10217 ZERO_EXTEND
, 0, speed_p
)
10218 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10219 ZERO_EXTEND
, 0, speed_p
));
10223 *cost
= LIBCALL_COST (2);
10228 *cost
= LIBCALL_COST (2);
10232 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10233 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10235 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10238 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
10243 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10247 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10249 *cost
= LIBCALL_COST (1);
10253 if (mode
== SImode
)
10255 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10257 *cost
+= COSTS_N_INSNS (1);
10258 /* Assume the non-flag-changing variant. */
10260 *cost
+= (extra_cost
->alu
.log_shift
10261 + extra_cost
->alu
.arith_shift
);
10262 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
10266 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10267 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10269 *cost
+= COSTS_N_INSNS (1);
10270 /* No extra cost for MOV imm and MVN imm. */
10271 /* If the comparison op is using the flags, there's no further
10272 cost, otherwise we need to add the cost of the comparison. */
10273 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10274 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10275 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10277 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
10278 *cost
+= (COSTS_N_INSNS (1)
10279 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
10281 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
10284 *cost
+= extra_cost
->alu
.arith
;
10290 *cost
+= extra_cost
->alu
.arith
;
10294 if (GET_MODE_CLASS (mode
) == MODE_INT
10295 && GET_MODE_SIZE (mode
) < 4)
10297 /* Slightly disparage, as we might need an extend operation. */
10300 *cost
+= extra_cost
->alu
.arith
;
10304 if (mode
== DImode
)
10306 *cost
+= COSTS_N_INSNS (1);
10308 *cost
+= 2 * extra_cost
->alu
.arith
;
10313 *cost
= LIBCALL_COST (1);
10317 if (mode
== SImode
)
10320 rtx shift_reg
= NULL
;
10322 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10326 if (shift_reg
!= NULL
)
10329 *cost
+= extra_cost
->alu
.log_shift_reg
;
10330 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10333 *cost
+= extra_cost
->alu
.log_shift
;
10334 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
10339 *cost
+= extra_cost
->alu
.logical
;
10342 if (mode
== DImode
)
10344 *cost
+= COSTS_N_INSNS (1);
10350 *cost
+= LIBCALL_COST (1);
10355 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10357 *cost
+= COSTS_N_INSNS (3);
10360 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
10361 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
10363 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
10364 /* Assume that if one arm of the if_then_else is a register,
10365 that it will be tied with the result and eliminate the
10366 conditional insn. */
10367 if (REG_P (XEXP (x
, 1)))
10369 else if (REG_P (XEXP (x
, 2)))
10375 if (extra_cost
->alu
.non_exec_costs_exec
)
10376 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10378 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10381 *cost
+= op1cost
+ op2cost
;
10387 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10391 machine_mode op0mode
;
10392 /* We'll mostly assume that the cost of a compare is the cost of the
10393 LHS. However, there are some notable exceptions. */
10395 /* Floating point compares are never done as side-effects. */
10396 op0mode
= GET_MODE (XEXP (x
, 0));
10397 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10398 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10401 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10403 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10405 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
10411 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10413 *cost
= LIBCALL_COST (2);
10417 /* DImode compares normally take two insns. */
10418 if (op0mode
== DImode
)
10420 *cost
+= COSTS_N_INSNS (1);
10422 *cost
+= 2 * extra_cost
->alu
.arith
;
10426 if (op0mode
== SImode
)
10431 if (XEXP (x
, 1) == const0_rtx
10432 && !(REG_P (XEXP (x
, 0))
10433 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10434 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10436 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10438 /* Multiply operations that set the flags are often
10439 significantly more expensive. */
10441 && GET_CODE (XEXP (x
, 0)) == MULT
10442 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10443 *cost
+= extra_cost
->mult
[0].flag_setting
;
10446 && GET_CODE (XEXP (x
, 0)) == PLUS
10447 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10448 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10450 *cost
+= extra_cost
->mult
[0].flag_setting
;
10455 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10456 if (shift_op
!= NULL
)
10458 if (shift_reg
!= NULL
)
10460 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
10463 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10466 *cost
+= extra_cost
->alu
.arith_shift
;
10467 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
10468 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
10473 *cost
+= extra_cost
->alu
.arith
;
10474 if (CONST_INT_P (XEXP (x
, 1))
10475 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10477 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10485 *cost
= LIBCALL_COST (2);
10508 if (outer_code
== SET
)
10510 /* Is it a store-flag operation? */
10511 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10512 && XEXP (x
, 1) == const0_rtx
)
10514 /* Thumb also needs an IT insn. */
10515 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
10518 if (XEXP (x
, 1) == const0_rtx
)
10523 /* LSR Rd, Rn, #31. */
10525 *cost
+= extra_cost
->alu
.shift
;
10535 *cost
+= COSTS_N_INSNS (1);
10539 /* RSBS T1, Rn, Rn, LSR #31
10541 *cost
+= COSTS_N_INSNS (1);
10543 *cost
+= extra_cost
->alu
.arith_shift
;
10547 /* RSB Rd, Rn, Rn, ASR #1
10548 LSR Rd, Rd, #31. */
10549 *cost
+= COSTS_N_INSNS (1);
10551 *cost
+= (extra_cost
->alu
.arith_shift
10552 + extra_cost
->alu
.shift
);
10558 *cost
+= COSTS_N_INSNS (1);
10560 *cost
+= extra_cost
->alu
.shift
;
10564 /* Remaining cases are either meaningless or would take
10565 three insns anyway. */
10566 *cost
= COSTS_N_INSNS (3);
10569 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10574 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10575 if (CONST_INT_P (XEXP (x
, 1))
10576 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10578 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10585 /* Not directly inside a set. If it involves the condition code
10586 register it must be the condition for a branch, cond_exec or
10587 I_T_E operation. Since the comparison is performed elsewhere
10588 this is just the control part which has no additional
10590 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10591 && XEXP (x
, 1) == const0_rtx
)
10599 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10600 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10603 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10607 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10609 *cost
= LIBCALL_COST (1);
10613 if (mode
== SImode
)
10616 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10620 *cost
= LIBCALL_COST (1);
10624 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10625 && MEM_P (XEXP (x
, 0)))
10627 if (mode
== DImode
)
10628 *cost
+= COSTS_N_INSNS (1);
10633 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10634 *cost
+= extra_cost
->ldst
.load
;
10636 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10638 if (mode
== DImode
)
10639 *cost
+= extra_cost
->alu
.shift
;
10644 /* Widening from less than 32-bits requires an extend operation. */
10645 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10647 /* We have SXTB/SXTH. */
10648 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10650 *cost
+= extra_cost
->alu
.extend
;
10652 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10654 /* Needs two shifts. */
10655 *cost
+= COSTS_N_INSNS (1);
10656 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10658 *cost
+= 2 * extra_cost
->alu
.shift
;
10661 /* Widening beyond 32-bits requires one more insn. */
10662 if (mode
== DImode
)
10664 *cost
+= COSTS_N_INSNS (1);
10666 *cost
+= extra_cost
->alu
.shift
;
10673 || GET_MODE (XEXP (x
, 0)) == SImode
10674 || GET_MODE (XEXP (x
, 0)) == QImode
)
10675 && MEM_P (XEXP (x
, 0)))
10677 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10679 if (mode
== DImode
)
10680 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10685 /* Widening from less than 32-bits requires an extend operation. */
10686 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10688 /* UXTB can be a shorter instruction in Thumb2, but it might
10689 be slower than the AND Rd, Rn, #255 alternative. When
10690 optimizing for speed it should never be slower to use
10691 AND, and we don't really model 16-bit vs 32-bit insns
10694 *cost
+= extra_cost
->alu
.logical
;
10696 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10698 /* We have UXTB/UXTH. */
10699 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10701 *cost
+= extra_cost
->alu
.extend
;
10703 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10705 /* Needs two shifts. It's marginally preferable to use
10706 shifts rather than two BIC instructions as the second
10707 shift may merge with a subsequent insn as a shifter
10709 *cost
= COSTS_N_INSNS (2);
10710 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10712 *cost
+= 2 * extra_cost
->alu
.shift
;
10715 /* Widening beyond 32-bits requires one more insn. */
10716 if (mode
== DImode
)
10718 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10725 /* CONST_INT has no mode, so we cannot tell for sure how many
10726 insns are really going to be needed. The best we can do is
10727 look at the value passed. If it fits in SImode, then assume
10728 that's the mode it will be used for. Otherwise assume it
10729 will be used in DImode. */
10730 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10735 /* Avoid blowing up in arm_gen_constant (). */
10736 if (!(outer_code
== PLUS
10737 || outer_code
== AND
10738 || outer_code
== IOR
10739 || outer_code
== XOR
10740 || outer_code
== MINUS
))
10744 if (mode
== SImode
)
10746 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10747 INTVAL (x
), NULL
, NULL
,
10753 *cost
+= COSTS_N_INSNS (arm_gen_constant
10754 (outer_code
, SImode
, NULL
,
10755 trunc_int_for_mode (INTVAL (x
), SImode
),
10757 + arm_gen_constant (outer_code
, SImode
, NULL
,
10758 INTVAL (x
) >> 32, NULL
,
10770 if (arm_arch_thumb2
&& !flag_pic
)
10771 *cost
+= COSTS_N_INSNS (1);
10773 *cost
+= extra_cost
->ldst
.load
;
10776 *cost
+= COSTS_N_INSNS (1);
10780 *cost
+= COSTS_N_INSNS (1);
10782 *cost
+= extra_cost
->alu
.arith
;
10788 *cost
= COSTS_N_INSNS (4);
10793 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10794 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10796 if (vfp3_const_double_rtx (x
))
10799 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10805 if (mode
== DFmode
)
10806 *cost
+= extra_cost
->ldst
.loadd
;
10808 *cost
+= extra_cost
->ldst
.loadf
;
10811 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
10815 *cost
= COSTS_N_INSNS (4);
10821 && TARGET_HARD_FLOAT
10822 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10823 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10824 *cost
= COSTS_N_INSNS (1);
10826 *cost
= COSTS_N_INSNS (4);
10831 /* When optimizing for size, we prefer constant pool entries to
10832 MOVW/MOVT pairs, so bump the cost of these slightly. */
10839 *cost
+= extra_cost
->alu
.clz
;
10843 if (XEXP (x
, 1) == const0_rtx
)
10846 *cost
+= extra_cost
->alu
.log_shift
;
10847 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10850 /* Fall through. */
10854 *cost
+= COSTS_N_INSNS (1);
10858 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10859 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10860 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10861 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10862 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10863 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10864 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10865 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10869 *cost
+= extra_cost
->mult
[1].extend
;
10870 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
10871 ZERO_EXTEND
, 0, speed_p
)
10872 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
10873 ZERO_EXTEND
, 0, speed_p
));
10876 *cost
= LIBCALL_COST (1);
10879 case UNSPEC_VOLATILE
:
10881 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10884 /* Reading the PC is like reading any other register. Writing it
10885 is more expensive, but we take that into account elsewhere. */
10890 /* TODO: Simple zero_extract of bottom bits using AND. */
10891 /* Fall through. */
10895 && CONST_INT_P (XEXP (x
, 1))
10896 && CONST_INT_P (XEXP (x
, 2)))
10899 *cost
+= extra_cost
->alu
.bfx
;
10900 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10903 /* Without UBFX/SBFX, need to resort to shift operations. */
10904 *cost
+= COSTS_N_INSNS (1);
10906 *cost
+= 2 * extra_cost
->alu
.shift
;
10907 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
10911 if (TARGET_HARD_FLOAT
)
10914 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10916 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10918 /* Pre v8, widening HF->DF is a two-step process, first
10919 widening to SFmode. */
10920 *cost
+= COSTS_N_INSNS (1);
10922 *cost
+= extra_cost
->fp
[0].widen
;
10924 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10928 *cost
= LIBCALL_COST (1);
10931 case FLOAT_TRUNCATE
:
10932 if (TARGET_HARD_FLOAT
)
10935 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10936 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10938 /* Vector modes? */
10940 *cost
= LIBCALL_COST (1);
10944 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
10946 rtx op0
= XEXP (x
, 0);
10947 rtx op1
= XEXP (x
, 1);
10948 rtx op2
= XEXP (x
, 2);
10951 /* vfms or vfnma. */
10952 if (GET_CODE (op0
) == NEG
)
10953 op0
= XEXP (op0
, 0);
10955 /* vfnms or vfnma. */
10956 if (GET_CODE (op2
) == NEG
)
10957 op2
= XEXP (op2
, 0);
10959 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
10960 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
10961 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
10964 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
10969 *cost
= LIBCALL_COST (3);
10974 if (TARGET_HARD_FLOAT
)
10976 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10977 a vcvt fixed-point conversion. */
10978 if (code
== FIX
&& mode
== SImode
10979 && GET_CODE (XEXP (x
, 0)) == FIX
10980 && GET_MODE (XEXP (x
, 0)) == SFmode
10981 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10982 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10986 *cost
+= extra_cost
->fp
[0].toint
;
10988 *cost
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10993 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10995 mode
= GET_MODE (XEXP (x
, 0));
10997 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
10998 /* Strip of the 'cost' of rounding towards zero. */
10999 if (GET_CODE (XEXP (x
, 0)) == FIX
)
11000 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
11003 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11004 /* ??? Increase the cost to deal with transferring from
11005 FP -> CORE registers? */
11008 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11012 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
11015 /* Vector costs? */
11017 *cost
= LIBCALL_COST (1);
11021 case UNSIGNED_FLOAT
:
11022 if (TARGET_HARD_FLOAT
)
11024 /* ??? Increase the cost to deal with transferring from CORE
11025 -> FP registers? */
11027 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
11030 *cost
= LIBCALL_COST (1);
11038 /* Just a guess. Guess number of instructions in the asm
11039 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11040 though (see PR60663). */
11041 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
11042 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
11044 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
11048 if (mode
!= VOIDmode
)
11049 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
11051 *cost
= COSTS_N_INSNS (4); /* Who knows? */
11056 #undef HANDLE_NARROW_SHIFT_ARITH
11058 /* RTX costs entry point. */
11061 arm_rtx_costs (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
, int outer_code
,
11062 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
11065 int code
= GET_CODE (x
);
11066 gcc_assert (current_tune
->insn_extra_cost
);
11068 result
= arm_rtx_costs_internal (x
, (enum rtx_code
) code
,
11069 (enum rtx_code
) outer_code
,
11070 current_tune
->insn_extra_cost
,
11073 if (dump_file
&& arm_verbose_cost
)
11075 print_rtl_single (dump_file
, x
);
11076 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
11077 *total
, result
? "final" : "partial");
11082 /* All address computations that can be done are free, but rtx cost returns
11083 the same for practically all of them. So we weight the different types
11084 of address here in the order (most pref first):
11085 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11087 arm_arm_address_cost (rtx x
)
11089 enum rtx_code c
= GET_CODE (x
);
11091 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
11093 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
11098 if (CONST_INT_P (XEXP (x
, 1)))
11101 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
11111 arm_thumb_address_cost (rtx x
)
11113 enum rtx_code c
= GET_CODE (x
);
11118 && REG_P (XEXP (x
, 0))
11119 && CONST_INT_P (XEXP (x
, 1)))
11126 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
11127 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
11129 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
11132 /* Adjust cost hook for XScale. */
11134 xscale_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11137 /* Some true dependencies can have a higher cost depending
11138 on precisely how certain input operands are used. */
11140 && recog_memoized (insn
) >= 0
11141 && recog_memoized (dep
) >= 0)
11143 int shift_opnum
= get_attr_shift (insn
);
11144 enum attr_type attr_type
= get_attr_type (dep
);
11146 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11147 operand for INSN. If we have a shifted input operand and the
11148 instruction we depend on is another ALU instruction, then we may
11149 have to account for an additional stall. */
11150 if (shift_opnum
!= 0
11151 && (attr_type
== TYPE_ALU_SHIFT_IMM
11152 || attr_type
== TYPE_ALUS_SHIFT_IMM
11153 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11154 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11155 || attr_type
== TYPE_ALU_SHIFT_REG
11156 || attr_type
== TYPE_ALUS_SHIFT_REG
11157 || attr_type
== TYPE_LOGIC_SHIFT_REG
11158 || attr_type
== TYPE_LOGICS_SHIFT_REG
11159 || attr_type
== TYPE_MOV_SHIFT
11160 || attr_type
== TYPE_MVN_SHIFT
11161 || attr_type
== TYPE_MOV_SHIFT_REG
11162 || attr_type
== TYPE_MVN_SHIFT_REG
))
11164 rtx shifted_operand
;
11167 /* Get the shifted operand. */
11168 extract_insn (insn
);
11169 shifted_operand
= recog_data
.operand
[shift_opnum
];
11171 /* Iterate over all the operands in DEP. If we write an operand
11172 that overlaps with SHIFTED_OPERAND, then we have increase the
11173 cost of this dependency. */
11174 extract_insn (dep
);
11175 preprocess_constraints (dep
);
11176 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11178 /* We can ignore strict inputs. */
11179 if (recog_data
.operand_type
[opno
] == OP_IN
)
11182 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11194 /* Adjust cost hook for Cortex A9. */
11196 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11206 case REG_DEP_OUTPUT
:
11207 if (recog_memoized (insn
) >= 0
11208 && recog_memoized (dep
) >= 0)
11210 if (GET_CODE (PATTERN (insn
)) == SET
)
11213 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11215 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11217 enum attr_type attr_type_insn
= get_attr_type (insn
);
11218 enum attr_type attr_type_dep
= get_attr_type (dep
);
11220 /* By default all dependencies of the form
11223 have an extra latency of 1 cycle because
11224 of the input and output dependency in this
11225 case. However this gets modeled as an true
11226 dependency and hence all these checks. */
11227 if (REG_P (SET_DEST (PATTERN (insn
)))
11228 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
11230 /* FMACS is a special case where the dependent
11231 instruction can be issued 3 cycles before
11232 the normal latency in case of an output
11234 if ((attr_type_insn
== TYPE_FMACS
11235 || attr_type_insn
== TYPE_FMACD
)
11236 && (attr_type_dep
== TYPE_FMACS
11237 || attr_type_dep
== TYPE_FMACD
))
11239 if (dep_type
== REG_DEP_OUTPUT
)
11240 *cost
= insn_default_latency (dep
) - 3;
11242 *cost
= insn_default_latency (dep
);
11247 if (dep_type
== REG_DEP_OUTPUT
)
11248 *cost
= insn_default_latency (dep
) + 1;
11250 *cost
= insn_default_latency (dep
);
11260 gcc_unreachable ();
11266 /* Adjust cost hook for FA726TE. */
11268 fa726te_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11271 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11272 have penalty of 3. */
11273 if (dep_type
== REG_DEP_TRUE
11274 && recog_memoized (insn
) >= 0
11275 && recog_memoized (dep
) >= 0
11276 && get_attr_conds (dep
) == CONDS_SET
)
11278 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11279 if (get_attr_conds (insn
) == CONDS_USE
11280 && get_attr_type (insn
) != TYPE_BRANCH
)
11286 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11287 || get_attr_conds (insn
) == CONDS_USE
)
11297 /* Implement TARGET_REGISTER_MOVE_COST.
11299 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11300 it is typically more expensive than a single memory access. We set
11301 the cost to less than two memory accesses so that floating
11302 point to integer conversion does not go through memory. */
11305 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
11306 reg_class_t from
, reg_class_t to
)
11310 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11311 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11313 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11314 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11316 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11323 if (from
== HI_REGS
|| to
== HI_REGS
)
11330 /* Implement TARGET_MEMORY_MOVE_COST. */
11333 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
11334 bool in ATTRIBUTE_UNUSED
)
11340 if (GET_MODE_SIZE (mode
) < 4)
11343 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11347 /* Vectorizer cost model implementation. */
11349 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11351 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11353 int misalign ATTRIBUTE_UNUSED
)
11357 switch (type_of_cost
)
11360 return current_tune
->vec_costs
->scalar_stmt_cost
;
11363 return current_tune
->vec_costs
->scalar_load_cost
;
11366 return current_tune
->vec_costs
->scalar_store_cost
;
11369 return current_tune
->vec_costs
->vec_stmt_cost
;
11372 return current_tune
->vec_costs
->vec_align_load_cost
;
11375 return current_tune
->vec_costs
->vec_store_cost
;
11377 case vec_to_scalar
:
11378 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11380 case scalar_to_vec
:
11381 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11383 case unaligned_load
:
11384 case vector_gather_load
:
11385 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11387 case unaligned_store
:
11388 case vector_scatter_store
:
11389 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11391 case cond_branch_taken
:
11392 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11394 case cond_branch_not_taken
:
11395 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11398 case vec_promote_demote
:
11399 return current_tune
->vec_costs
->vec_stmt_cost
;
11401 case vec_construct
:
11402 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11403 return elements
/ 2 + 1;
11406 gcc_unreachable ();
11410 /* Implement targetm.vectorize.add_stmt_cost. */
11413 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11414 struct _stmt_vec_info
*stmt_info
, int misalign
,
11415 enum vect_cost_model_location where
)
11417 unsigned *cost
= (unsigned *) data
;
11418 unsigned retval
= 0;
11420 if (flag_vect_cost_model
)
11422 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11423 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11425 /* Statements in an inner loop relative to the loop being
11426 vectorized are weighted more heavily. The value here is
11427 arbitrary and could potentially be improved with analysis. */
11428 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11429 count
*= 50; /* FIXME. */
11431 retval
= (unsigned) (count
* stmt_cost
);
11432 cost
[where
] += retval
;
11438 /* Return true if and only if this insn can dual-issue only as older. */
11440 cortexa7_older_only (rtx_insn
*insn
)
11442 if (recog_memoized (insn
) < 0)
11445 switch (get_attr_type (insn
))
11447 case TYPE_ALU_DSP_REG
:
11448 case TYPE_ALU_SREG
:
11449 case TYPE_ALUS_SREG
:
11450 case TYPE_LOGIC_REG
:
11451 case TYPE_LOGICS_REG
:
11453 case TYPE_ADCS_REG
:
11458 case TYPE_SHIFT_IMM
:
11459 case TYPE_SHIFT_REG
:
11460 case TYPE_LOAD_BYTE
:
11463 case TYPE_FFARITHS
:
11465 case TYPE_FFARITHD
:
11483 case TYPE_F_STORES
:
11490 /* Return true if and only if this insn can dual-issue as younger. */
11492 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
11494 if (recog_memoized (insn
) < 0)
11497 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11501 switch (get_attr_type (insn
))
11504 case TYPE_ALUS_IMM
:
11505 case TYPE_LOGIC_IMM
:
11506 case TYPE_LOGICS_IMM
:
11511 case TYPE_MOV_SHIFT
:
11512 case TYPE_MOV_SHIFT_REG
:
11522 /* Look for an instruction that can dual issue only as an older
11523 instruction, and move it in front of any instructions that can
11524 dual-issue as younger, while preserving the relative order of all
11525 other instructions in the ready list. This is a hueuristic to help
11526 dual-issue in later cycles, by postponing issue of more flexible
11527 instructions. This heuristic may affect dual issue opportunities
11528 in the current cycle. */
11530 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
11531 int *n_readyp
, int clock
)
11534 int first_older_only
= -1, first_younger
= -1;
11538 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11542 /* Traverse the ready list from the head (the instruction to issue
11543 first), and looking for the first instruction that can issue as
11544 younger and the first instruction that can dual-issue only as
11546 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11548 rtx_insn
*insn
= ready
[i
];
11549 if (cortexa7_older_only (insn
))
11551 first_older_only
= i
;
11553 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11556 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11560 /* Nothing to reorder because either no younger insn found or insn
11561 that can dual-issue only as older appears before any insn that
11562 can dual-issue as younger. */
11563 if (first_younger
== -1)
11566 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11570 /* Nothing to reorder because no older-only insn in the ready list. */
11571 if (first_older_only
== -1)
11574 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11578 /* Move first_older_only insn before first_younger. */
11580 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11581 INSN_UID(ready
[first_older_only
]),
11582 INSN_UID(ready
[first_younger
]));
11583 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
11584 for (i
= first_older_only
; i
< first_younger
; i
++)
11586 ready
[i
] = ready
[i
+1];
11589 ready
[i
] = first_older_only_insn
;
11593 /* Implement TARGET_SCHED_REORDER. */
11595 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
11600 case TARGET_CPU_cortexa7
:
11601 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11604 /* Do nothing for other cores. */
11608 return arm_issue_rate ();
11611 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11612 It corrects the value of COST based on the relationship between
11613 INSN and DEP through the dependence LINK. It returns the new
11614 value. There is a per-core adjust_cost hook to adjust scheduler costs
11615 and the per-core hook can choose to completely override the generic
11616 adjust_cost function. Only put bits of code into arm_adjust_cost that
11617 are common across all cores. */
11619 arm_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
, int cost
,
11624 /* When generating Thumb-1 code, we want to place flag-setting operations
11625 close to a conditional branch which depends on them, so that we can
11626 omit the comparison. */
11629 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11630 && recog_memoized (dep
) >= 0
11631 && get_attr_conds (dep
) == CONDS_SET
)
11634 if (current_tune
->sched_adjust_cost
!= NULL
)
11636 if (!current_tune
->sched_adjust_cost (insn
, dep_type
, dep
, &cost
))
11640 /* XXX Is this strictly true? */
11641 if (dep_type
== REG_DEP_ANTI
11642 || dep_type
== REG_DEP_OUTPUT
)
11645 /* Call insns don't incur a stall, even if they follow a load. */
11650 if ((i_pat
= single_set (insn
)) != NULL
11651 && MEM_P (SET_SRC (i_pat
))
11652 && (d_pat
= single_set (dep
)) != NULL
11653 && MEM_P (SET_DEST (d_pat
)))
11655 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11656 /* This is a load after a store, there is no conflict if the load reads
11657 from a cached area. Assume that loads from the stack, and from the
11658 constant pool are cached, and that others will miss. This is a
11661 if ((GET_CODE (src_mem
) == SYMBOL_REF
11662 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11663 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11664 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11665 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
11673 arm_max_conditional_execute (void)
11675 return max_insns_skipped
;
11679 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
11682 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
11684 return (optimize
> 0) ? 2 : 0;
11688 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
11690 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11693 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11694 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11695 sequences of non-executed instructions in IT blocks probably take the same
11696 amount of time as executed instructions (and the IT instruction itself takes
11697 space in icache). This function was experimentally determined to give good
11698 results on a popular embedded benchmark. */
11701 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
11703 return (TARGET_32BIT
&& speed_p
) ? 1
11704 : arm_default_branch_cost (speed_p
, predictable_p
);
11708 arm_cortex_m7_branch_cost (bool speed_p
, bool predictable_p
)
11710 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11713 static bool fp_consts_inited
= false;
11715 static REAL_VALUE_TYPE value_fp0
;
11718 init_fp_table (void)
11722 r
= REAL_VALUE_ATOF ("0", DFmode
);
11724 fp_consts_inited
= true;
11727 /* Return TRUE if rtx X is a valid immediate FP constant. */
11729 arm_const_double_rtx (rtx x
)
11731 const REAL_VALUE_TYPE
*r
;
11733 if (!fp_consts_inited
)
11736 r
= CONST_DOUBLE_REAL_VALUE (x
);
11737 if (REAL_VALUE_MINUS_ZERO (*r
))
11740 if (real_equal (r
, &value_fp0
))
11746 /* VFPv3 has a fairly wide range of representable immediates, formed from
11747 "quarter-precision" floating-point values. These can be evaluated using this
11748 formula (with ^ for exponentiation):
11752 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11753 16 <= n <= 31 and 0 <= r <= 7.
11755 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11757 - A (most-significant) is the sign bit.
11758 - BCD are the exponent (encoded as r XOR 3).
11759 - EFGH are the mantissa (encoded as n - 16).
11762 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11763 fconst[sd] instruction, or -1 if X isn't suitable. */
11765 vfp3_const_double_index (rtx x
)
11767 REAL_VALUE_TYPE r
, m
;
11768 int sign
, exponent
;
11769 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
11770 unsigned HOST_WIDE_INT mask
;
11771 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
11774 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
11777 r
= *CONST_DOUBLE_REAL_VALUE (x
);
11779 /* We can't represent these things, so detect them first. */
11780 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
11783 /* Extract sign, exponent and mantissa. */
11784 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
11785 r
= real_value_abs (&r
);
11786 exponent
= REAL_EXP (&r
);
11787 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11788 highest (sign) bit, with a fixed binary point at bit point_pos.
11789 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11790 bits for the mantissa, this may fail (low bits would be lost). */
11791 real_ldexp (&m
, &r
, point_pos
- exponent
);
11792 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
11793 mantissa
= w
.elt (0);
11794 mant_hi
= w
.elt (1);
11796 /* If there are bits set in the low part of the mantissa, we can't
11797 represent this value. */
11801 /* Now make it so that mantissa contains the most-significant bits, and move
11802 the point_pos to indicate that the least-significant bits have been
11804 point_pos
-= HOST_BITS_PER_WIDE_INT
;
11805 mantissa
= mant_hi
;
11807 /* We can permit four significant bits of mantissa only, plus a high bit
11808 which is always 1. */
11809 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
11810 if ((mantissa
& mask
) != 0)
11813 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11814 mantissa
>>= point_pos
- 5;
11816 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11817 floating-point immediate zero with Neon using an integer-zero load, but
11818 that case is handled elsewhere.) */
11822 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
11824 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11825 normalized significands are in the range [1, 2). (Our mantissa is shifted
11826 left 4 places at this point relative to normalized IEEE754 values). GCC
11827 internally uses [0.5, 1) (see real.c), so the exponent returned from
11828 REAL_EXP must be altered. */
11829 exponent
= 5 - exponent
;
11831 if (exponent
< 0 || exponent
> 7)
11834 /* Sign, mantissa and exponent are now in the correct form to plug into the
11835 formula described in the comment above. */
11836 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
11839 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11841 vfp3_const_double_rtx (rtx x
)
11846 return vfp3_const_double_index (x
) != -1;
11849 /* Recognize immediates which can be used in various Neon instructions. Legal
11850 immediates are described by the following table (for VMVN variants, the
11851 bitwise inverse of the constant shown is recognized. In either case, VMOV
11852 is output and the correct instruction to use for a given constant is chosen
11853 by the assembler). The constant shown is replicated across all elements of
11854 the destination vector.
11856 insn elems variant constant (binary)
11857 ---- ----- ------- -----------------
11858 vmov i32 0 00000000 00000000 00000000 abcdefgh
11859 vmov i32 1 00000000 00000000 abcdefgh 00000000
11860 vmov i32 2 00000000 abcdefgh 00000000 00000000
11861 vmov i32 3 abcdefgh 00000000 00000000 00000000
11862 vmov i16 4 00000000 abcdefgh
11863 vmov i16 5 abcdefgh 00000000
11864 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11865 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11866 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11867 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11868 vmvn i16 10 00000000 abcdefgh
11869 vmvn i16 11 abcdefgh 00000000
11870 vmov i32 12 00000000 00000000 abcdefgh 11111111
11871 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11872 vmov i32 14 00000000 abcdefgh 11111111 11111111
11873 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11874 vmov i8 16 abcdefgh
11875 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11876 eeeeeeee ffffffff gggggggg hhhhhhhh
11877 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11878 vmov f32 19 00000000 00000000 00000000 00000000
11880 For case 18, B = !b. Representable values are exactly those accepted by
11881 vfp3_const_double_index, but are output as floating-point numbers rather
11884 For case 19, we will change it to vmov.i32 when assembling.
11886 Variants 0-5 (inclusive) may also be used as immediates for the second
11887 operand of VORR/VBIC instructions.
11889 The INVERSE argument causes the bitwise inverse of the given operand to be
11890 recognized instead (used for recognizing legal immediates for the VAND/VORN
11891 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11892 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11893 output, rather than the real insns vbic/vorr).
11895 INVERSE makes no difference to the recognition of float vectors.
11897 The return value is the variant of immediate as shown in the above table, or
11898 -1 if the given value doesn't match any of the listed patterns.
11901 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
11902 rtx
*modconst
, int *elementwidth
)
11904 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11906 for (i = 0; i < idx; i += (STRIDE)) \
11911 immtype = (CLASS); \
11912 elsize = (ELSIZE); \
11916 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
11917 unsigned int innersize
;
11918 unsigned char bytes
[16];
11919 int immtype
= -1, matches
;
11920 unsigned int invmask
= inverse
? 0xff : 0;
11921 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
11924 n_elts
= CONST_VECTOR_NUNITS (op
);
11928 if (mode
== VOIDmode
)
11932 innersize
= GET_MODE_UNIT_SIZE (mode
);
11934 /* Vectors of float constants. */
11935 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
11937 rtx el0
= CONST_VECTOR_ELT (op
, 0);
11939 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
11942 /* FP16 vectors cannot be represented. */
11943 if (GET_MODE_INNER (mode
) == HFmode
)
11946 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11947 are distinct in this context. */
11948 if (!const_vec_duplicate_p (op
))
11952 *modconst
= CONST_VECTOR_ELT (op
, 0);
11957 if (el0
== CONST0_RTX (GET_MODE (el0
)))
11963 /* The tricks done in the code below apply for little-endian vector layout.
11964 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11965 FIXME: Implement logic for big-endian vectors. */
11966 if (BYTES_BIG_ENDIAN
&& vector
&& !const_vec_duplicate_p (op
))
11969 /* Splat vector constant out into a byte vector. */
11970 for (i
= 0; i
< n_elts
; i
++)
11972 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
11973 unsigned HOST_WIDE_INT elpart
;
11975 gcc_assert (CONST_INT_P (el
));
11976 elpart
= INTVAL (el
);
11978 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
11980 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
11981 elpart
>>= BITS_PER_UNIT
;
11985 /* Sanity check. */
11986 gcc_assert (idx
== GET_MODE_SIZE (mode
));
11990 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
11991 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11993 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11994 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11996 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11997 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11999 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12000 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
12002 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
12004 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
12006 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
12007 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12009 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12010 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12012 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12013 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12015 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12016 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
12018 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
12020 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
12022 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12023 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12025 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12026 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12028 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12029 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12031 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12032 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12034 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
12036 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
12037 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
12045 *elementwidth
= elsize
;
12049 unsigned HOST_WIDE_INT imm
= 0;
12051 /* Un-invert bytes of recognized vector, if necessary. */
12053 for (i
= 0; i
< idx
; i
++)
12054 bytes
[i
] ^= invmask
;
12058 /* FIXME: Broken on 32-bit H_W_I hosts. */
12059 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
12061 for (i
= 0; i
< 8; i
++)
12062 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
12063 << (i
* BITS_PER_UNIT
);
12065 *modconst
= GEN_INT (imm
);
12069 unsigned HOST_WIDE_INT imm
= 0;
12071 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
12072 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
12074 *modconst
= GEN_INT (imm
);
12082 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12083 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12084 float elements), and a modified constant (whatever should be output for a
12085 VMOV) in *MODCONST. */
12088 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
12089 rtx
*modconst
, int *elementwidth
)
12093 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
12099 *modconst
= tmpconst
;
12102 *elementwidth
= tmpwidth
;
12107 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12108 the immediate is valid, write a constant suitable for using as an operand
12109 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12110 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12113 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
12114 rtx
*modconst
, int *elementwidth
)
12118 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
12120 if (retval
< 0 || retval
> 5)
12124 *modconst
= tmpconst
;
12127 *elementwidth
= tmpwidth
;
12132 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12133 the immediate is valid, write a constant suitable for using as an operand
12134 to VSHR/VSHL to *MODCONST and the corresponding element width to
12135 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12136 because they have different limitations. */
12139 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
12140 rtx
*modconst
, int *elementwidth
,
12143 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
12144 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
12145 unsigned HOST_WIDE_INT last_elt
= 0;
12146 unsigned HOST_WIDE_INT maxshift
;
12148 /* Split vector constant out into a byte vector. */
12149 for (i
= 0; i
< n_elts
; i
++)
12151 rtx el
= CONST_VECTOR_ELT (op
, i
);
12152 unsigned HOST_WIDE_INT elpart
;
12154 if (CONST_INT_P (el
))
12155 elpart
= INTVAL (el
);
12156 else if (CONST_DOUBLE_P (el
))
12159 gcc_unreachable ();
12161 if (i
!= 0 && elpart
!= last_elt
)
12167 /* Shift less than element size. */
12168 maxshift
= innersize
* 8;
12172 /* Left shift immediate value can be from 0 to <size>-1. */
12173 if (last_elt
>= maxshift
)
12178 /* Right shift immediate value can be from 1 to <size>. */
12179 if (last_elt
== 0 || last_elt
> maxshift
)
12184 *elementwidth
= innersize
* 8;
12187 *modconst
= CONST_VECTOR_ELT (op
, 0);
12192 /* Return a string suitable for output of Neon immediate logic operation
12196 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
12197 int inverse
, int quad
)
12199 int width
, is_valid
;
12200 static char templ
[40];
12202 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12204 gcc_assert (is_valid
!= 0);
12207 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12209 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12214 /* Return a string suitable for output of Neon immediate shift operation
12215 (VSHR or VSHL) MNEM. */
12218 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12219 machine_mode mode
, int quad
,
12222 int width
, is_valid
;
12223 static char templ
[40];
12225 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12226 gcc_assert (is_valid
!= 0);
12229 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12231 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12236 /* Output a sequence of pairwise operations to implement a reduction.
12237 NOTE: We do "too much work" here, because pairwise operations work on two
12238 registers-worth of operands in one go. Unfortunately we can't exploit those
12239 extra calculations to do the full operation in fewer steps, I don't think.
12240 Although all vector elements of the result but the first are ignored, we
12241 actually calculate the same result in each of the elements. An alternative
12242 such as initially loading a vector with zero to use as each of the second
12243 operands would use up an additional register and take an extra instruction,
12244 for no particular gain. */
12247 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
12248 rtx (*reduc
) (rtx
, rtx
, rtx
))
12250 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
12253 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12255 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12256 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12261 /* If VALS is a vector constant that can be loaded into a register
12262 using VDUP, generate instructions to do so and return an RTX to
12263 assign to the register. Otherwise return NULL_RTX. */
12266 neon_vdup_constant (rtx vals
)
12268 machine_mode mode
= GET_MODE (vals
);
12269 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12272 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12275 if (!const_vec_duplicate_p (vals
, &x
))
12276 /* The elements are not all the same. We could handle repeating
12277 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12278 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12282 /* We can load this constant by using VDUP and a constant in a
12283 single ARM register. This will be cheaper than a vector
12286 x
= copy_to_mode_reg (inner_mode
, x
);
12287 return gen_vec_duplicate (mode
, x
);
12290 /* Generate code to load VALS, which is a PARALLEL containing only
12291 constants (for vec_init) or CONST_VECTOR, efficiently into a
12292 register. Returns an RTX to copy into the register, or NULL_RTX
12293 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12296 neon_make_constant (rtx vals
)
12298 machine_mode mode
= GET_MODE (vals
);
12300 rtx const_vec
= NULL_RTX
;
12301 int n_elts
= GET_MODE_NUNITS (mode
);
12305 if (GET_CODE (vals
) == CONST_VECTOR
)
12307 else if (GET_CODE (vals
) == PARALLEL
)
12309 /* A CONST_VECTOR must contain only CONST_INTs and
12310 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12311 Only store valid constants in a CONST_VECTOR. */
12312 for (i
= 0; i
< n_elts
; ++i
)
12314 rtx x
= XVECEXP (vals
, 0, i
);
12315 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12318 if (n_const
== n_elts
)
12319 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12322 gcc_unreachable ();
12324 if (const_vec
!= NULL
12325 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12326 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12328 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12329 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12330 pipeline cycle; creating the constant takes one or two ARM
12331 pipeline cycles. */
12333 else if (const_vec
!= NULL_RTX
)
12334 /* Load from constant pool. On Cortex-A8 this takes two cycles
12335 (for either double or quad vectors). We can not take advantage
12336 of single-cycle VLD1 because we need a PC-relative addressing
12340 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12341 We can not construct an initializer. */
12345 /* Initialize vector TARGET to VALS. */
12348 neon_expand_vector_init (rtx target
, rtx vals
)
12350 machine_mode mode
= GET_MODE (target
);
12351 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12352 int n_elts
= GET_MODE_NUNITS (mode
);
12353 int n_var
= 0, one_var
= -1;
12354 bool all_same
= true;
12358 for (i
= 0; i
< n_elts
; ++i
)
12360 x
= XVECEXP (vals
, 0, i
);
12361 if (!CONSTANT_P (x
))
12362 ++n_var
, one_var
= i
;
12364 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12370 rtx constant
= neon_make_constant (vals
);
12371 if (constant
!= NULL_RTX
)
12373 emit_move_insn (target
, constant
);
12378 /* Splat a single non-constant element if we can. */
12379 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12381 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12382 emit_insn (gen_rtx_SET (target
, gen_vec_duplicate (mode
, x
)));
12386 /* One field is non-constant. Load constant then overwrite varying
12387 field. This is more efficient than using the stack. */
12390 rtx copy
= copy_rtx (vals
);
12391 rtx index
= GEN_INT (one_var
);
12393 /* Load constant part of vector, substitute neighboring value for
12394 varying element. */
12395 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12396 neon_expand_vector_init (target
, copy
);
12398 /* Insert variable. */
12399 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12403 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12406 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12409 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12412 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12415 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12418 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12421 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12424 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12427 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12430 gcc_unreachable ();
12435 /* Construct the vector in memory one field at a time
12436 and load the whole vector. */
12437 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12438 for (i
= 0; i
< n_elts
; i
++)
12439 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12440 i
* GET_MODE_SIZE (inner_mode
)),
12441 XVECEXP (vals
, 0, i
));
12442 emit_move_insn (target
, mem
);
12445 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12446 ERR if it doesn't. EXP indicates the source location, which includes the
12447 inlining history for intrinsics. */
12450 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12451 const_tree exp
, const char *desc
)
12453 HOST_WIDE_INT lane
;
12455 gcc_assert (CONST_INT_P (operand
));
12457 lane
= INTVAL (operand
);
12459 if (lane
< low
|| lane
>= high
)
12462 error ("%K%s %wd out of range %wd - %wd",
12463 exp
, desc
, lane
, low
, high
- 1);
12465 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
12469 /* Bounds-check lanes. */
12472 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12475 bounds_check (operand
, low
, high
, exp
, "lane");
12478 /* Bounds-check constants. */
12481 arm_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12483 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
12487 neon_element_bits (machine_mode mode
)
12489 return GET_MODE_UNIT_BITSIZE (mode
);
12493 /* Predicates for `match_operand' and `match_operator'. */
12495 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12496 WB is true if full writeback address modes are allowed and is false
12497 if limited writeback address modes (POST_INC and PRE_DEC) are
12501 arm_coproc_mem_operand (rtx op
, bool wb
)
12505 /* Reject eliminable registers. */
12506 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12507 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12508 || reg_mentioned_p (arg_pointer_rtx
, op
)
12509 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12510 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12511 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12512 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12515 /* Constants are converted into offsets from labels. */
12519 ind
= XEXP (op
, 0);
12521 if (reload_completed
12522 && (GET_CODE (ind
) == LABEL_REF
12523 || (GET_CODE (ind
) == CONST
12524 && GET_CODE (XEXP (ind
, 0)) == PLUS
12525 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12526 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12529 /* Match: (mem (reg)). */
12531 return arm_address_register_rtx_p (ind
, 0);
12533 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12534 acceptable in any case (subject to verification by
12535 arm_address_register_rtx_p). We need WB to be true to accept
12536 PRE_INC and POST_DEC. */
12537 if (GET_CODE (ind
) == POST_INC
12538 || GET_CODE (ind
) == PRE_DEC
12540 && (GET_CODE (ind
) == PRE_INC
12541 || GET_CODE (ind
) == POST_DEC
)))
12542 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12545 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12546 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12547 && GET_CODE (XEXP (ind
, 1)) == PLUS
12548 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12549 ind
= XEXP (ind
, 1);
12554 if (GET_CODE (ind
) == PLUS
12555 && REG_P (XEXP (ind
, 0))
12556 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12557 && CONST_INT_P (XEXP (ind
, 1))
12558 && INTVAL (XEXP (ind
, 1)) > -1024
12559 && INTVAL (XEXP (ind
, 1)) < 1024
12560 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12566 /* Return TRUE if OP is a memory operand which we can load or store a vector
12567 to/from. TYPE is one of the following values:
12568 0 - Vector load/stor (vldr)
12569 1 - Core registers (ldm)
12570 2 - Element/structure loads (vld1)
12573 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12577 /* Reject eliminable registers. */
12578 if (strict
&& ! (reload_in_progress
|| reload_completed
)
12579 && (reg_mentioned_p (frame_pointer_rtx
, op
)
12580 || reg_mentioned_p (arg_pointer_rtx
, op
)
12581 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12582 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12583 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12584 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12587 /* Constants are converted into offsets from labels. */
12591 ind
= XEXP (op
, 0);
12593 if (reload_completed
12594 && (GET_CODE (ind
) == LABEL_REF
12595 || (GET_CODE (ind
) == CONST
12596 && GET_CODE (XEXP (ind
, 0)) == PLUS
12597 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12598 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12601 /* Match: (mem (reg)). */
12603 return arm_address_register_rtx_p (ind
, 0);
12605 /* Allow post-increment with Neon registers. */
12606 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12607 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12608 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12610 /* Allow post-increment by register for VLDn */
12611 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
12612 && GET_CODE (XEXP (ind
, 1)) == PLUS
12613 && REG_P (XEXP (XEXP (ind
, 1), 1)))
12620 && GET_CODE (ind
) == PLUS
12621 && REG_P (XEXP (ind
, 0))
12622 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12623 && CONST_INT_P (XEXP (ind
, 1))
12624 && INTVAL (XEXP (ind
, 1)) > -1024
12625 /* For quad modes, we restrict the constant offset to be slightly less
12626 than what the instruction format permits. We have no such constraint
12627 on double mode offsets. (This must match arm_legitimate_index_p.) */
12628 && (INTVAL (XEXP (ind
, 1))
12629 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12630 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12636 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12639 neon_struct_mem_operand (rtx op
)
12643 /* Reject eliminable registers. */
12644 if (! (reload_in_progress
|| reload_completed
)
12645 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12646 || reg_mentioned_p (arg_pointer_rtx
, op
)
12647 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12648 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12649 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12650 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12653 /* Constants are converted into offsets from labels. */
12657 ind
= XEXP (op
, 0);
12659 if (reload_completed
12660 && (GET_CODE (ind
) == LABEL_REF
12661 || (GET_CODE (ind
) == CONST
12662 && GET_CODE (XEXP (ind
, 0)) == PLUS
12663 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12664 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12667 /* Match: (mem (reg)). */
12669 return arm_address_register_rtx_p (ind
, 0);
12671 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12672 if (GET_CODE (ind
) == POST_INC
12673 || GET_CODE (ind
) == PRE_DEC
)
12674 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12679 /* Return true if X is a register that will be eliminated later on. */
12681 arm_eliminable_register (rtx x
)
12683 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
12684 || REGNO (x
) == ARG_POINTER_REGNUM
12685 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
12686 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
12689 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12690 coprocessor registers. Otherwise return NO_REGS. */
12693 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
12695 if (mode
== HFmode
)
12697 if (!TARGET_NEON_FP16
&& !TARGET_VFP_FP16INST
)
12698 return GENERAL_REGS
;
12699 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
12701 return GENERAL_REGS
;
12704 /* The neon move patterns handle all legitimate vector and struct
12707 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
12708 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
12709 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
12710 || VALID_NEON_STRUCT_MODE (mode
)))
12713 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
12716 return GENERAL_REGS
;
12719 /* Values which must be returned in the most-significant end of the return
12723 arm_return_in_msb (const_tree valtype
)
12725 return (TARGET_AAPCS_BASED
12726 && BYTES_BIG_ENDIAN
12727 && (AGGREGATE_TYPE_P (valtype
)
12728 || TREE_CODE (valtype
) == COMPLEX_TYPE
12729 || FIXED_POINT_TYPE_P (valtype
)));
12732 /* Return TRUE if X references a SYMBOL_REF. */
12734 symbol_mentioned_p (rtx x
)
12739 if (GET_CODE (x
) == SYMBOL_REF
)
12742 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12743 are constant offsets, not symbols. */
12744 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12747 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12749 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12755 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12756 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
12759 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
12766 /* Return TRUE if X references a LABEL_REF. */
12768 label_mentioned_p (rtx x
)
12773 if (GET_CODE (x
) == LABEL_REF
)
12776 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12777 instruction, but they are constant offsets, not symbols. */
12778 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12781 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12782 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12788 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12789 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
12792 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
12800 tls_mentioned_p (rtx x
)
12802 switch (GET_CODE (x
))
12805 return tls_mentioned_p (XEXP (x
, 0));
12808 if (XINT (x
, 1) == UNSPEC_TLS
)
12811 /* Fall through. */
12817 /* Must not copy any rtx that uses a pc-relative address.
12818 Also, disallow copying of load-exclusive instructions that
12819 may appear after splitting of compare-and-swap-style operations
12820 so as to prevent those loops from being transformed away from their
12821 canonical forms (see PR 69904). */
12824 arm_cannot_copy_insn_p (rtx_insn
*insn
)
12826 /* The tls call insn cannot be copied, as it is paired with a data
12828 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
12831 subrtx_iterator::array_type array
;
12832 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
12834 const_rtx x
= *iter
;
12835 if (GET_CODE (x
) == UNSPEC
12836 && (XINT (x
, 1) == UNSPEC_PIC_BASE
12837 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
12841 rtx set
= single_set (insn
);
12844 rtx src
= SET_SRC (set
);
12845 if (GET_CODE (src
) == ZERO_EXTEND
)
12846 src
= XEXP (src
, 0);
12848 /* Catch the load-exclusive and load-acquire operations. */
12849 if (GET_CODE (src
) == UNSPEC_VOLATILE
12850 && (XINT (src
, 1) == VUNSPEC_LL
12851 || XINT (src
, 1) == VUNSPEC_LAX
))
12858 minmax_code (rtx x
)
12860 enum rtx_code code
= GET_CODE (x
);
12873 gcc_unreachable ();
12877 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12880 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
12881 int *mask
, bool *signed_sat
)
12883 /* The high bound must be a power of two minus one. */
12884 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
12888 /* The low bound is either zero (for usat) or one less than the
12889 negation of the high bound (for ssat). */
12890 if (INTVAL (lo_bound
) == 0)
12895 *signed_sat
= false;
12900 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
12905 *signed_sat
= true;
12913 /* Return 1 if memory locations are adjacent. */
12915 adjacent_mem_locations (rtx a
, rtx b
)
12917 /* We don't guarantee to preserve the order of these memory refs. */
12918 if (volatile_refs_p (a
) || volatile_refs_p (b
))
12921 if ((REG_P (XEXP (a
, 0))
12922 || (GET_CODE (XEXP (a
, 0)) == PLUS
12923 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
12924 && (REG_P (XEXP (b
, 0))
12925 || (GET_CODE (XEXP (b
, 0)) == PLUS
12926 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
12928 HOST_WIDE_INT val0
= 0, val1
= 0;
12932 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
12934 reg0
= XEXP (XEXP (a
, 0), 0);
12935 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
12938 reg0
= XEXP (a
, 0);
12940 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
12942 reg1
= XEXP (XEXP (b
, 0), 0);
12943 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
12946 reg1
= XEXP (b
, 0);
12948 /* Don't accept any offset that will require multiple
12949 instructions to handle, since this would cause the
12950 arith_adjacentmem pattern to output an overlong sequence. */
12951 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
12954 /* Don't allow an eliminable register: register elimination can make
12955 the offset too large. */
12956 if (arm_eliminable_register (reg0
))
12959 val_diff
= val1
- val0
;
12963 /* If the target has load delay slots, then there's no benefit
12964 to using an ldm instruction unless the offset is zero and
12965 we are optimizing for size. */
12966 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
12967 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
12968 && (val_diff
== 4 || val_diff
== -4));
12971 return ((REGNO (reg0
) == REGNO (reg1
))
12972 && (val_diff
== 4 || val_diff
== -4));
12978 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12979 for load operations, false for store operations. CONSECUTIVE is true
12980 if the register numbers in the operation must be consecutive in the register
12981 bank. RETURN_PC is true if value is to be loaded in PC.
12982 The pattern we are trying to match for load is:
12983 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12984 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12987 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12990 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12991 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12992 3. If consecutive is TRUE, then for kth register being loaded,
12993 REGNO (R_dk) = REGNO (R_d0) + k.
12994 The pattern for store is similar. */
12996 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
12997 bool consecutive
, bool return_pc
)
12999 HOST_WIDE_INT count
= XVECLEN (op
, 0);
13000 rtx reg
, mem
, addr
;
13002 unsigned first_regno
;
13003 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
13005 bool addr_reg_in_reglist
= false;
13006 bool update
= false;
13011 /* If not in SImode, then registers must be consecutive
13012 (e.g., VLDM instructions for DFmode). */
13013 gcc_assert ((mode
== SImode
) || consecutive
);
13014 /* Setting return_pc for stores is illegal. */
13015 gcc_assert (!return_pc
|| load
);
13017 /* Set up the increments and the regs per val based on the mode. */
13018 reg_increment
= GET_MODE_SIZE (mode
);
13019 regs_per_val
= reg_increment
/ 4;
13020 offset_adj
= return_pc
? 1 : 0;
13023 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
13024 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
13027 /* Check if this is a write-back. */
13028 elt
= XVECEXP (op
, 0, offset_adj
);
13029 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
13035 /* The offset adjustment must be the number of registers being
13036 popped times the size of a single register. */
13037 if (!REG_P (SET_DEST (elt
))
13038 || !REG_P (XEXP (SET_SRC (elt
), 0))
13039 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
13040 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
13041 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
13042 ((count
- 1 - offset_adj
) * reg_increment
))
13046 i
= i
+ offset_adj
;
13047 base
= base
+ offset_adj
;
13048 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13049 success depends on the type: VLDM can do just one reg,
13050 LDM must do at least two. */
13051 if ((count
<= i
) && (mode
== SImode
))
13054 elt
= XVECEXP (op
, 0, i
- 1);
13055 if (GET_CODE (elt
) != SET
)
13060 reg
= SET_DEST (elt
);
13061 mem
= SET_SRC (elt
);
13065 reg
= SET_SRC (elt
);
13066 mem
= SET_DEST (elt
);
13069 if (!REG_P (reg
) || !MEM_P (mem
))
13072 regno
= REGNO (reg
);
13073 first_regno
= regno
;
13074 addr
= XEXP (mem
, 0);
13075 if (GET_CODE (addr
) == PLUS
)
13077 if (!CONST_INT_P (XEXP (addr
, 1)))
13080 offset
= INTVAL (XEXP (addr
, 1));
13081 addr
= XEXP (addr
, 0);
13087 /* Don't allow SP to be loaded unless it is also the base register. It
13088 guarantees that SP is reset correctly when an LDM instruction
13089 is interrupted. Otherwise, we might end up with a corrupt stack. */
13090 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13093 for (; i
< count
; i
++)
13095 elt
= XVECEXP (op
, 0, i
);
13096 if (GET_CODE (elt
) != SET
)
13101 reg
= SET_DEST (elt
);
13102 mem
= SET_SRC (elt
);
13106 reg
= SET_SRC (elt
);
13107 mem
= SET_DEST (elt
);
13111 || GET_MODE (reg
) != mode
13112 || REGNO (reg
) <= regno
13115 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
13116 /* Don't allow SP to be loaded unless it is also the base register. It
13117 guarantees that SP is reset correctly when an LDM instruction
13118 is interrupted. Otherwise, we might end up with a corrupt stack. */
13119 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13121 || GET_MODE (mem
) != mode
13122 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
13123 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
13124 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
13125 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
13126 offset
+ (i
- base
) * reg_increment
))
13127 && (!REG_P (XEXP (mem
, 0))
13128 || offset
+ (i
- base
) * reg_increment
!= 0)))
13131 regno
= REGNO (reg
);
13132 if (regno
== REGNO (addr
))
13133 addr_reg_in_reglist
= true;
13138 if (update
&& addr_reg_in_reglist
)
13141 /* For Thumb-1, address register is always modified - either by write-back
13142 or by explicit load. If the pattern does not describe an update,
13143 then the address register must be in the list of loaded registers. */
13145 return update
|| addr_reg_in_reglist
;
13151 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13152 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13153 instruction. ADD_OFFSET is nonzero if the base address register needs
13154 to be modified with an add instruction before we can use it. */
13157 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
13158 int nops
, HOST_WIDE_INT add_offset
)
13160 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13161 if the offset isn't small enough. The reason 2 ldrs are faster
13162 is because these ARMs are able to do more than one cache access
13163 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13164 whilst the ARM8 has a double bandwidth cache. This means that
13165 these cores can do both an instruction fetch and a data fetch in
13166 a single cycle, so the trick of calculating the address into a
13167 scratch register (one of the result regs) and then doing a load
13168 multiple actually becomes slower (and no smaller in code size).
13169 That is the transformation
13171 ldr rd1, [rbase + offset]
13172 ldr rd2, [rbase + offset + 4]
13176 add rd1, rbase, offset
13177 ldmia rd1, {rd1, rd2}
13179 produces worse code -- '3 cycles + any stalls on rd2' instead of
13180 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13181 access per cycle, the first sequence could never complete in less
13182 than 6 cycles, whereas the ldm sequence would only take 5 and
13183 would make better use of sequential accesses if not hitting the
13186 We cheat here and test 'arm_ld_sched' which we currently know to
13187 only be true for the ARM8, ARM9 and StrongARM. If this ever
13188 changes, then the test below needs to be reworked. */
13189 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
13192 /* XScale has load-store double instructions, but they have stricter
13193 alignment requirements than load-store multiple, so we cannot
13196 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13197 the pipeline until completion.
13205 An ldr instruction takes 1-3 cycles, but does not block the
13214 Best case ldr will always win. However, the more ldr instructions
13215 we issue, the less likely we are to be able to schedule them well.
13216 Using ldr instructions also increases code size.
13218 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13219 for counts of 3 or 4 regs. */
13220 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13225 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13226 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13227 an array ORDER which describes the sequence to use when accessing the
13228 offsets that produces an ascending order. In this sequence, each
13229 offset must be larger by exactly 4 than the previous one. ORDER[0]
13230 must have been filled in with the lowest offset by the caller.
13231 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13232 we use to verify that ORDER produces an ascending order of registers.
13233 Return true if it was possible to construct such an order, false if
13237 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13238 int *unsorted_regs
)
13241 for (i
= 1; i
< nops
; i
++)
13245 order
[i
] = order
[i
- 1];
13246 for (j
= 0; j
< nops
; j
++)
13247 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13249 /* We must find exactly one offset that is higher than the
13250 previous one by 4. */
13251 if (order
[i
] != order
[i
- 1])
13255 if (order
[i
] == order
[i
- 1])
13257 /* The register numbers must be ascending. */
13258 if (unsorted_regs
!= NULL
13259 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13265 /* Used to determine in a peephole whether a sequence of load
13266 instructions can be changed into a load-multiple instruction.
13267 NOPS is the number of separate load instructions we are examining. The
13268 first NOPS entries in OPERANDS are the destination registers, the
13269 next NOPS entries are memory operands. If this function is
13270 successful, *BASE is set to the common base register of the memory
13271 accesses; *LOAD_OFFSET is set to the first memory location's offset
13272 from that base register.
13273 REGS is an array filled in with the destination register numbers.
13274 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13275 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13276 the sequence of registers in REGS matches the loads from ascending memory
13277 locations, and the function verifies that the register numbers are
13278 themselves ascending. If CHECK_REGS is false, the register numbers
13279 are stored in the order they are found in the operands. */
13281 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13282 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13284 int unsorted_regs
[MAX_LDM_STM_OPS
];
13285 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13286 int order
[MAX_LDM_STM_OPS
];
13287 rtx base_reg_rtx
= NULL
;
13291 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13292 easily extended if required. */
13293 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13295 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13297 /* Loop over the operands and check that the memory references are
13298 suitable (i.e. immediate offsets from the same base register). At
13299 the same time, extract the target register, and the memory
13301 for (i
= 0; i
< nops
; i
++)
13306 /* Convert a subreg of a mem into the mem itself. */
13307 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13308 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13310 gcc_assert (MEM_P (operands
[nops
+ i
]));
13312 /* Don't reorder volatile memory references; it doesn't seem worth
13313 looking for the case where the order is ok anyway. */
13314 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13317 offset
= const0_rtx
;
13319 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13320 || (GET_CODE (reg
) == SUBREG
13321 && REG_P (reg
= SUBREG_REG (reg
))))
13322 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13323 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13324 || (GET_CODE (reg
) == SUBREG
13325 && REG_P (reg
= SUBREG_REG (reg
))))
13326 && (CONST_INT_P (offset
13327 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13331 base_reg
= REGNO (reg
);
13332 base_reg_rtx
= reg
;
13333 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13336 else if (base_reg
!= (int) REGNO (reg
))
13337 /* Not addressed from the same base register. */
13340 unsorted_regs
[i
] = (REG_P (operands
[i
])
13341 ? REGNO (operands
[i
])
13342 : REGNO (SUBREG_REG (operands
[i
])));
13344 /* If it isn't an integer register, or if it overwrites the
13345 base register but isn't the last insn in the list, then
13346 we can't do this. */
13347 if (unsorted_regs
[i
] < 0
13348 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13349 || unsorted_regs
[i
] > 14
13350 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13353 /* Don't allow SP to be loaded unless it is also the base
13354 register. It guarantees that SP is reset correctly when
13355 an LDM instruction is interrupted. Otherwise, we might
13356 end up with a corrupt stack. */
13357 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13360 unsorted_offsets
[i
] = INTVAL (offset
);
13361 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13365 /* Not a suitable memory address. */
13369 /* All the useful information has now been extracted from the
13370 operands into unsorted_regs and unsorted_offsets; additionally,
13371 order[0] has been set to the lowest offset in the list. Sort
13372 the offsets into order, verifying that they are adjacent, and
13373 check that the register numbers are ascending. */
13374 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13375 check_regs
? unsorted_regs
: NULL
))
13379 memcpy (saved_order
, order
, sizeof order
);
13385 for (i
= 0; i
< nops
; i
++)
13386 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13388 *load_offset
= unsorted_offsets
[order
[0]];
13392 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13395 if (unsorted_offsets
[order
[0]] == 0)
13396 ldm_case
= 1; /* ldmia */
13397 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13398 ldm_case
= 2; /* ldmib */
13399 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13400 ldm_case
= 3; /* ldmda */
13401 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13402 ldm_case
= 4; /* ldmdb */
13403 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13404 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13409 if (!multiple_operation_profitable_p (false, nops
,
13411 ? unsorted_offsets
[order
[0]] : 0))
13417 /* Used to determine in a peephole whether a sequence of store instructions can
13418 be changed into a store-multiple instruction.
13419 NOPS is the number of separate store instructions we are examining.
13420 NOPS_TOTAL is the total number of instructions recognized by the peephole
13422 The first NOPS entries in OPERANDS are the source registers, the next
13423 NOPS entries are memory operands. If this function is successful, *BASE is
13424 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13425 to the first memory location's offset from that base register. REGS is an
13426 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13427 likewise filled with the corresponding rtx's.
13428 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13429 numbers to an ascending order of stores.
13430 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13431 from ascending memory locations, and the function verifies that the register
13432 numbers are themselves ascending. If CHECK_REGS is false, the register
13433 numbers are stored in the order they are found in the operands. */
13435 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13436 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13437 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13439 int unsorted_regs
[MAX_LDM_STM_OPS
];
13440 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13441 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13442 int order
[MAX_LDM_STM_OPS
];
13444 rtx base_reg_rtx
= NULL
;
13447 /* Write back of base register is currently only supported for Thumb 1. */
13448 int base_writeback
= TARGET_THUMB1
;
13450 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13451 easily extended if required. */
13452 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13454 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13456 /* Loop over the operands and check that the memory references are
13457 suitable (i.e. immediate offsets from the same base register). At
13458 the same time, extract the target register, and the memory
13460 for (i
= 0; i
< nops
; i
++)
13465 /* Convert a subreg of a mem into the mem itself. */
13466 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13467 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13469 gcc_assert (MEM_P (operands
[nops
+ i
]));
13471 /* Don't reorder volatile memory references; it doesn't seem worth
13472 looking for the case where the order is ok anyway. */
13473 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13476 offset
= const0_rtx
;
13478 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13479 || (GET_CODE (reg
) == SUBREG
13480 && REG_P (reg
= SUBREG_REG (reg
))))
13481 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13482 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13483 || (GET_CODE (reg
) == SUBREG
13484 && REG_P (reg
= SUBREG_REG (reg
))))
13485 && (CONST_INT_P (offset
13486 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13488 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13489 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13490 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13494 base_reg
= REGNO (reg
);
13495 base_reg_rtx
= reg
;
13496 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13499 else if (base_reg
!= (int) REGNO (reg
))
13500 /* Not addressed from the same base register. */
13503 /* If it isn't an integer register, then we can't do this. */
13504 if (unsorted_regs
[i
] < 0
13505 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13506 /* The effects are unpredictable if the base register is
13507 both updated and stored. */
13508 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13509 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13510 || unsorted_regs
[i
] > 14)
13513 unsorted_offsets
[i
] = INTVAL (offset
);
13514 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13518 /* Not a suitable memory address. */
13522 /* All the useful information has now been extracted from the
13523 operands into unsorted_regs and unsorted_offsets; additionally,
13524 order[0] has been set to the lowest offset in the list. Sort
13525 the offsets into order, verifying that they are adjacent, and
13526 check that the register numbers are ascending. */
13527 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13528 check_regs
? unsorted_regs
: NULL
))
13532 memcpy (saved_order
, order
, sizeof order
);
13538 for (i
= 0; i
< nops
; i
++)
13540 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13542 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13545 *load_offset
= unsorted_offsets
[order
[0]];
13549 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13552 if (unsorted_offsets
[order
[0]] == 0)
13553 stm_case
= 1; /* stmia */
13554 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13555 stm_case
= 2; /* stmib */
13556 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13557 stm_case
= 3; /* stmda */
13558 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13559 stm_case
= 4; /* stmdb */
13563 if (!multiple_operation_profitable_p (false, nops
, 0))
13569 /* Routines for use in generating RTL. */
13571 /* Generate a load-multiple instruction. COUNT is the number of loads in
13572 the instruction; REGS and MEMS are arrays containing the operands.
13573 BASEREG is the base register to be used in addressing the memory operands.
13574 WBACK_OFFSET is nonzero if the instruction should update the base
13578 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13579 HOST_WIDE_INT wback_offset
)
13584 if (!multiple_operation_profitable_p (false, count
, 0))
13590 for (i
= 0; i
< count
; i
++)
13591 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13593 if (wback_offset
!= 0)
13594 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13596 seq
= get_insns ();
13602 result
= gen_rtx_PARALLEL (VOIDmode
,
13603 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13604 if (wback_offset
!= 0)
13606 XVECEXP (result
, 0, 0)
13607 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13612 for (j
= 0; i
< count
; i
++, j
++)
13613 XVECEXP (result
, 0, i
)
13614 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13619 /* Generate a store-multiple instruction. COUNT is the number of stores in
13620 the instruction; REGS and MEMS are arrays containing the operands.
13621 BASEREG is the base register to be used in addressing the memory operands.
13622 WBACK_OFFSET is nonzero if the instruction should update the base
13626 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13627 HOST_WIDE_INT wback_offset
)
13632 if (GET_CODE (basereg
) == PLUS
)
13633 basereg
= XEXP (basereg
, 0);
13635 if (!multiple_operation_profitable_p (false, count
, 0))
13641 for (i
= 0; i
< count
; i
++)
13642 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
13644 if (wback_offset
!= 0)
13645 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13647 seq
= get_insns ();
13653 result
= gen_rtx_PARALLEL (VOIDmode
,
13654 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13655 if (wback_offset
!= 0)
13657 XVECEXP (result
, 0, 0)
13658 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13663 for (j
= 0; i
< count
; i
++, j
++)
13664 XVECEXP (result
, 0, i
)
13665 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
13670 /* Generate either a load-multiple or a store-multiple instruction. This
13671 function can be used in situations where we can start with a single MEM
13672 rtx and adjust its address upwards.
13673 COUNT is the number of operations in the instruction, not counting a
13674 possible update of the base register. REGS is an array containing the
13676 BASEREG is the base register to be used in addressing the memory operands,
13677 which are constructed from BASEMEM.
13678 WRITE_BACK specifies whether the generated instruction should include an
13679 update of the base register.
13680 OFFSETP is used to pass an offset to and from this function; this offset
13681 is not used when constructing the address (instead BASEMEM should have an
13682 appropriate offset in its address), it is used only for setting
13683 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13686 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
13687 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
13689 rtx mems
[MAX_LDM_STM_OPS
];
13690 HOST_WIDE_INT offset
= *offsetp
;
13693 gcc_assert (count
<= MAX_LDM_STM_OPS
);
13695 if (GET_CODE (basereg
) == PLUS
)
13696 basereg
= XEXP (basereg
, 0);
13698 for (i
= 0; i
< count
; i
++)
13700 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
13701 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
13709 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
13710 write_back
? 4 * count
: 0);
13712 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
13713 write_back
? 4 * count
: 0);
13717 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13718 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13720 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
13725 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13726 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13728 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
13732 /* Called from a peephole2 expander to turn a sequence of loads into an
13733 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13734 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13735 is true if we can reorder the registers because they are used commutatively
13737 Returns true iff we could generate a new instruction. */
13740 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
13742 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13743 rtx mems
[MAX_LDM_STM_OPS
];
13744 int i
, j
, base_reg
;
13746 HOST_WIDE_INT offset
;
13747 int write_back
= FALSE
;
13751 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
13752 &base_reg
, &offset
, !sort_regs
);
13758 for (i
= 0; i
< nops
- 1; i
++)
13759 for (j
= i
+ 1; j
< nops
; j
++)
13760 if (regs
[i
] > regs
[j
])
13766 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13770 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
13771 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
13777 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
13778 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
13780 if (!TARGET_THUMB1
)
13781 base_reg_rtx
= newbase
;
13784 for (i
= 0; i
< nops
; i
++)
13786 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13787 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13790 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13791 write_back
? offset
+ i
* 4 : 0));
13795 /* Called from a peephole2 expander to turn a sequence of stores into an
13796 STM instruction. OPERANDS are the operands found by the peephole matcher;
13797 NOPS indicates how many separate stores we are trying to combine.
13798 Returns true iff we could generate a new instruction. */
13801 gen_stm_seq (rtx
*operands
, int nops
)
13804 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13805 rtx mems
[MAX_LDM_STM_OPS
];
13808 HOST_WIDE_INT offset
;
13809 int write_back
= FALSE
;
13812 bool base_reg_dies
;
13814 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
13815 mem_order
, &base_reg
, &offset
, true);
13820 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13822 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
13825 gcc_assert (base_reg_dies
);
13831 gcc_assert (base_reg_dies
);
13832 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13836 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13838 for (i
= 0; i
< nops
; i
++)
13840 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13841 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13844 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13845 write_back
? offset
+ i
* 4 : 0));
13849 /* Called from a peephole2 expander to turn a sequence of stores that are
13850 preceded by constant loads into an STM instruction. OPERANDS are the
13851 operands found by the peephole matcher; NOPS indicates how many
13852 separate stores we are trying to combine; there are 2 * NOPS
13853 instructions in the peephole.
13854 Returns true iff we could generate a new instruction. */
13857 gen_const_stm_seq (rtx
*operands
, int nops
)
13859 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
13860 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13861 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
13862 rtx mems
[MAX_LDM_STM_OPS
];
13865 HOST_WIDE_INT offset
;
13866 int write_back
= FALSE
;
13869 bool base_reg_dies
;
13871 HARD_REG_SET allocated
;
13873 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
13874 mem_order
, &base_reg
, &offset
, false);
13879 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
13881 /* If the same register is used more than once, try to find a free
13883 CLEAR_HARD_REG_SET (allocated
);
13884 for (i
= 0; i
< nops
; i
++)
13886 for (j
= i
+ 1; j
< nops
; j
++)
13887 if (regs
[i
] == regs
[j
])
13889 rtx t
= peep2_find_free_register (0, nops
* 2,
13890 TARGET_THUMB1
? "l" : "r",
13891 SImode
, &allocated
);
13895 regs
[i
] = REGNO (t
);
13899 /* Compute an ordering that maps the register numbers to an ascending
13902 for (i
= 0; i
< nops
; i
++)
13903 if (regs
[i
] < regs
[reg_order
[0]])
13906 for (i
= 1; i
< nops
; i
++)
13908 int this_order
= reg_order
[i
- 1];
13909 for (j
= 0; j
< nops
; j
++)
13910 if (regs
[j
] > regs
[reg_order
[i
- 1]]
13911 && (this_order
== reg_order
[i
- 1]
13912 || regs
[j
] < regs
[this_order
]))
13914 reg_order
[i
] = this_order
;
13917 /* Ensure that registers that must be live after the instruction end
13918 up with the correct value. */
13919 for (i
= 0; i
< nops
; i
++)
13921 int this_order
= reg_order
[i
];
13922 if ((this_order
!= mem_order
[i
]
13923 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
13924 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
13928 /* Load the constants. */
13929 for (i
= 0; i
< nops
; i
++)
13931 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
13932 sorted_regs
[i
] = regs
[reg_order
[i
]];
13933 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
13936 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13938 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
13941 gcc_assert (base_reg_dies
);
13947 gcc_assert (base_reg_dies
);
13948 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13952 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13954 for (i
= 0; i
< nops
; i
++)
13956 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13957 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13960 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
13961 write_back
? offset
+ i
* 4 : 0));
13965 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13966 unaligned copies on processors which support unaligned semantics for those
13967 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13968 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13969 An interleave factor of 1 (the minimum) will perform no interleaving.
13970 Load/store multiple are used for aligned addresses where possible. */
13973 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
13974 HOST_WIDE_INT length
,
13975 unsigned int interleave_factor
)
13977 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
13978 int *regnos
= XALLOCAVEC (int, interleave_factor
);
13979 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
13980 HOST_WIDE_INT i
, j
;
13981 HOST_WIDE_INT remaining
= length
, words
;
13982 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
13984 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
13985 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
13986 HOST_WIDE_INT srcoffset
, dstoffset
;
13987 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
13990 gcc_assert (interleave_factor
>= 1 && interleave_factor
<= 4);
13992 /* Use hard registers if we have aligned source or destination so we can use
13993 load/store multiple with contiguous registers. */
13994 if (dst_aligned
|| src_aligned
)
13995 for (i
= 0; i
< interleave_factor
; i
++)
13996 regs
[i
] = gen_rtx_REG (SImode
, i
);
13998 for (i
= 0; i
< interleave_factor
; i
++)
13999 regs
[i
] = gen_reg_rtx (SImode
);
14001 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
14002 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
14004 srcoffset
= dstoffset
= 0;
14006 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14007 For copying the last bytes we want to subtract this offset again. */
14008 src_autoinc
= dst_autoinc
= 0;
14010 for (i
= 0; i
< interleave_factor
; i
++)
14013 /* Copy BLOCK_SIZE_BYTES chunks. */
14015 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
14018 if (src_aligned
&& interleave_factor
> 1)
14020 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
14021 TRUE
, srcbase
, &srcoffset
));
14022 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14026 for (j
= 0; j
< interleave_factor
; j
++)
14028 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
14030 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14031 srcoffset
+ j
* UNITS_PER_WORD
);
14032 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14034 srcoffset
+= block_size_bytes
;
14038 if (dst_aligned
&& interleave_factor
> 1)
14040 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
14041 TRUE
, dstbase
, &dstoffset
));
14042 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14046 for (j
= 0; j
< interleave_factor
; j
++)
14048 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
14050 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14051 dstoffset
+ j
* UNITS_PER_WORD
);
14052 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14054 dstoffset
+= block_size_bytes
;
14057 remaining
-= block_size_bytes
;
14060 /* Copy any whole words left (note these aren't interleaved with any
14061 subsequent halfword/byte load/stores in the interests of simplicity). */
14063 words
= remaining
/ UNITS_PER_WORD
;
14065 gcc_assert (words
< interleave_factor
);
14067 if (src_aligned
&& words
> 1)
14069 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
14071 src_autoinc
+= UNITS_PER_WORD
* words
;
14075 for (j
= 0; j
< words
; j
++)
14077 addr
= plus_constant (Pmode
, src
,
14078 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
14079 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14080 srcoffset
+ j
* UNITS_PER_WORD
);
14082 emit_move_insn (regs
[j
], mem
);
14084 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14086 srcoffset
+= words
* UNITS_PER_WORD
;
14089 if (dst_aligned
&& words
> 1)
14091 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
14093 dst_autoinc
+= words
* UNITS_PER_WORD
;
14097 for (j
= 0; j
< words
; j
++)
14099 addr
= plus_constant (Pmode
, dst
,
14100 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
14101 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14102 dstoffset
+ j
* UNITS_PER_WORD
);
14104 emit_move_insn (mem
, regs
[j
]);
14106 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14108 dstoffset
+= words
* UNITS_PER_WORD
;
14111 remaining
-= words
* UNITS_PER_WORD
;
14113 gcc_assert (remaining
< 4);
14115 /* Copy a halfword if necessary. */
14117 if (remaining
>= 2)
14119 halfword_tmp
= gen_reg_rtx (SImode
);
14121 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14122 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
14123 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
14125 /* Either write out immediately, or delay until we've loaded the last
14126 byte, depending on interleave factor. */
14127 if (interleave_factor
== 1)
14129 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14130 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14131 emit_insn (gen_unaligned_storehi (mem
,
14132 gen_lowpart (HImode
, halfword_tmp
)));
14133 halfword_tmp
= NULL
;
14141 gcc_assert (remaining
< 2);
14143 /* Copy last byte. */
14145 if ((remaining
& 1) != 0)
14147 byte_tmp
= gen_reg_rtx (SImode
);
14149 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14150 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14151 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14153 if (interleave_factor
== 1)
14155 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14156 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14157 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14166 /* Store last halfword if we haven't done so already. */
14170 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14171 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14172 emit_insn (gen_unaligned_storehi (mem
,
14173 gen_lowpart (HImode
, halfword_tmp
)));
14177 /* Likewise for last byte. */
14181 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14182 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14183 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14187 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
14190 /* From mips_adjust_block_mem:
14192 Helper function for doing a loop-based block operation on memory
14193 reference MEM. Each iteration of the loop will operate on LENGTH
14196 Create a new base register for use within the loop and point it to
14197 the start of MEM. Create a new memory reference that uses this
14198 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14201 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
14204 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14206 /* Although the new mem does not refer to a known location,
14207 it does keep up to LENGTH bytes of alignment. */
14208 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14209 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
14212 /* From mips_block_move_loop:
14214 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14215 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14216 the memory regions do not overlap. */
14219 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14220 unsigned int interleave_factor
,
14221 HOST_WIDE_INT bytes_per_iter
)
14223 rtx src_reg
, dest_reg
, final_src
, test
;
14224 HOST_WIDE_INT leftover
;
14226 leftover
= length
% bytes_per_iter
;
14227 length
-= leftover
;
14229 /* Create registers and memory references for use within the loop. */
14230 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14231 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14233 /* Calculate the value that SRC_REG should have after the last iteration of
14235 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14236 0, 0, OPTAB_WIDEN
);
14238 /* Emit the start of the loop. */
14239 rtx_code_label
*label
= gen_label_rtx ();
14240 emit_label (label
);
14242 /* Emit the loop body. */
14243 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14244 interleave_factor
);
14246 /* Move on to the next block. */
14247 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14248 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14250 /* Emit the loop condition. */
14251 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14252 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14254 /* Mop up any left-over bytes. */
14256 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14259 /* Emit a block move when either the source or destination is unaligned (not
14260 aligned to a four-byte boundary). This may need further tuning depending on
14261 core type, optimize_size setting, etc. */
14264 arm_movmemqi_unaligned (rtx
*operands
)
14266 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14270 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14271 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14272 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14273 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14274 or dst_aligned though: allow more interleaving in those cases since the
14275 resulting code can be smaller. */
14276 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14277 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14280 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14281 interleave_factor
, bytes_per_iter
);
14283 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14284 interleave_factor
);
14288 /* Note that the loop created by arm_block_move_unaligned_loop may be
14289 subject to loop unrolling, which makes tuning this condition a little
14292 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14294 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14301 arm_gen_movmemqi (rtx
*operands
)
14303 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14304 HOST_WIDE_INT srcoffset
, dstoffset
;
14305 rtx src
, dst
, srcbase
, dstbase
;
14306 rtx part_bytes_reg
= NULL
;
14309 if (!CONST_INT_P (operands
[2])
14310 || !CONST_INT_P (operands
[3])
14311 || INTVAL (operands
[2]) > 64)
14314 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14315 return arm_movmemqi_unaligned (operands
);
14317 if (INTVAL (operands
[3]) & 3)
14320 dstbase
= operands
[0];
14321 srcbase
= operands
[1];
14323 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14324 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14326 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14327 out_words_to_go
= INTVAL (operands
[2]) / 4;
14328 last_bytes
= INTVAL (operands
[2]) & 3;
14329 dstoffset
= srcoffset
= 0;
14331 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14332 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14334 while (in_words_to_go
>= 2)
14336 if (in_words_to_go
> 4)
14337 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14338 TRUE
, srcbase
, &srcoffset
));
14340 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14341 src
, FALSE
, srcbase
,
14344 if (out_words_to_go
)
14346 if (out_words_to_go
> 4)
14347 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14348 TRUE
, dstbase
, &dstoffset
));
14349 else if (out_words_to_go
!= 1)
14350 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14351 out_words_to_go
, dst
,
14354 dstbase
, &dstoffset
));
14357 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14358 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
14359 if (last_bytes
!= 0)
14361 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14367 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14368 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14371 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14372 if (out_words_to_go
)
14376 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14377 sreg
= copy_to_reg (mem
);
14379 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14380 emit_move_insn (mem
, sreg
);
14383 gcc_assert (!in_words_to_go
); /* Sanity check */
14386 if (in_words_to_go
)
14388 gcc_assert (in_words_to_go
> 0);
14390 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14391 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14394 gcc_assert (!last_bytes
|| part_bytes_reg
);
14396 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14398 rtx tmp
= gen_reg_rtx (SImode
);
14400 /* The bytes we want are in the top end of the word. */
14401 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14402 GEN_INT (8 * (4 - last_bytes
))));
14403 part_bytes_reg
= tmp
;
14407 mem
= adjust_automodify_address (dstbase
, QImode
,
14408 plus_constant (Pmode
, dst
,
14410 dstoffset
+ last_bytes
- 1);
14411 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14415 tmp
= gen_reg_rtx (SImode
);
14416 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14417 part_bytes_reg
= tmp
;
14424 if (last_bytes
> 1)
14426 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14427 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14431 rtx tmp
= gen_reg_rtx (SImode
);
14432 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14433 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14434 part_bytes_reg
= tmp
;
14441 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14442 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14449 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14452 next_consecutive_mem (rtx mem
)
14454 machine_mode mode
= GET_MODE (mem
);
14455 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14456 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14458 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14461 /* Copy using LDRD/STRD instructions whenever possible.
14462 Returns true upon success. */
14464 gen_movmem_ldrd_strd (rtx
*operands
)
14466 unsigned HOST_WIDE_INT len
;
14467 HOST_WIDE_INT align
;
14468 rtx src
, dst
, base
;
14470 bool src_aligned
, dst_aligned
;
14471 bool src_volatile
, dst_volatile
;
14473 gcc_assert (CONST_INT_P (operands
[2]));
14474 gcc_assert (CONST_INT_P (operands
[3]));
14476 len
= UINTVAL (operands
[2]);
14480 /* Maximum alignment we can assume for both src and dst buffers. */
14481 align
= INTVAL (operands
[3]);
14483 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14486 /* Place src and dst addresses in registers
14487 and update the corresponding mem rtx. */
14489 dst_volatile
= MEM_VOLATILE_P (dst
);
14490 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14491 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14492 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14495 src_volatile
= MEM_VOLATILE_P (src
);
14496 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14497 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14498 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14500 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14503 if (src_volatile
|| dst_volatile
)
14506 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14507 if (!(dst_aligned
|| src_aligned
))
14508 return arm_gen_movmemqi (operands
);
14510 /* If the either src or dst is unaligned we'll be accessing it as pairs
14511 of unaligned SImode accesses. Otherwise we can generate DImode
14512 ldrd/strd instructions. */
14513 src
= adjust_address (src
, src_aligned
? DImode
: SImode
, 0);
14514 dst
= adjust_address (dst
, dst_aligned
? DImode
: SImode
, 0);
14519 reg0
= gen_reg_rtx (DImode
);
14520 rtx low_reg
= NULL_RTX
;
14521 rtx hi_reg
= NULL_RTX
;
14523 if (!src_aligned
|| !dst_aligned
)
14525 low_reg
= gen_lowpart (SImode
, reg0
);
14526 hi_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
14529 emit_move_insn (reg0
, src
);
14532 emit_insn (gen_unaligned_loadsi (low_reg
, src
));
14533 src
= next_consecutive_mem (src
);
14534 emit_insn (gen_unaligned_loadsi (hi_reg
, src
));
14538 emit_move_insn (dst
, reg0
);
14541 emit_insn (gen_unaligned_storesi (dst
, low_reg
));
14542 dst
= next_consecutive_mem (dst
);
14543 emit_insn (gen_unaligned_storesi (dst
, hi_reg
));
14546 src
= next_consecutive_mem (src
);
14547 dst
= next_consecutive_mem (dst
);
14550 gcc_assert (len
< 8);
14553 /* More than a word but less than a double-word to copy. Copy a word. */
14554 reg0
= gen_reg_rtx (SImode
);
14555 src
= adjust_address (src
, SImode
, 0);
14556 dst
= adjust_address (dst
, SImode
, 0);
14558 emit_move_insn (reg0
, src
);
14560 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14563 emit_move_insn (dst
, reg0
);
14565 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14567 src
= next_consecutive_mem (src
);
14568 dst
= next_consecutive_mem (dst
);
14575 /* Copy the remaining bytes. */
14578 dst
= adjust_address (dst
, HImode
, 0);
14579 src
= adjust_address (src
, HImode
, 0);
14580 reg0
= gen_reg_rtx (SImode
);
14582 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14584 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14587 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14589 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14591 src
= next_consecutive_mem (src
);
14592 dst
= next_consecutive_mem (dst
);
14597 dst
= adjust_address (dst
, QImode
, 0);
14598 src
= adjust_address (src
, QImode
, 0);
14599 reg0
= gen_reg_rtx (QImode
);
14600 emit_move_insn (reg0
, src
);
14601 emit_move_insn (dst
, reg0
);
14605 /* Select a dominance comparison mode if possible for a test of the general
14606 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14607 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14608 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14609 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14610 In all cases OP will be either EQ or NE, but we don't need to know which
14611 here. If we are unable to support a dominance comparison we return
14612 CC mode. This will then fail to match for the RTL expressions that
14613 generate this call. */
14615 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14617 enum rtx_code cond1
, cond2
;
14620 /* Currently we will probably get the wrong result if the individual
14621 comparisons are not simple. This also ensures that it is safe to
14622 reverse a comparison if necessary. */
14623 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14625 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14629 /* The if_then_else variant of this tests the second condition if the
14630 first passes, but is true if the first fails. Reverse the first
14631 condition to get a true "inclusive-or" expression. */
14632 if (cond_or
== DOM_CC_NX_OR_Y
)
14633 cond1
= reverse_condition (cond1
);
14635 /* If the comparisons are not equal, and one doesn't dominate the other,
14636 then we can't do this. */
14638 && !comparison_dominates_p (cond1
, cond2
)
14639 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14643 std::swap (cond1
, cond2
);
14648 if (cond_or
== DOM_CC_X_AND_Y
)
14653 case EQ
: return CC_DEQmode
;
14654 case LE
: return CC_DLEmode
;
14655 case LEU
: return CC_DLEUmode
;
14656 case GE
: return CC_DGEmode
;
14657 case GEU
: return CC_DGEUmode
;
14658 default: gcc_unreachable ();
14662 if (cond_or
== DOM_CC_X_AND_Y
)
14674 gcc_unreachable ();
14678 if (cond_or
== DOM_CC_X_AND_Y
)
14690 gcc_unreachable ();
14694 if (cond_or
== DOM_CC_X_AND_Y
)
14695 return CC_DLTUmode
;
14700 return CC_DLTUmode
;
14702 return CC_DLEUmode
;
14706 gcc_unreachable ();
14710 if (cond_or
== DOM_CC_X_AND_Y
)
14711 return CC_DGTUmode
;
14716 return CC_DGTUmode
;
14718 return CC_DGEUmode
;
14722 gcc_unreachable ();
14725 /* The remaining cases only occur when both comparisons are the
14728 gcc_assert (cond1
== cond2
);
14732 gcc_assert (cond1
== cond2
);
14736 gcc_assert (cond1
== cond2
);
14740 gcc_assert (cond1
== cond2
);
14741 return CC_DLEUmode
;
14744 gcc_assert (cond1
== cond2
);
14745 return CC_DGEUmode
;
14748 gcc_unreachable ();
14753 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
14755 /* All floating point compares return CCFP if it is an equality
14756 comparison, and CCFPE otherwise. */
14757 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14780 gcc_unreachable ();
14784 /* A compare with a shifted operand. Because of canonicalization, the
14785 comparison will have to be swapped when we emit the assembler. */
14786 if (GET_MODE (y
) == SImode
14787 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14788 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14789 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
14790 || GET_CODE (x
) == ROTATERT
))
14793 /* This operation is performed swapped, but since we only rely on the Z
14794 flag we don't need an additional mode. */
14795 if (GET_MODE (y
) == SImode
14796 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14797 && GET_CODE (x
) == NEG
14798 && (op
== EQ
|| op
== NE
))
14801 /* This is a special case that is used by combine to allow a
14802 comparison of a shifted byte load to be split into a zero-extend
14803 followed by a comparison of the shifted integer (only valid for
14804 equalities and unsigned inequalities). */
14805 if (GET_MODE (x
) == SImode
14806 && GET_CODE (x
) == ASHIFT
14807 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
14808 && GET_CODE (XEXP (x
, 0)) == SUBREG
14809 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
14810 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
14811 && (op
== EQ
|| op
== NE
14812 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
14813 && CONST_INT_P (y
))
14816 /* A construct for a conditional compare, if the false arm contains
14817 0, then both conditions must be true, otherwise either condition
14818 must be true. Not all conditions are possible, so CCmode is
14819 returned if it can't be done. */
14820 if (GET_CODE (x
) == IF_THEN_ELSE
14821 && (XEXP (x
, 2) == const0_rtx
14822 || XEXP (x
, 2) == const1_rtx
)
14823 && COMPARISON_P (XEXP (x
, 0))
14824 && COMPARISON_P (XEXP (x
, 1)))
14825 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14826 INTVAL (XEXP (x
, 2)));
14828 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14829 if (GET_CODE (x
) == AND
14830 && (op
== EQ
|| op
== NE
)
14831 && COMPARISON_P (XEXP (x
, 0))
14832 && COMPARISON_P (XEXP (x
, 1)))
14833 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14836 if (GET_CODE (x
) == IOR
14837 && (op
== EQ
|| op
== NE
)
14838 && COMPARISON_P (XEXP (x
, 0))
14839 && COMPARISON_P (XEXP (x
, 1)))
14840 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14843 /* An operation (on Thumb) where we want to test for a single bit.
14844 This is done by shifting that bit up into the top bit of a
14845 scratch register; we can then branch on the sign bit. */
14847 && GET_MODE (x
) == SImode
14848 && (op
== EQ
|| op
== NE
)
14849 && GET_CODE (x
) == ZERO_EXTRACT
14850 && XEXP (x
, 1) == const1_rtx
)
14853 /* An operation that sets the condition codes as a side-effect, the
14854 V flag is not set correctly, so we can only use comparisons where
14855 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14857 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14858 if (GET_MODE (x
) == SImode
14860 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
14861 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
14862 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
14863 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
14864 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
14865 || GET_CODE (x
) == LSHIFTRT
14866 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14867 || GET_CODE (x
) == ROTATERT
14868 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
14869 return CC_NOOVmode
;
14871 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
14874 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
14875 && GET_CODE (x
) == PLUS
14876 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
14879 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
14885 /* A DImode comparison against zero can be implemented by
14886 or'ing the two halves together. */
14887 if (y
== const0_rtx
)
14890 /* We can do an equality test in three Thumb instructions. */
14900 /* DImode unsigned comparisons can be implemented by cmp +
14901 cmpeq without a scratch register. Not worth doing in
14912 /* DImode signed and unsigned comparisons can be implemented
14913 by cmp + sbcs with a scratch register, but that does not
14914 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14915 gcc_assert (op
!= EQ
&& op
!= NE
);
14919 gcc_unreachable ();
14923 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
14924 return GET_MODE (x
);
14929 /* X and Y are two things to compare using CODE. Emit the compare insn and
14930 return the rtx for register 0 in the proper mode. FP means this is a
14931 floating point compare: I don't think that it is needed on the arm. */
14933 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
14937 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
14939 /* We might have X as a constant, Y as a register because of the predicates
14940 used for cmpdi. If so, force X to a register here. */
14941 if (dimode_comparison
&& !REG_P (x
))
14942 x
= force_reg (DImode
, x
);
14944 mode
= SELECT_CC_MODE (code
, x
, y
);
14945 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
14947 if (dimode_comparison
14948 && mode
!= CC_CZmode
)
14952 /* To compare two non-zero values for equality, XOR them and
14953 then compare against zero. Not used for ARM mode; there
14954 CC_CZmode is cheaper. */
14955 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
14957 gcc_assert (!reload_completed
);
14958 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
14962 /* A scratch register is required. */
14963 if (reload_completed
)
14964 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
14966 scratch
= gen_rtx_SCRATCH (SImode
);
14968 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
14969 set
= gen_rtx_SET (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14970 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
14973 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14978 /* Generate a sequence of insns that will generate the correct return
14979 address mask depending on the physical architecture that the program
14982 arm_gen_return_addr_mask (void)
14984 rtx reg
= gen_reg_rtx (Pmode
);
14986 emit_insn (gen_return_addr_mask (reg
));
14991 arm_reload_in_hi (rtx
*operands
)
14993 rtx ref
= operands
[1];
14995 HOST_WIDE_INT offset
= 0;
14997 if (GET_CODE (ref
) == SUBREG
)
14999 offset
= SUBREG_BYTE (ref
);
15000 ref
= SUBREG_REG (ref
);
15005 /* We have a pseudo which has been spilt onto the stack; there
15006 are two cases here: the first where there is a simple
15007 stack-slot replacement and a second where the stack-slot is
15008 out of range, or is used as a subreg. */
15009 if (reg_equiv_mem (REGNO (ref
)))
15011 ref
= reg_equiv_mem (REGNO (ref
));
15012 base
= find_replacement (&XEXP (ref
, 0));
15015 /* The slot is out of range, or was dressed up in a SUBREG. */
15016 base
= reg_equiv_address (REGNO (ref
));
15018 /* PR 62554: If there is no equivalent memory location then just move
15019 the value as an SImode register move. This happens when the target
15020 architecture variant does not have an HImode register move. */
15023 gcc_assert (REG_P (operands
[0]));
15024 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15025 gen_rtx_SUBREG (SImode
, ref
, 0)));
15030 base
= find_replacement (&XEXP (ref
, 0));
15032 /* Handle the case where the address is too complex to be offset by 1. */
15033 if (GET_CODE (base
) == MINUS
15034 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15036 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15038 emit_set_insn (base_plus
, base
);
15041 else if (GET_CODE (base
) == PLUS
)
15043 /* The addend must be CONST_INT, or we would have dealt with it above. */
15044 HOST_WIDE_INT hi
, lo
;
15046 offset
+= INTVAL (XEXP (base
, 1));
15047 base
= XEXP (base
, 0);
15049 /* Rework the address into a legal sequence of insns. */
15050 /* Valid range for lo is -4095 -> 4095 */
15053 : -((-offset
) & 0xfff));
15055 /* Corner case, if lo is the max offset then we would be out of range
15056 once we have added the additional 1 below, so bump the msb into the
15057 pre-loading insn(s). */
15061 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15062 ^ (HOST_WIDE_INT
) 0x80000000)
15063 - (HOST_WIDE_INT
) 0x80000000);
15065 gcc_assert (hi
+ lo
== offset
);
15069 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15071 /* Get the base address; addsi3 knows how to handle constants
15072 that require more than one insn. */
15073 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15079 /* Operands[2] may overlap operands[0] (though it won't overlap
15080 operands[1]), that's why we asked for a DImode reg -- so we can
15081 use the bit that does not overlap. */
15082 if (REGNO (operands
[2]) == REGNO (operands
[0]))
15083 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15085 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15087 emit_insn (gen_zero_extendqisi2 (scratch
,
15088 gen_rtx_MEM (QImode
,
15089 plus_constant (Pmode
, base
,
15091 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15092 gen_rtx_MEM (QImode
,
15093 plus_constant (Pmode
, base
,
15095 if (!BYTES_BIG_ENDIAN
)
15096 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15097 gen_rtx_IOR (SImode
,
15100 gen_rtx_SUBREG (SImode
, operands
[0], 0),
15104 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15105 gen_rtx_IOR (SImode
,
15106 gen_rtx_ASHIFT (SImode
, scratch
,
15108 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
15111 /* Handle storing a half-word to memory during reload by synthesizing as two
15112 byte stores. Take care not to clobber the input values until after we
15113 have moved them somewhere safe. This code assumes that if the DImode
15114 scratch in operands[2] overlaps either the input value or output address
15115 in some way, then that value must die in this insn (we absolutely need
15116 two scratch registers for some corner cases). */
15118 arm_reload_out_hi (rtx
*operands
)
15120 rtx ref
= operands
[0];
15121 rtx outval
= operands
[1];
15123 HOST_WIDE_INT offset
= 0;
15125 if (GET_CODE (ref
) == SUBREG
)
15127 offset
= SUBREG_BYTE (ref
);
15128 ref
= SUBREG_REG (ref
);
15133 /* We have a pseudo which has been spilt onto the stack; there
15134 are two cases here: the first where there is a simple
15135 stack-slot replacement and a second where the stack-slot is
15136 out of range, or is used as a subreg. */
15137 if (reg_equiv_mem (REGNO (ref
)))
15139 ref
= reg_equiv_mem (REGNO (ref
));
15140 base
= find_replacement (&XEXP (ref
, 0));
15143 /* The slot is out of range, or was dressed up in a SUBREG. */
15144 base
= reg_equiv_address (REGNO (ref
));
15146 /* PR 62254: If there is no equivalent memory location then just move
15147 the value as an SImode register move. This happens when the target
15148 architecture variant does not have an HImode register move. */
15151 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
15153 if (REG_P (outval
))
15155 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
15156 gen_rtx_SUBREG (SImode
, outval
, 0)));
15158 else /* SUBREG_P (outval) */
15160 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
15161 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
15162 SUBREG_REG (outval
)));
15164 /* FIXME: Handle other cases ? */
15165 gcc_unreachable ();
15171 base
= find_replacement (&XEXP (ref
, 0));
15173 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15175 /* Handle the case where the address is too complex to be offset by 1. */
15176 if (GET_CODE (base
) == MINUS
15177 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15179 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15181 /* Be careful not to destroy OUTVAL. */
15182 if (reg_overlap_mentioned_p (base_plus
, outval
))
15184 /* Updating base_plus might destroy outval, see if we can
15185 swap the scratch and base_plus. */
15186 if (!reg_overlap_mentioned_p (scratch
, outval
))
15187 std::swap (scratch
, base_plus
);
15190 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15192 /* Be conservative and copy OUTVAL into the scratch now,
15193 this should only be necessary if outval is a subreg
15194 of something larger than a word. */
15195 /* XXX Might this clobber base? I can't see how it can,
15196 since scratch is known to overlap with OUTVAL, and
15197 must be wider than a word. */
15198 emit_insn (gen_movhi (scratch_hi
, outval
));
15199 outval
= scratch_hi
;
15203 emit_set_insn (base_plus
, base
);
15206 else if (GET_CODE (base
) == PLUS
)
15208 /* The addend must be CONST_INT, or we would have dealt with it above. */
15209 HOST_WIDE_INT hi
, lo
;
15211 offset
+= INTVAL (XEXP (base
, 1));
15212 base
= XEXP (base
, 0);
15214 /* Rework the address into a legal sequence of insns. */
15215 /* Valid range for lo is -4095 -> 4095 */
15218 : -((-offset
) & 0xfff));
15220 /* Corner case, if lo is the max offset then we would be out of range
15221 once we have added the additional 1 below, so bump the msb into the
15222 pre-loading insn(s). */
15226 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15227 ^ (HOST_WIDE_INT
) 0x80000000)
15228 - (HOST_WIDE_INT
) 0x80000000);
15230 gcc_assert (hi
+ lo
== offset
);
15234 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15236 /* Be careful not to destroy OUTVAL. */
15237 if (reg_overlap_mentioned_p (base_plus
, outval
))
15239 /* Updating base_plus might destroy outval, see if we
15240 can swap the scratch and base_plus. */
15241 if (!reg_overlap_mentioned_p (scratch
, outval
))
15242 std::swap (scratch
, base_plus
);
15245 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15247 /* Be conservative and copy outval into scratch now,
15248 this should only be necessary if outval is a
15249 subreg of something larger than a word. */
15250 /* XXX Might this clobber base? I can't see how it
15251 can, since scratch is known to overlap with
15253 emit_insn (gen_movhi (scratch_hi
, outval
));
15254 outval
= scratch_hi
;
15258 /* Get the base address; addsi3 knows how to handle constants
15259 that require more than one insn. */
15260 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15266 if (BYTES_BIG_ENDIAN
)
15268 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15269 plus_constant (Pmode
, base
,
15271 gen_lowpart (QImode
, outval
)));
15272 emit_insn (gen_lshrsi3 (scratch
,
15273 gen_rtx_SUBREG (SImode
, outval
, 0),
15275 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15277 gen_lowpart (QImode
, scratch
)));
15281 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15283 gen_lowpart (QImode
, outval
)));
15284 emit_insn (gen_lshrsi3 (scratch
,
15285 gen_rtx_SUBREG (SImode
, outval
, 0),
15287 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15288 plus_constant (Pmode
, base
,
15290 gen_lowpart (QImode
, scratch
)));
15294 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15295 (padded to the size of a word) should be passed in a register. */
15298 arm_must_pass_in_stack (machine_mode mode
, const_tree type
)
15300 if (TARGET_AAPCS_BASED
)
15301 return must_pass_in_stack_var_size (mode
, type
);
15303 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15307 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15308 byte of a stack argument has useful data. For legacy APCS ABIs we use
15309 the default. For AAPCS based ABIs small aggregate types are placed
15310 in the lowest memory address. */
15312 static pad_direction
15313 arm_function_arg_padding (machine_mode mode
, const_tree type
)
15315 if (!TARGET_AAPCS_BASED
)
15316 return default_function_arg_padding (mode
, type
);
15318 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15319 return PAD_DOWNWARD
;
15325 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15326 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15327 register has useful data, and return the opposite if the most
15328 significant byte does. */
15331 arm_pad_reg_upward (machine_mode mode
,
15332 tree type
, int first ATTRIBUTE_UNUSED
)
15334 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15336 /* For AAPCS, small aggregates, small fixed-point types,
15337 and small complex types are always padded upwards. */
15340 if ((AGGREGATE_TYPE_P (type
)
15341 || TREE_CODE (type
) == COMPLEX_TYPE
15342 || FIXED_POINT_TYPE_P (type
))
15343 && int_size_in_bytes (type
) <= 4)
15348 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15349 && GET_MODE_SIZE (mode
) <= 4)
15354 /* Otherwise, use default padding. */
15355 return !BYTES_BIG_ENDIAN
;
15358 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15359 assuming that the address in the base register is word aligned. */
15361 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15363 HOST_WIDE_INT max_offset
;
15365 /* Offset must be a multiple of 4 in Thumb mode. */
15366 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
15371 else if (TARGET_ARM
)
15376 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15379 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15380 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15381 Assumes that the address in the base register RN is word aligned. Pattern
15382 guarantees that both memory accesses use the same base register,
15383 the offsets are constants within the range, and the gap between the offsets is 4.
15384 If preload complete then check that registers are legal. WBACK indicates whether
15385 address is updated. LOAD indicates whether memory access is load or store. */
15387 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15388 bool wback
, bool load
)
15390 unsigned int t
, t2
, n
;
15392 if (!reload_completed
)
15395 if (!offset_ok_for_ldrd_strd (offset
))
15402 if ((TARGET_THUMB2
)
15403 && ((wback
&& (n
== t
|| n
== t2
))
15404 || (t
== SP_REGNUM
)
15405 || (t
== PC_REGNUM
)
15406 || (t2
== SP_REGNUM
)
15407 || (t2
== PC_REGNUM
)
15408 || (!load
&& (n
== PC_REGNUM
))
15409 || (load
&& (t
== t2
))
15410 /* Triggers Cortex-M3 LDRD errata. */
15411 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
15415 && ((wback
&& (n
== t
|| n
== t2
))
15416 || (t2
== PC_REGNUM
)
15417 || (t
% 2 != 0) /* First destination register is not even. */
15419 /* PC can be used as base register (for offset addressing only),
15420 but it is depricated. */
15421 || (n
== PC_REGNUM
)))
15427 /* Return true if a 64-bit access with alignment ALIGN and with a
15428 constant offset OFFSET from the base pointer is permitted on this
15431 align_ok_ldrd_strd (HOST_WIDE_INT align
, HOST_WIDE_INT offset
)
15433 return (unaligned_access
15434 ? (align
>= BITS_PER_WORD
&& (offset
& 3) == 0)
15435 : (align
>= 2 * BITS_PER_WORD
&& (offset
& 7) == 0));
15438 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15439 operand MEM's address contains an immediate offset from the base
15440 register and has no side effects, in which case it sets BASE,
15441 OFFSET and ALIGN accordingly. */
15443 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
, HOST_WIDE_INT
*align
)
15447 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15449 /* TODO: Handle more general memory operand patterns, such as
15450 PRE_DEC and PRE_INC. */
15452 if (side_effects_p (mem
))
15455 /* Can't deal with subregs. */
15456 if (GET_CODE (mem
) == SUBREG
)
15459 gcc_assert (MEM_P (mem
));
15461 *offset
= const0_rtx
;
15462 *align
= MEM_ALIGN (mem
);
15464 addr
= XEXP (mem
, 0);
15466 /* If addr isn't valid for DImode, then we can't handle it. */
15467 if (!arm_legitimate_address_p (DImode
, addr
,
15468 reload_in_progress
|| reload_completed
))
15476 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15478 *base
= XEXP (addr
, 0);
15479 *offset
= XEXP (addr
, 1);
15480 return (REG_P (*base
) && CONST_INT_P (*offset
));
15486 /* Called from a peephole2 to replace two word-size accesses with a
15487 single LDRD/STRD instruction. Returns true iff we can generate a
15488 new instruction sequence. That is, both accesses use the same base
15489 register and the gap between constant offsets is 4. This function
15490 may reorder its operands to match ldrd/strd RTL templates.
15491 OPERANDS are the operands found by the peephole matcher;
15492 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15493 corresponding memory operands. LOAD indicaates whether the access
15494 is load or store. CONST_STORE indicates a store of constant
15495 integer values held in OPERANDS[4,5] and assumes that the pattern
15496 is of length 4 insn, for the purpose of checking dead registers.
15497 COMMUTE indicates that register operands may be reordered. */
15499 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15500 bool const_store
, bool commute
)
15503 HOST_WIDE_INT offsets
[2], offset
, align
[2];
15504 rtx base
= NULL_RTX
;
15505 rtx cur_base
, cur_offset
, tmp
;
15507 HARD_REG_SET regset
;
15509 gcc_assert (!const_store
|| !load
);
15510 /* Check that the memory references are immediate offsets from the
15511 same base register. Extract the base register, the destination
15512 registers, and the corresponding memory offsets. */
15513 for (i
= 0; i
< nops
; i
++)
15515 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
,
15521 else if (REGNO (base
) != REGNO (cur_base
))
15524 offsets
[i
] = INTVAL (cur_offset
);
15525 if (GET_CODE (operands
[i
]) == SUBREG
)
15527 tmp
= SUBREG_REG (operands
[i
]);
15528 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15533 /* Make sure there is no dependency between the individual loads. */
15534 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15535 return false; /* RAW */
15537 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15538 return false; /* WAW */
15540 /* If the same input register is used in both stores
15541 when storing different constants, try to find a free register.
15542 For example, the code
15547 can be transformed into
15551 in Thumb mode assuming that r1 is free.
15552 For ARM mode do the same but only if the starting register
15553 can be made to be even. */
15555 && REGNO (operands
[0]) == REGNO (operands
[1])
15556 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15560 CLEAR_HARD_REG_SET (regset
);
15561 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15562 if (tmp
== NULL_RTX
)
15565 /* Use the new register in the first load to ensure that
15566 if the original input register is not dead after peephole,
15567 then it will have the correct constant value. */
15570 else if (TARGET_ARM
)
15572 int regno
= REGNO (operands
[0]);
15573 if (!peep2_reg_dead_p (4, operands
[0]))
15575 /* When the input register is even and is not dead after the
15576 pattern, it has to hold the second constant but we cannot
15577 form a legal STRD in ARM mode with this register as the second
15579 if (regno
% 2 == 0)
15582 /* Is regno-1 free? */
15583 SET_HARD_REG_SET (regset
);
15584 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15585 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15586 if (tmp
== NULL_RTX
)
15593 /* Find a DImode register. */
15594 CLEAR_HARD_REG_SET (regset
);
15595 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15596 if (tmp
!= NULL_RTX
)
15598 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15599 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15603 /* Can we use the input register to form a DI register? */
15604 SET_HARD_REG_SET (regset
);
15605 CLEAR_HARD_REG_BIT(regset
,
15606 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15607 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15608 if (tmp
== NULL_RTX
)
15610 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
15614 gcc_assert (operands
[0] != NULL_RTX
);
15615 gcc_assert (operands
[1] != NULL_RTX
);
15616 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15617 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15621 /* Make sure the instructions are ordered with lower memory access first. */
15622 if (offsets
[0] > offsets
[1])
15624 gap
= offsets
[0] - offsets
[1];
15625 offset
= offsets
[1];
15627 /* Swap the instructions such that lower memory is accessed first. */
15628 std::swap (operands
[0], operands
[1]);
15629 std::swap (operands
[2], operands
[3]);
15630 std::swap (align
[0], align
[1]);
15632 std::swap (operands
[4], operands
[5]);
15636 gap
= offsets
[1] - offsets
[0];
15637 offset
= offsets
[0];
15640 /* Make sure accesses are to consecutive memory locations. */
15644 if (!align_ok_ldrd_strd (align
[0], offset
))
15647 /* Make sure we generate legal instructions. */
15648 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15652 /* In Thumb state, where registers are almost unconstrained, there
15653 is little hope to fix it. */
15657 if (load
&& commute
)
15659 /* Try reordering registers. */
15660 std::swap (operands
[0], operands
[1]);
15661 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15668 /* If input registers are dead after this pattern, they can be
15669 reordered or replaced by other registers that are free in the
15670 current pattern. */
15671 if (!peep2_reg_dead_p (4, operands
[0])
15672 || !peep2_reg_dead_p (4, operands
[1]))
15675 /* Try to reorder the input registers. */
15676 /* For example, the code
15681 can be transformed into
15686 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15689 std::swap (operands
[0], operands
[1]);
15693 /* Try to find a free DI register. */
15694 CLEAR_HARD_REG_SET (regset
);
15695 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15696 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15699 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15700 if (tmp
== NULL_RTX
)
15703 /* DREG must be an even-numbered register in DImode.
15704 Split it into SI registers. */
15705 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15706 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15707 gcc_assert (operands
[0] != NULL_RTX
);
15708 gcc_assert (operands
[1] != NULL_RTX
);
15709 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15710 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15712 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15724 /* Print a symbolic form of X to the debug file, F. */
15726 arm_print_value (FILE *f
, rtx x
)
15728 switch (GET_CODE (x
))
15731 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15735 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
15743 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
15745 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
15746 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
15754 fprintf (f
, "\"%s\"", XSTR (x
, 0));
15758 fprintf (f
, "`%s'", XSTR (x
, 0));
15762 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
15766 arm_print_value (f
, XEXP (x
, 0));
15770 arm_print_value (f
, XEXP (x
, 0));
15772 arm_print_value (f
, XEXP (x
, 1));
15780 fprintf (f
, "????");
15785 /* Routines for manipulation of the constant pool. */
15787 /* Arm instructions cannot load a large constant directly into a
15788 register; they have to come from a pc relative load. The constant
15789 must therefore be placed in the addressable range of the pc
15790 relative load. Depending on the precise pc relative load
15791 instruction the range is somewhere between 256 bytes and 4k. This
15792 means that we often have to dump a constant inside a function, and
15793 generate code to branch around it.
15795 It is important to minimize this, since the branches will slow
15796 things down and make the code larger.
15798 Normally we can hide the table after an existing unconditional
15799 branch so that there is no interruption of the flow, but in the
15800 worst case the code looks like this:
15818 We fix this by performing a scan after scheduling, which notices
15819 which instructions need to have their operands fetched from the
15820 constant table and builds the table.
15822 The algorithm starts by building a table of all the constants that
15823 need fixing up and all the natural barriers in the function (places
15824 where a constant table can be dropped without breaking the flow).
15825 For each fixup we note how far the pc-relative replacement will be
15826 able to reach and the offset of the instruction into the function.
15828 Having built the table we then group the fixes together to form
15829 tables that are as large as possible (subject to addressing
15830 constraints) and emit each table of constants after the last
15831 barrier that is within range of all the instructions in the group.
15832 If a group does not contain a barrier, then we forcibly create one
15833 by inserting a jump instruction into the flow. Once the table has
15834 been inserted, the insns are then modified to reference the
15835 relevant entry in the pool.
15837 Possible enhancements to the algorithm (not implemented) are:
15839 1) For some processors and object formats, there may be benefit in
15840 aligning the pools to the start of cache lines; this alignment
15841 would need to be taken into account when calculating addressability
15844 /* These typedefs are located at the start of this file, so that
15845 they can be used in the prototypes there. This comment is to
15846 remind readers of that fact so that the following structures
15847 can be understood more easily.
15849 typedef struct minipool_node Mnode;
15850 typedef struct minipool_fixup Mfix; */
15852 struct minipool_node
15854 /* Doubly linked chain of entries. */
15857 /* The maximum offset into the code that this entry can be placed. While
15858 pushing fixes for forward references, all entries are sorted in order
15859 of increasing max_address. */
15860 HOST_WIDE_INT max_address
;
15861 /* Similarly for an entry inserted for a backwards ref. */
15862 HOST_WIDE_INT min_address
;
15863 /* The number of fixes referencing this entry. This can become zero
15864 if we "unpush" an entry. In this case we ignore the entry when we
15865 come to emit the code. */
15867 /* The offset from the start of the minipool. */
15868 HOST_WIDE_INT offset
;
15869 /* The value in table. */
15871 /* The mode of value. */
15873 /* The size of the value. With iWMMXt enabled
15874 sizes > 4 also imply an alignment of 8-bytes. */
15878 struct minipool_fixup
15882 HOST_WIDE_INT address
;
15888 HOST_WIDE_INT forwards
;
15889 HOST_WIDE_INT backwards
;
15892 /* Fixes less than a word need padding out to a word boundary. */
15893 #define MINIPOOL_FIX_SIZE(mode) \
15894 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15896 static Mnode
* minipool_vector_head
;
15897 static Mnode
* minipool_vector_tail
;
15898 static rtx_code_label
*minipool_vector_label
;
15899 static int minipool_pad
;
15901 /* The linked list of all minipool fixes required for this function. */
15902 Mfix
* minipool_fix_head
;
15903 Mfix
* minipool_fix_tail
;
15904 /* The fix entry for the current minipool, once it has been placed. */
15905 Mfix
* minipool_barrier
;
15907 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15908 #define JUMP_TABLES_IN_TEXT_SECTION 0
15911 static HOST_WIDE_INT
15912 get_jump_table_size (rtx_jump_table_data
*insn
)
15914 /* ADDR_VECs only take room if read-only data does into the text
15916 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
15918 rtx body
= PATTERN (insn
);
15919 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
15920 HOST_WIDE_INT size
;
15921 HOST_WIDE_INT modesize
;
15923 modesize
= GET_MODE_SIZE (GET_MODE (body
));
15924 size
= modesize
* XVECLEN (body
, elt
);
15928 /* Round up size of TBB table to a halfword boundary. */
15929 size
= (size
+ 1) & ~HOST_WIDE_INT_1
;
15932 /* No padding necessary for TBH. */
15935 /* Add two bytes for alignment on Thumb. */
15940 gcc_unreachable ();
15948 /* Return the maximum amount of padding that will be inserted before
15951 static HOST_WIDE_INT
15952 get_label_padding (rtx label
)
15954 HOST_WIDE_INT align
, min_insn_size
;
15956 align
= 1 << label_to_alignment (label
);
15957 min_insn_size
= TARGET_THUMB
? 2 : 4;
15958 return align
> min_insn_size
? align
- min_insn_size
: 0;
15961 /* Move a minipool fix MP from its current location to before MAX_MP.
15962 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15963 constraints may need updating. */
15965 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
15966 HOST_WIDE_INT max_address
)
15968 /* The code below assumes these are different. */
15969 gcc_assert (mp
!= max_mp
);
15971 if (max_mp
== NULL
)
15973 if (max_address
< mp
->max_address
)
15974 mp
->max_address
= max_address
;
15978 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15979 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15981 mp
->max_address
= max_address
;
15983 /* Unlink MP from its current position. Since max_mp is non-null,
15984 mp->prev must be non-null. */
15985 mp
->prev
->next
= mp
->next
;
15986 if (mp
->next
!= NULL
)
15987 mp
->next
->prev
= mp
->prev
;
15989 minipool_vector_tail
= mp
->prev
;
15991 /* Re-insert it before MAX_MP. */
15993 mp
->prev
= max_mp
->prev
;
15996 if (mp
->prev
!= NULL
)
15997 mp
->prev
->next
= mp
;
15999 minipool_vector_head
= mp
;
16002 /* Save the new entry. */
16005 /* Scan over the preceding entries and adjust their addresses as
16007 while (mp
->prev
!= NULL
16008 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16010 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16017 /* Add a constant to the minipool for a forward reference. Returns the
16018 node added or NULL if the constant will not fit in this pool. */
16020 add_minipool_forward_ref (Mfix
*fix
)
16022 /* If set, max_mp is the first pool_entry that has a lower
16023 constraint than the one we are trying to add. */
16024 Mnode
* max_mp
= NULL
;
16025 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
16028 /* If the minipool starts before the end of FIX->INSN then this FIX
16029 can not be placed into the current pool. Furthermore, adding the
16030 new constant pool entry may cause the pool to start FIX_SIZE bytes
16032 if (minipool_vector_head
&&
16033 (fix
->address
+ get_attr_length (fix
->insn
)
16034 >= minipool_vector_head
->max_address
- fix
->fix_size
))
16037 /* Scan the pool to see if a constant with the same value has
16038 already been added. While we are doing this, also note the
16039 location where we must insert the constant if it doesn't already
16041 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16043 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16044 && fix
->mode
== mp
->mode
16045 && (!LABEL_P (fix
->value
)
16046 || (CODE_LABEL_NUMBER (fix
->value
)
16047 == CODE_LABEL_NUMBER (mp
->value
)))
16048 && rtx_equal_p (fix
->value
, mp
->value
))
16050 /* More than one fix references this entry. */
16052 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
16055 /* Note the insertion point if necessary. */
16057 && mp
->max_address
> max_address
)
16060 /* If we are inserting an 8-bytes aligned quantity and
16061 we have not already found an insertion point, then
16062 make sure that all such 8-byte aligned quantities are
16063 placed at the start of the pool. */
16064 if (ARM_DOUBLEWORD_ALIGN
16066 && fix
->fix_size
>= 8
16067 && mp
->fix_size
< 8)
16070 max_address
= mp
->max_address
;
16074 /* The value is not currently in the minipool, so we need to create
16075 a new entry for it. If MAX_MP is NULL, the entry will be put on
16076 the end of the list since the placement is less constrained than
16077 any existing entry. Otherwise, we insert the new fix before
16078 MAX_MP and, if necessary, adjust the constraints on the other
16081 mp
->fix_size
= fix
->fix_size
;
16082 mp
->mode
= fix
->mode
;
16083 mp
->value
= fix
->value
;
16085 /* Not yet required for a backwards ref. */
16086 mp
->min_address
= -65536;
16088 if (max_mp
== NULL
)
16090 mp
->max_address
= max_address
;
16092 mp
->prev
= minipool_vector_tail
;
16094 if (mp
->prev
== NULL
)
16096 minipool_vector_head
= mp
;
16097 minipool_vector_label
= gen_label_rtx ();
16100 mp
->prev
->next
= mp
;
16102 minipool_vector_tail
= mp
;
16106 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16107 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16109 mp
->max_address
= max_address
;
16112 mp
->prev
= max_mp
->prev
;
16114 if (mp
->prev
!= NULL
)
16115 mp
->prev
->next
= mp
;
16117 minipool_vector_head
= mp
;
16120 /* Save the new entry. */
16123 /* Scan over the preceding entries and adjust their addresses as
16125 while (mp
->prev
!= NULL
16126 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16128 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16136 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
16137 HOST_WIDE_INT min_address
)
16139 HOST_WIDE_INT offset
;
16141 /* The code below assumes these are different. */
16142 gcc_assert (mp
!= min_mp
);
16144 if (min_mp
== NULL
)
16146 if (min_address
> mp
->min_address
)
16147 mp
->min_address
= min_address
;
16151 /* We will adjust this below if it is too loose. */
16152 mp
->min_address
= min_address
;
16154 /* Unlink MP from its current position. Since min_mp is non-null,
16155 mp->next must be non-null. */
16156 mp
->next
->prev
= mp
->prev
;
16157 if (mp
->prev
!= NULL
)
16158 mp
->prev
->next
= mp
->next
;
16160 minipool_vector_head
= mp
->next
;
16162 /* Reinsert it after MIN_MP. */
16164 mp
->next
= min_mp
->next
;
16166 if (mp
->next
!= NULL
)
16167 mp
->next
->prev
= mp
;
16169 minipool_vector_tail
= mp
;
16175 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16177 mp
->offset
= offset
;
16178 if (mp
->refcount
> 0)
16179 offset
+= mp
->fix_size
;
16181 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16182 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16188 /* Add a constant to the minipool for a backward reference. Returns the
16189 node added or NULL if the constant will not fit in this pool.
16191 Note that the code for insertion for a backwards reference can be
16192 somewhat confusing because the calculated offsets for each fix do
16193 not take into account the size of the pool (which is still under
16196 add_minipool_backward_ref (Mfix
*fix
)
16198 /* If set, min_mp is the last pool_entry that has a lower constraint
16199 than the one we are trying to add. */
16200 Mnode
*min_mp
= NULL
;
16201 /* This can be negative, since it is only a constraint. */
16202 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16205 /* If we can't reach the current pool from this insn, or if we can't
16206 insert this entry at the end of the pool without pushing other
16207 fixes out of range, then we don't try. This ensures that we
16208 can't fail later on. */
16209 if (min_address
>= minipool_barrier
->address
16210 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16211 >= minipool_barrier
->address
))
16214 /* Scan the pool to see if a constant with the same value has
16215 already been added. While we are doing this, also note the
16216 location where we must insert the constant if it doesn't already
16218 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16220 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16221 && fix
->mode
== mp
->mode
16222 && (!LABEL_P (fix
->value
)
16223 || (CODE_LABEL_NUMBER (fix
->value
)
16224 == CODE_LABEL_NUMBER (mp
->value
)))
16225 && rtx_equal_p (fix
->value
, mp
->value
)
16226 /* Check that there is enough slack to move this entry to the
16227 end of the table (this is conservative). */
16228 && (mp
->max_address
16229 > (minipool_barrier
->address
16230 + minipool_vector_tail
->offset
16231 + minipool_vector_tail
->fix_size
)))
16234 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16237 if (min_mp
!= NULL
)
16238 mp
->min_address
+= fix
->fix_size
;
16241 /* Note the insertion point if necessary. */
16242 if (mp
->min_address
< min_address
)
16244 /* For now, we do not allow the insertion of 8-byte alignment
16245 requiring nodes anywhere but at the start of the pool. */
16246 if (ARM_DOUBLEWORD_ALIGN
16247 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16252 else if (mp
->max_address
16253 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16255 /* Inserting before this entry would push the fix beyond
16256 its maximum address (which can happen if we have
16257 re-located a forwards fix); force the new fix to come
16259 if (ARM_DOUBLEWORD_ALIGN
16260 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16265 min_address
= mp
->min_address
+ fix
->fix_size
;
16268 /* Do not insert a non-8-byte aligned quantity before 8-byte
16269 aligned quantities. */
16270 else if (ARM_DOUBLEWORD_ALIGN
16271 && fix
->fix_size
< 8
16272 && mp
->fix_size
>= 8)
16275 min_address
= mp
->min_address
+ fix
->fix_size
;
16280 /* We need to create a new entry. */
16282 mp
->fix_size
= fix
->fix_size
;
16283 mp
->mode
= fix
->mode
;
16284 mp
->value
= fix
->value
;
16286 mp
->max_address
= minipool_barrier
->address
+ 65536;
16288 mp
->min_address
= min_address
;
16290 if (min_mp
== NULL
)
16293 mp
->next
= minipool_vector_head
;
16295 if (mp
->next
== NULL
)
16297 minipool_vector_tail
= mp
;
16298 minipool_vector_label
= gen_label_rtx ();
16301 mp
->next
->prev
= mp
;
16303 minipool_vector_head
= mp
;
16307 mp
->next
= min_mp
->next
;
16311 if (mp
->next
!= NULL
)
16312 mp
->next
->prev
= mp
;
16314 minipool_vector_tail
= mp
;
16317 /* Save the new entry. */
16325 /* Scan over the following entries and adjust their offsets. */
16326 while (mp
->next
!= NULL
)
16328 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16329 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16332 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16334 mp
->next
->offset
= mp
->offset
;
16343 assign_minipool_offsets (Mfix
*barrier
)
16345 HOST_WIDE_INT offset
= 0;
16348 minipool_barrier
= barrier
;
16350 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16352 mp
->offset
= offset
;
16354 if (mp
->refcount
> 0)
16355 offset
+= mp
->fix_size
;
16359 /* Output the literal table */
16361 dump_minipool (rtx_insn
*scan
)
16367 if (ARM_DOUBLEWORD_ALIGN
)
16368 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16369 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16376 fprintf (dump_file
,
16377 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16378 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16380 scan
= emit_label_after (gen_label_rtx (), scan
);
16381 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16382 scan
= emit_label_after (minipool_vector_label
, scan
);
16384 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16386 if (mp
->refcount
> 0)
16390 fprintf (dump_file
,
16391 ";; Offset %u, min %ld, max %ld ",
16392 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16393 (unsigned long) mp
->max_address
);
16394 arm_print_value (dump_file
, mp
->value
);
16395 fputc ('\n', dump_file
);
16398 rtx val
= copy_rtx (mp
->value
);
16400 switch (GET_MODE_SIZE (mp
->mode
))
16402 #ifdef HAVE_consttable_1
16404 scan
= emit_insn_after (gen_consttable_1 (val
), scan
);
16408 #ifdef HAVE_consttable_2
16410 scan
= emit_insn_after (gen_consttable_2 (val
), scan
);
16414 #ifdef HAVE_consttable_4
16416 scan
= emit_insn_after (gen_consttable_4 (val
), scan
);
16420 #ifdef HAVE_consttable_8
16422 scan
= emit_insn_after (gen_consttable_8 (val
), scan
);
16426 #ifdef HAVE_consttable_16
16428 scan
= emit_insn_after (gen_consttable_16 (val
), scan
);
16433 gcc_unreachable ();
16441 minipool_vector_head
= minipool_vector_tail
= NULL
;
16442 scan
= emit_insn_after (gen_consttable_end (), scan
);
16443 scan
= emit_barrier_after (scan
);
16446 /* Return the cost of forcibly inserting a barrier after INSN. */
16448 arm_barrier_cost (rtx_insn
*insn
)
16450 /* Basing the location of the pool on the loop depth is preferable,
16451 but at the moment, the basic block information seems to be
16452 corrupt by this stage of the compilation. */
16453 int base_cost
= 50;
16454 rtx_insn
*next
= next_nonnote_insn (insn
);
16456 if (next
!= NULL
&& LABEL_P (next
))
16459 switch (GET_CODE (insn
))
16462 /* It will always be better to place the table before the label, rather
16471 return base_cost
- 10;
16474 return base_cost
+ 10;
16478 /* Find the best place in the insn stream in the range
16479 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16480 Create the barrier by inserting a jump and add a new fix entry for
16483 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16485 HOST_WIDE_INT count
= 0;
16486 rtx_barrier
*barrier
;
16487 rtx_insn
*from
= fix
->insn
;
16488 /* The instruction after which we will insert the jump. */
16489 rtx_insn
*selected
= NULL
;
16491 /* The address at which the jump instruction will be placed. */
16492 HOST_WIDE_INT selected_address
;
16494 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16495 rtx_code_label
*label
= gen_label_rtx ();
16497 selected_cost
= arm_barrier_cost (from
);
16498 selected_address
= fix
->address
;
16500 while (from
&& count
< max_count
)
16502 rtx_jump_table_data
*tmp
;
16505 /* This code shouldn't have been called if there was a natural barrier
16507 gcc_assert (!BARRIER_P (from
));
16509 /* Count the length of this insn. This must stay in sync with the
16510 code that pushes minipool fixes. */
16511 if (LABEL_P (from
))
16512 count
+= get_label_padding (from
);
16514 count
+= get_attr_length (from
);
16516 /* If there is a jump table, add its length. */
16517 if (tablejump_p (from
, NULL
, &tmp
))
16519 count
+= get_jump_table_size (tmp
);
16521 /* Jump tables aren't in a basic block, so base the cost on
16522 the dispatch insn. If we select this location, we will
16523 still put the pool after the table. */
16524 new_cost
= arm_barrier_cost (from
);
16526 if (count
< max_count
16527 && (!selected
|| new_cost
<= selected_cost
))
16530 selected_cost
= new_cost
;
16531 selected_address
= fix
->address
+ count
;
16534 /* Continue after the dispatch table. */
16535 from
= NEXT_INSN (tmp
);
16539 new_cost
= arm_barrier_cost (from
);
16541 if (count
< max_count
16542 && (!selected
|| new_cost
<= selected_cost
))
16545 selected_cost
= new_cost
;
16546 selected_address
= fix
->address
+ count
;
16549 from
= NEXT_INSN (from
);
16552 /* Make sure that we found a place to insert the jump. */
16553 gcc_assert (selected
);
16555 /* Make sure we do not split a call and its corresponding
16556 CALL_ARG_LOCATION note. */
16557 if (CALL_P (selected
))
16559 rtx_insn
*next
= NEXT_INSN (selected
);
16560 if (next
&& NOTE_P (next
)
16561 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
16565 /* Create a new JUMP_INSN that branches around a barrier. */
16566 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16567 JUMP_LABEL (from
) = label
;
16568 barrier
= emit_barrier_after (from
);
16569 emit_label_after (label
, barrier
);
16571 /* Create a minipool barrier entry for the new barrier. */
16572 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16573 new_fix
->insn
= barrier
;
16574 new_fix
->address
= selected_address
;
16575 new_fix
->next
= fix
->next
;
16576 fix
->next
= new_fix
;
16581 /* Record that there is a natural barrier in the insn stream at
16584 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
16586 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16589 fix
->address
= address
;
16592 if (minipool_fix_head
!= NULL
)
16593 minipool_fix_tail
->next
= fix
;
16595 minipool_fix_head
= fix
;
16597 minipool_fix_tail
= fix
;
16600 /* Record INSN, which will need fixing up to load a value from the
16601 minipool. ADDRESS is the offset of the insn since the start of the
16602 function; LOC is a pointer to the part of the insn which requires
16603 fixing; VALUE is the constant that must be loaded, which is of type
16606 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
16607 machine_mode mode
, rtx value
)
16609 gcc_assert (!arm_disable_literal_pool
);
16610 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16613 fix
->address
= address
;
16616 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16617 fix
->value
= value
;
16618 fix
->forwards
= get_attr_pool_range (insn
);
16619 fix
->backwards
= get_attr_neg_pool_range (insn
);
16620 fix
->minipool
= NULL
;
16622 /* If an insn doesn't have a range defined for it, then it isn't
16623 expecting to be reworked by this code. Better to stop now than
16624 to generate duff assembly code. */
16625 gcc_assert (fix
->forwards
|| fix
->backwards
);
16627 /* If an entry requires 8-byte alignment then assume all constant pools
16628 require 4 bytes of padding. Trying to do this later on a per-pool
16629 basis is awkward because existing pool entries have to be modified. */
16630 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
16635 fprintf (dump_file
,
16636 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16637 GET_MODE_NAME (mode
),
16638 INSN_UID (insn
), (unsigned long) address
,
16639 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16640 arm_print_value (dump_file
, fix
->value
);
16641 fprintf (dump_file
, "\n");
16644 /* Add it to the chain of fixes. */
16647 if (minipool_fix_head
!= NULL
)
16648 minipool_fix_tail
->next
= fix
;
16650 minipool_fix_head
= fix
;
16652 minipool_fix_tail
= fix
;
16655 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16656 Returns the number of insns needed, or 99 if we always want to synthesize
16659 arm_max_const_double_inline_cost ()
16661 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
16664 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16665 Returns the number of insns needed, or 99 if we don't know how to
16668 arm_const_double_inline_cost (rtx val
)
16670 rtx lowpart
, highpart
;
16673 mode
= GET_MODE (val
);
16675 if (mode
== VOIDmode
)
16678 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16680 lowpart
= gen_lowpart (SImode
, val
);
16681 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16683 gcc_assert (CONST_INT_P (lowpart
));
16684 gcc_assert (CONST_INT_P (highpart
));
16686 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16687 NULL_RTX
, NULL_RTX
, 0, 0)
16688 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16689 NULL_RTX
, NULL_RTX
, 0, 0));
16692 /* Cost of loading a SImode constant. */
16694 arm_const_inline_cost (enum rtx_code code
, rtx val
)
16696 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
16697 NULL_RTX
, NULL_RTX
, 1, 0);
16700 /* Return true if it is worthwhile to split a 64-bit constant into two
16701 32-bit operations. This is the case if optimizing for size, or
16702 if we have load delay slots, or if one 32-bit part can be done with
16703 a single data operation. */
16705 arm_const_double_by_parts (rtx val
)
16707 machine_mode mode
= GET_MODE (val
);
16710 if (optimize_size
|| arm_ld_sched
)
16713 if (mode
== VOIDmode
)
16716 part
= gen_highpart_mode (SImode
, mode
, val
);
16718 gcc_assert (CONST_INT_P (part
));
16720 if (const_ok_for_arm (INTVAL (part
))
16721 || const_ok_for_arm (~INTVAL (part
)))
16724 part
= gen_lowpart (SImode
, val
);
16726 gcc_assert (CONST_INT_P (part
));
16728 if (const_ok_for_arm (INTVAL (part
))
16729 || const_ok_for_arm (~INTVAL (part
)))
16735 /* Return true if it is possible to inline both the high and low parts
16736 of a 64-bit constant into 32-bit data processing instructions. */
16738 arm_const_double_by_immediates (rtx val
)
16740 machine_mode mode
= GET_MODE (val
);
16743 if (mode
== VOIDmode
)
16746 part
= gen_highpart_mode (SImode
, mode
, val
);
16748 gcc_assert (CONST_INT_P (part
));
16750 if (!const_ok_for_arm (INTVAL (part
)))
16753 part
= gen_lowpart (SImode
, val
);
16755 gcc_assert (CONST_INT_P (part
));
16757 if (!const_ok_for_arm (INTVAL (part
)))
16763 /* Scan INSN and note any of its operands that need fixing.
16764 If DO_PUSHES is false we do not actually push any of the fixups
16767 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
16771 extract_constrain_insn (insn
);
16773 if (recog_data
.n_alternatives
== 0)
16776 /* Fill in recog_op_alt with information about the constraints of
16778 preprocess_constraints (insn
);
16780 const operand_alternative
*op_alt
= which_op_alt ();
16781 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16783 /* Things we need to fix can only occur in inputs. */
16784 if (recog_data
.operand_type
[opno
] != OP_IN
)
16787 /* If this alternative is a memory reference, then any mention
16788 of constants in this alternative is really to fool reload
16789 into allowing us to accept one there. We need to fix them up
16790 now so that we output the right code. */
16791 if (op_alt
[opno
].memory_ok
)
16793 rtx op
= recog_data
.operand
[opno
];
16795 if (CONSTANT_P (op
))
16798 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
16799 recog_data
.operand_mode
[opno
], op
);
16801 else if (MEM_P (op
)
16802 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
16803 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
16807 rtx cop
= avoid_constant_pool_reference (op
);
16809 /* Casting the address of something to a mode narrower
16810 than a word can cause avoid_constant_pool_reference()
16811 to return the pool reference itself. That's no good to
16812 us here. Lets just hope that we can use the
16813 constant pool value directly. */
16815 cop
= get_pool_constant (XEXP (op
, 0));
16817 push_minipool_fix (insn
, address
,
16818 recog_data
.operand_loc
[opno
],
16819 recog_data
.operand_mode
[opno
], cop
);
16829 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16830 and unions in the context of ARMv8-M Security Extensions. It is used as a
16831 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16832 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16833 or four masks, depending on whether it is being computed for a
16834 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16835 respectively. The tree for the type of the argument or a field within an
16836 argument is passed in ARG_TYPE, the current register this argument or field
16837 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16838 argument or field starts at is passed in STARTING_BIT and the last used bit
16839 is kept in LAST_USED_BIT which is also updated accordingly. */
16841 static unsigned HOST_WIDE_INT
16842 comp_not_to_clear_mask_str_un (tree arg_type
, int * regno
,
16843 uint32_t * padding_bits_to_clear
,
16844 unsigned starting_bit
, int * last_used_bit
)
16847 unsigned HOST_WIDE_INT not_to_clear_reg_mask
= 0;
16849 if (TREE_CODE (arg_type
) == RECORD_TYPE
)
16851 unsigned current_bit
= starting_bit
;
16853 long int offset
, size
;
16856 field
= TYPE_FIELDS (arg_type
);
16859 /* The offset within a structure is always an offset from
16860 the start of that structure. Make sure we take that into the
16861 calculation of the register based offset that we use here. */
16862 offset
= starting_bit
;
16863 offset
+= TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field
), 0);
16866 /* This is the actual size of the field, for bitfields this is the
16867 bitfield width and not the container size. */
16868 size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
16870 if (*last_used_bit
!= offset
)
16872 if (offset
< *last_used_bit
)
16874 /* This field's offset is before the 'last_used_bit', that
16875 means this field goes on the next register. So we need to
16876 pad the rest of the current register and increase the
16877 register number. */
16879 mask
= ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit
);
16882 padding_bits_to_clear
[*regno
] |= mask
;
16883 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16888 /* Otherwise we pad the bits between the last field's end and
16889 the start of the new field. */
16892 mask
= ((uint32_t)-1) >> (32 - offset
);
16893 mask
-= ((uint32_t) 1 << *last_used_bit
) - 1;
16894 padding_bits_to_clear
[*regno
] |= mask
;
16896 current_bit
= offset
;
16899 /* Calculate further padding bits for inner structs/unions too. */
16900 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field
)))
16902 *last_used_bit
= current_bit
;
16903 not_to_clear_reg_mask
16904 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field
), regno
,
16905 padding_bits_to_clear
, offset
,
16910 /* Update 'current_bit' with this field's size. If the
16911 'current_bit' lies in a subsequent register, update 'regno' and
16912 reset 'current_bit' to point to the current bit in that new
16914 current_bit
+= size
;
16915 while (current_bit
>= 32)
16918 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16921 *last_used_bit
= current_bit
;
16924 field
= TREE_CHAIN (field
);
16926 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16928 else if (TREE_CODE (arg_type
) == UNION_TYPE
)
16930 tree field
, field_t
;
16931 int i
, regno_t
, field_size
;
16935 uint32_t padding_bits_to_clear_res
[NUM_ARG_REGS
]
16936 = {-1, -1, -1, -1};
16938 /* To compute the padding bits in a union we only consider bits as
16939 padding bits if they are always either a padding bit or fall outside a
16940 fields size for all fields in the union. */
16941 field
= TYPE_FIELDS (arg_type
);
16944 uint32_t padding_bits_to_clear_t
[NUM_ARG_REGS
]
16945 = {0U, 0U, 0U, 0U};
16946 int last_used_bit_t
= *last_used_bit
;
16948 field_t
= TREE_TYPE (field
);
16950 /* If the field's type is either a record or a union make sure to
16951 compute their padding bits too. */
16952 if (RECORD_OR_UNION_TYPE_P (field_t
))
16953 not_to_clear_reg_mask
16954 |= comp_not_to_clear_mask_str_un (field_t
, ®no_t
,
16955 &padding_bits_to_clear_t
[0],
16956 starting_bit
, &last_used_bit_t
);
16959 field_size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
16960 regno_t
= (field_size
/ 32) + *regno
;
16961 last_used_bit_t
= (starting_bit
+ field_size
) % 32;
16964 for (i
= *regno
; i
< regno_t
; i
++)
16966 /* For all but the last register used by this field only keep the
16967 padding bits that were padding bits in this field. */
16968 padding_bits_to_clear_res
[i
] &= padding_bits_to_clear_t
[i
];
16971 /* For the last register, keep all padding bits that were padding
16972 bits in this field and any padding bits that are still valid
16973 as padding bits but fall outside of this field's size. */
16974 mask
= (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t
)) + 1;
16975 padding_bits_to_clear_res
[regno_t
]
16976 &= padding_bits_to_clear_t
[regno_t
] | mask
;
16978 /* Update the maximum size of the fields in terms of registers used
16979 ('max_reg') and the 'last_used_bit' in said register. */
16980 if (max_reg
< regno_t
)
16983 max_bit
= last_used_bit_t
;
16985 else if (max_reg
== regno_t
&& max_bit
< last_used_bit_t
)
16986 max_bit
= last_used_bit_t
;
16988 field
= TREE_CHAIN (field
);
16991 /* Update the current padding_bits_to_clear using the intersection of the
16992 padding bits of all the fields. */
16993 for (i
=*regno
; i
< max_reg
; i
++)
16994 padding_bits_to_clear
[i
] |= padding_bits_to_clear_res
[i
];
16996 /* Do not keep trailing padding bits, we do not know yet whether this
16997 is the end of the argument. */
16998 mask
= ((uint32_t) 1 << max_bit
) - 1;
16999 padding_bits_to_clear
[max_reg
]
17000 |= padding_bits_to_clear_res
[max_reg
] & mask
;
17003 *last_used_bit
= max_bit
;
17006 /* This function should only be used for structs and unions. */
17007 gcc_unreachable ();
17009 return not_to_clear_reg_mask
;
17012 /* In the context of ARMv8-M Security Extensions, this function is used for both
17013 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
17014 registers are used when returning or passing arguments, which is then
17015 returned as a mask. It will also compute a mask to indicate padding/unused
17016 bits for each of these registers, and passes this through the
17017 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
17018 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17019 the starting register used to pass this argument or return value is passed
17020 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17021 for struct and union types. */
17023 static unsigned HOST_WIDE_INT
17024 compute_not_to_clear_mask (tree arg_type
, rtx arg_rtx
, int regno
,
17025 uint32_t * padding_bits_to_clear
)
17028 int last_used_bit
= 0;
17029 unsigned HOST_WIDE_INT not_to_clear_mask
;
17031 if (RECORD_OR_UNION_TYPE_P (arg_type
))
17034 = comp_not_to_clear_mask_str_un (arg_type
, ®no
,
17035 padding_bits_to_clear
, 0,
17039 /* If the 'last_used_bit' is not zero, that means we are still using a
17040 part of the last 'regno'. In such cases we must clear the trailing
17041 bits. Otherwise we are not using regno and we should mark it as to
17043 if (last_used_bit
!= 0)
17044 padding_bits_to_clear
[regno
]
17045 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit
) + 1;
17047 not_to_clear_mask
&= ~(HOST_WIDE_INT_1U
<< regno
);
17051 not_to_clear_mask
= 0;
17052 /* We are not dealing with structs nor unions. So these arguments may be
17053 passed in floating point registers too. In some cases a BLKmode is
17054 used when returning or passing arguments in multiple VFP registers. */
17055 if (GET_MODE (arg_rtx
) == BLKmode
)
17060 /* This should really only occur when dealing with the hard-float
17062 gcc_assert (TARGET_HARD_FLOAT_ABI
);
17064 for (i
= 0; i
< XVECLEN (arg_rtx
, 0); i
++)
17066 reg
= XEXP (XVECEXP (arg_rtx
, 0, i
), 0);
17067 gcc_assert (REG_P (reg
));
17069 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (reg
);
17071 /* If we are dealing with DF mode, make sure we don't
17072 clear either of the registers it addresses. */
17073 arg_regs
= ARM_NUM_REGS (GET_MODE (reg
));
17076 unsigned HOST_WIDE_INT mask
;
17077 mask
= HOST_WIDE_INT_1U
<< (REGNO (reg
) + arg_regs
);
17078 mask
-= HOST_WIDE_INT_1U
<< REGNO (reg
);
17079 not_to_clear_mask
|= mask
;
17085 /* Otherwise we can rely on the MODE to determine how many registers
17086 are being used by this argument. */
17087 int arg_regs
= ARM_NUM_REGS (GET_MODE (arg_rtx
));
17088 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
17091 unsigned HOST_WIDE_INT
17092 mask
= HOST_WIDE_INT_1U
<< (REGNO (arg_rtx
) + arg_regs
);
17093 mask
-= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
17094 not_to_clear_mask
|= mask
;
17099 return not_to_clear_mask
;
17102 /* Clear registers secret before doing a cmse_nonsecure_call or returning from
17103 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
17104 are to be fully cleared, using the value in register CLEARING_REG if more
17105 efficient. The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
17106 the bits that needs to be cleared in caller-saved core registers, with
17107 SCRATCH_REG used as a scratch register for that clearing.
17109 NOTE: one of three following assertions must hold:
17110 - SCRATCH_REG is a low register
17111 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
17112 in TO_CLEAR_BITMAP)
17113 - CLEARING_REG is a low register. */
17116 cmse_clear_registers (sbitmap to_clear_bitmap
, uint32_t *padding_bits_to_clear
,
17117 int padding_bits_len
, rtx scratch_reg
, rtx clearing_reg
)
17119 bool saved_clearing
= false;
17120 rtx saved_clearing_reg
= NULL_RTX
;
17121 int i
, regno
, clearing_regno
, minregno
= R0_REGNUM
, maxregno
= minregno
- 1;
17123 gcc_assert (arm_arch_cmse
);
17125 if (!bitmap_empty_p (to_clear_bitmap
))
17127 minregno
= bitmap_first_set_bit (to_clear_bitmap
);
17128 maxregno
= bitmap_last_set_bit (to_clear_bitmap
);
17130 clearing_regno
= REGNO (clearing_reg
);
17132 /* Clear padding bits. */
17133 gcc_assert (padding_bits_len
<= NUM_ARG_REGS
);
17134 for (i
= 0, regno
= R0_REGNUM
; i
< padding_bits_len
; i
++, regno
++)
17137 rtx rtx16
, dest
, cleared_reg
= gen_rtx_REG (SImode
, regno
);
17139 if (padding_bits_to_clear
[i
] == 0)
17142 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17143 CLEARING_REG as scratch. */
17145 && REGNO (scratch_reg
) > LAST_LO_REGNUM
)
17147 /* clearing_reg is not to be cleared, copy its value into scratch_reg
17148 such that we can use clearing_reg to clear the unused bits in the
17150 if ((clearing_regno
> maxregno
17151 || !bitmap_bit_p (to_clear_bitmap
, clearing_regno
))
17152 && !saved_clearing
)
17154 gcc_assert (clearing_regno
<= LAST_LO_REGNUM
);
17155 emit_move_insn (scratch_reg
, clearing_reg
);
17156 saved_clearing
= true;
17157 saved_clearing_reg
= scratch_reg
;
17159 scratch_reg
= clearing_reg
;
17162 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17163 mask
= (~padding_bits_to_clear
[i
]) & 0xFFFF;
17164 emit_move_insn (scratch_reg
, gen_int_mode (mask
, SImode
));
17166 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17167 mask
= (~padding_bits_to_clear
[i
]) >> 16;
17168 rtx16
= gen_int_mode (16, SImode
);
17169 dest
= gen_rtx_ZERO_EXTRACT (SImode
, scratch_reg
, rtx16
, rtx16
);
17171 emit_insn (gen_rtx_SET (dest
, gen_int_mode (mask
, SImode
)));
17173 emit_insn (gen_andsi3 (cleared_reg
, cleared_reg
, scratch_reg
));
17175 if (saved_clearing
)
17176 emit_move_insn (clearing_reg
, saved_clearing_reg
);
17179 /* Clear full registers. */
17181 /* If not marked for clearing, clearing_reg already does not contain
17183 if (clearing_regno
<= maxregno
17184 && bitmap_bit_p (to_clear_bitmap
, clearing_regno
))
17186 emit_move_insn (clearing_reg
, const0_rtx
);
17187 emit_use (clearing_reg
);
17188 bitmap_clear_bit (to_clear_bitmap
, clearing_regno
);
17191 for (regno
= minregno
; regno
<= maxregno
; regno
++)
17193 if (!bitmap_bit_p (to_clear_bitmap
, regno
))
17196 if (IS_VFP_REGNUM (regno
))
17198 /* If regno is an even vfp register and its successor is also to
17199 be cleared, use vmov. */
17200 if (TARGET_VFP_DOUBLE
17201 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
17202 && bitmap_bit_p (to_clear_bitmap
, regno
+ 1))
17204 emit_move_insn (gen_rtx_REG (DFmode
, regno
),
17205 CONST1_RTX (DFmode
));
17206 emit_use (gen_rtx_REG (DFmode
, regno
));
17211 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
17212 CONST1_RTX (SFmode
));
17213 emit_use (gen_rtx_REG (SFmode
, regno
));
17218 emit_move_insn (gen_rtx_REG (SImode
, regno
), clearing_reg
);
17219 emit_use (gen_rtx_REG (SImode
, regno
));
17224 /* Clears caller saved registers not used to pass arguments before a
17225 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
17226 registers is done in __gnu_cmse_nonsecure_call libcall.
17227 See libgcc/config/arm/cmse_nonsecure_call.S. */
17230 cmse_nonsecure_call_clear_caller_saved (void)
17234 FOR_EACH_BB_FN (bb
, cfun
)
17238 FOR_BB_INSNS (bb
, insn
)
17240 unsigned address_regnum
, regno
, maxregno
=
17241 TARGET_HARD_FLOAT_ABI
? D7_VFP_REGNUM
: NUM_ARG_REGS
- 1;
17242 auto_sbitmap
to_clear_bitmap (maxregno
+ 1);
17244 rtx pat
, call
, unspec
, clearing_reg
, ip_reg
, shift
;
17246 CUMULATIVE_ARGS args_so_far_v
;
17247 cumulative_args_t args_so_far
;
17248 tree arg_type
, fntype
;
17249 bool first_param
= true;
17250 function_args_iterator args_iter
;
17251 uint32_t padding_bits_to_clear
[4] = {0U, 0U, 0U, 0U};
17253 if (!NONDEBUG_INSN_P (insn
))
17256 if (!CALL_P (insn
))
17259 pat
= PATTERN (insn
);
17260 gcc_assert (GET_CODE (pat
) == PARALLEL
&& XVECLEN (pat
, 0) > 0);
17261 call
= XVECEXP (pat
, 0, 0);
17263 /* Get the real call RTX if the insn sets a value, ie. returns. */
17264 if (GET_CODE (call
) == SET
)
17265 call
= SET_SRC (call
);
17267 /* Check if it is a cmse_nonsecure_call. */
17268 unspec
= XEXP (call
, 0);
17269 if (GET_CODE (unspec
) != UNSPEC
17270 || XINT (unspec
, 1) != UNSPEC_NONSECURE_MEM
)
17273 /* Determine the caller-saved registers we need to clear. */
17274 bitmap_clear (to_clear_bitmap
);
17275 bitmap_set_range (to_clear_bitmap
, R0_REGNUM
, NUM_ARG_REGS
);
17277 /* Only look at the caller-saved floating point registers in case of
17278 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17279 lazy store and loads which clear both caller- and callee-saved
17281 if (TARGET_HARD_FLOAT_ABI
)
17283 auto_sbitmap
float_bitmap (maxregno
+ 1);
17285 bitmap_clear (float_bitmap
);
17286 bitmap_set_range (float_bitmap
, FIRST_VFP_REGNUM
,
17287 D7_VFP_REGNUM
- FIRST_VFP_REGNUM
+ 1);
17288 bitmap_ior (to_clear_bitmap
, to_clear_bitmap
, float_bitmap
);
17291 /* Make sure the register used to hold the function address is not
17293 address
= RTVEC_ELT (XVEC (unspec
, 0), 0);
17294 gcc_assert (MEM_P (address
));
17295 gcc_assert (REG_P (XEXP (address
, 0)));
17296 address_regnum
= REGNO (XEXP (address
, 0));
17297 if (address_regnum
< R0_REGNUM
+ NUM_ARG_REGS
)
17298 bitmap_clear_bit (to_clear_bitmap
, address_regnum
);
17300 /* Set basic block of call insn so that df rescan is performed on
17301 insns inserted here. */
17302 set_block_for_insn (insn
, bb
);
17303 df_set_flags (DF_DEFER_INSN_RESCAN
);
17306 /* Make sure the scheduler doesn't schedule other insns beyond
17308 emit_insn (gen_blockage ());
17310 /* Walk through all arguments and clear registers appropriately.
17312 fntype
= TREE_TYPE (MEM_EXPR (address
));
17313 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
,
17315 args_so_far
= pack_cumulative_args (&args_so_far_v
);
17316 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
17319 uint64_t to_clear_args_mask
;
17320 machine_mode arg_mode
= TYPE_MODE (arg_type
);
17322 if (VOID_TYPE_P (arg_type
))
17326 arm_function_arg_advance (args_so_far
, arg_mode
, arg_type
,
17329 arg_rtx
= arm_function_arg (args_so_far
, arg_mode
, arg_type
,
17331 gcc_assert (REG_P (arg_rtx
));
17333 = compute_not_to_clear_mask (arg_type
, arg_rtx
,
17335 &padding_bits_to_clear
[0]);
17336 if (to_clear_args_mask
)
17338 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
17340 if (to_clear_args_mask
& (1ULL << regno
))
17341 bitmap_clear_bit (to_clear_bitmap
, regno
);
17345 first_param
= false;
17348 /* We use right shift and left shift to clear the LSB of the address
17349 we jump to instead of using bic, to avoid having to use an extra
17350 register on Thumb-1. */
17351 clearing_reg
= XEXP (address
, 0);
17352 shift
= gen_rtx_LSHIFTRT (SImode
, clearing_reg
, const1_rtx
);
17353 emit_insn (gen_rtx_SET (clearing_reg
, shift
));
17354 shift
= gen_rtx_ASHIFT (SImode
, clearing_reg
, const1_rtx
);
17355 emit_insn (gen_rtx_SET (clearing_reg
, shift
));
17357 /* Clear caller-saved registers that leak before doing a non-secure
17359 ip_reg
= gen_rtx_REG (SImode
, IP_REGNUM
);
17360 cmse_clear_registers (to_clear_bitmap
, padding_bits_to_clear
,
17361 NUM_ARG_REGS
, ip_reg
, clearing_reg
);
17363 seq
= get_insns ();
17365 emit_insn_before (seq
, insn
);
17370 /* Rewrite move insn into subtract of 0 if the condition codes will
17371 be useful in next conditional jump insn. */
17374 thumb1_reorg (void)
17378 FOR_EACH_BB_FN (bb
, cfun
)
17381 rtx cmp
, op0
, op1
, set
= NULL
;
17382 rtx_insn
*prev
, *insn
= BB_END (bb
);
17383 bool insn_clobbered
= false;
17385 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
17386 insn
= PREV_INSN (insn
);
17388 /* Find the last cbranchsi4_insn in basic block BB. */
17389 if (insn
== BB_HEAD (bb
)
17390 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
17393 /* Get the register with which we are comparing. */
17394 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
17395 op0
= XEXP (cmp
, 0);
17396 op1
= XEXP (cmp
, 1);
17398 /* Check that comparison is against ZERO. */
17399 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
17402 /* Find the first flag setting insn before INSN in basic block BB. */
17403 gcc_assert (insn
!= BB_HEAD (bb
));
17404 for (prev
= PREV_INSN (insn
);
17406 && prev
!= BB_HEAD (bb
)
17408 || DEBUG_INSN_P (prev
)
17409 || ((set
= single_set (prev
)) != NULL
17410 && get_attr_conds (prev
) == CONDS_NOCOND
)));
17411 prev
= PREV_INSN (prev
))
17413 if (reg_set_p (op0
, prev
))
17414 insn_clobbered
= true;
17417 /* Skip if op0 is clobbered by insn other than prev. */
17418 if (insn_clobbered
)
17424 dest
= SET_DEST (set
);
17425 src
= SET_SRC (set
);
17426 if (!low_register_operand (dest
, SImode
)
17427 || !low_register_operand (src
, SImode
))
17430 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17431 in INSN. Both src and dest of the move insn are checked. */
17432 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
17434 dest
= copy_rtx (dest
);
17435 src
= copy_rtx (src
);
17436 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
17437 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
17438 INSN_CODE (prev
) = -1;
17439 /* Set test register in INSN to dest. */
17440 XEXP (cmp
, 0) = copy_rtx (dest
);
17441 INSN_CODE (insn
) = -1;
17446 /* Convert instructions to their cc-clobbering variant if possible, since
17447 that allows us to use smaller encodings. */
17450 thumb2_reorg (void)
17455 INIT_REG_SET (&live
);
17457 /* We are freeing block_for_insn in the toplev to keep compatibility
17458 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17459 compute_bb_for_insn ();
17462 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17464 FOR_EACH_BB_FN (bb
, cfun
)
17466 if ((current_tune
->disparage_flag_setting_t16_encodings
17467 == tune_params::DISPARAGE_FLAGS_ALL
)
17468 && optimize_bb_for_speed_p (bb
))
17472 Convert_Action action
= SKIP
;
17473 Convert_Action action_for_partial_flag_setting
17474 = ((current_tune
->disparage_flag_setting_t16_encodings
17475 != tune_params::DISPARAGE_FLAGS_NEITHER
)
17476 && optimize_bb_for_speed_p (bb
))
17479 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17480 df_simulate_initialize_backwards (bb
, &live
);
17481 FOR_BB_INSNS_REVERSE (bb
, insn
)
17483 if (NONJUMP_INSN_P (insn
)
17484 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17485 && GET_CODE (PATTERN (insn
)) == SET
)
17488 rtx pat
= PATTERN (insn
);
17489 rtx dst
= XEXP (pat
, 0);
17490 rtx src
= XEXP (pat
, 1);
17491 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17493 if (UNARY_P (src
) || BINARY_P (src
))
17494 op0
= XEXP (src
, 0);
17496 if (BINARY_P (src
))
17497 op1
= XEXP (src
, 1);
17499 if (low_register_operand (dst
, SImode
))
17501 switch (GET_CODE (src
))
17504 /* Adding two registers and storing the result
17505 in the first source is already a 16-bit
17507 if (rtx_equal_p (dst
, op0
)
17508 && register_operand (op1
, SImode
))
17511 if (low_register_operand (op0
, SImode
))
17513 /* ADDS <Rd>,<Rn>,<Rm> */
17514 if (low_register_operand (op1
, SImode
))
17516 /* ADDS <Rdn>,#<imm8> */
17517 /* SUBS <Rdn>,#<imm8> */
17518 else if (rtx_equal_p (dst
, op0
)
17519 && CONST_INT_P (op1
)
17520 && IN_RANGE (INTVAL (op1
), -255, 255))
17522 /* ADDS <Rd>,<Rn>,#<imm3> */
17523 /* SUBS <Rd>,<Rn>,#<imm3> */
17524 else if (CONST_INT_P (op1
)
17525 && IN_RANGE (INTVAL (op1
), -7, 7))
17528 /* ADCS <Rd>, <Rn> */
17529 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17530 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17531 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17533 && COMPARISON_P (op1
)
17534 && cc_register (XEXP (op1
, 0), VOIDmode
)
17535 && maybe_get_arm_condition_code (op1
) == ARM_CS
17536 && XEXP (op1
, 1) == const0_rtx
)
17541 /* RSBS <Rd>,<Rn>,#0
17542 Not handled here: see NEG below. */
17543 /* SUBS <Rd>,<Rn>,#<imm3>
17545 Not handled here: see PLUS above. */
17546 /* SUBS <Rd>,<Rn>,<Rm> */
17547 if (low_register_operand (op0
, SImode
)
17548 && low_register_operand (op1
, SImode
))
17553 /* MULS <Rdm>,<Rn>,<Rdm>
17554 As an exception to the rule, this is only used
17555 when optimizing for size since MULS is slow on all
17556 known implementations. We do not even want to use
17557 MULS in cold code, if optimizing for speed, so we
17558 test the global flag here. */
17559 if (!optimize_size
)
17561 /* Fall through. */
17565 /* ANDS <Rdn>,<Rm> */
17566 if (rtx_equal_p (dst
, op0
)
17567 && low_register_operand (op1
, SImode
))
17568 action
= action_for_partial_flag_setting
;
17569 else if (rtx_equal_p (dst
, op1
)
17570 && low_register_operand (op0
, SImode
))
17571 action
= action_for_partial_flag_setting
== SKIP
17572 ? SKIP
: SWAP_CONV
;
17578 /* ASRS <Rdn>,<Rm> */
17579 /* LSRS <Rdn>,<Rm> */
17580 /* LSLS <Rdn>,<Rm> */
17581 if (rtx_equal_p (dst
, op0
)
17582 && low_register_operand (op1
, SImode
))
17583 action
= action_for_partial_flag_setting
;
17584 /* ASRS <Rd>,<Rm>,#<imm5> */
17585 /* LSRS <Rd>,<Rm>,#<imm5> */
17586 /* LSLS <Rd>,<Rm>,#<imm5> */
17587 else if (low_register_operand (op0
, SImode
)
17588 && CONST_INT_P (op1
)
17589 && IN_RANGE (INTVAL (op1
), 0, 31))
17590 action
= action_for_partial_flag_setting
;
17594 /* RORS <Rdn>,<Rm> */
17595 if (rtx_equal_p (dst
, op0
)
17596 && low_register_operand (op1
, SImode
))
17597 action
= action_for_partial_flag_setting
;
17601 /* MVNS <Rd>,<Rm> */
17602 if (low_register_operand (op0
, SImode
))
17603 action
= action_for_partial_flag_setting
;
17607 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17608 if (low_register_operand (op0
, SImode
))
17613 /* MOVS <Rd>,#<imm8> */
17614 if (CONST_INT_P (src
)
17615 && IN_RANGE (INTVAL (src
), 0, 255))
17616 action
= action_for_partial_flag_setting
;
17620 /* MOVS and MOV<c> with registers have different
17621 encodings, so are not relevant here. */
17629 if (action
!= SKIP
)
17631 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17632 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17635 if (action
== SWAP_CONV
)
17637 src
= copy_rtx (src
);
17638 XEXP (src
, 0) = op1
;
17639 XEXP (src
, 1) = op0
;
17640 pat
= gen_rtx_SET (dst
, src
);
17641 vec
= gen_rtvec (2, pat
, clobber
);
17643 else /* action == CONV */
17644 vec
= gen_rtvec (2, pat
, clobber
);
17646 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17647 INSN_CODE (insn
) = -1;
17651 if (NONDEBUG_INSN_P (insn
))
17652 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17656 CLEAR_REG_SET (&live
);
/* NOTE(review): lossy text extraction -- the defining line of this function
   (presumably `static void arm_reorg (void)`) and many interior lines are
   elided (the embedded original line numbers jump, e.g. 17662 -> 17667);
   logical lines are split across physical lines.  Code text below is
   preserved byte-for-byte; only comments were added.  Restore from the
   original gcc/config/arm/arm.c before editing.  */
17659 /* Gcc puts the pool in the wrong place for ARM, since we can only
17660 load addresses a limited distance around the pc. We do some
17661 special munging to move the constant pool values to the correct
17662 point in the code. */
/* Running byte address of the insn stream, used to decide pool placement.  */
17667 HOST_WIDE_INT address
= 0;
17671 cmse_nonsecure_call_clear_caller_saved ();
17674 else if (TARGET_THUMB2
)
17677 /* Ensure all insns that must be split have been split at this point.
17678 Otherwise, the pool placement code below may compute incorrect
17679 insn lengths. Note that when optimizing, all insns have already
17680 been split at this point. */
17682 split_all_insns_noflow ();
17684 /* Make sure we do not attempt to create a literal pool even though it should
17685 no longer be necessary to create any. */
17686 if (arm_disable_literal_pool
)
17689 minipool_fix_head
= minipool_fix_tail
= NULL
;
17691 /* The first insn must always be a note, or the code below won't
17692 scan it properly. */
17693 insn
= get_insns ();
17694 gcc_assert (NOTE_P (insn
));
17697 /* Scan all the insns and record the operands that will need fixing. */
17698 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17700 if (BARRIER_P (insn
))
17701 push_minipool_barrier (insn
, address
);
17702 else if (INSN_P (insn
))
17704 rtx_jump_table_data
*table
;
17706 note_invalid_constants (insn
, address
, true);
17707 address
+= get_attr_length (insn
);
17709 /* If the insn is a vector jump, add the size of the table
17710 and skip the table. */
17711 if (tablejump_p (insn
, NULL
, &table
))
17713 address
+= get_jump_table_size (table
);
17717 else if (LABEL_P (insn
))
17718 /* Add the worst-case padding due to alignment. We don't add
17719 the _current_ padding because the minipool insertions
17720 themselves might change it. */
17721 address
+= get_label_padding (insn
);
17724 fix
= minipool_fix_head
;
17726 /* Now scan the fixups and perform the required changes. */
17731 Mfix
* last_added_fix
;
17732 Mfix
* last_barrier
= NULL
;
17735 /* Skip any further barriers before the next fix. */
17736 while (fix
&& BARRIER_P (fix
->insn
))
17739 /* No more fixes. */
17743 last_added_fix
= NULL
;
/* Greedily add fixes to the current pool until one is out of range.  */
17745 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17747 if (BARRIER_P (ftmp
->insn
))
17749 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17752 last_barrier
= ftmp
;
17754 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17757 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17760 /* If we found a barrier, drop back to that; any fixes that we
17761 could have reached but come after the barrier will now go in
17762 the next mini-pool. */
17763 if (last_barrier
!= NULL
)
17765 /* Reduce the refcount for those fixes that won't go into this
17767 for (fdel
= last_barrier
->next
;
17768 fdel
&& fdel
!= ftmp
;
17771 fdel
->minipool
->refcount
--;
17772 fdel
->minipool
= NULL
;
17775 ftmp
= last_barrier
;
17779 /* ftmp is first fix that we can't fit into this pool and
17780 there no natural barriers that we could use. Insert a
17781 new barrier in the code somewhere between the previous
17782 fix and this one, and arrange to jump around it. */
17783 HOST_WIDE_INT max_address
;
17785 /* The last item on the list of fixes must be a barrier, so
17786 we can never run off the end of the list of fixes without
17787 last_barrier being set. */
17790 max_address
= minipool_vector_head
->max_address
;
17791 /* Check that there isn't another fix that is in range that
17792 we couldn't fit into this pool because the pool was
17793 already too large: we need to put the pool before such an
17794 instruction. The pool itself may come just after the
17795 fix because create_fix_barrier also allows space for a
17796 jump instruction. */
17797 if (ftmp
->address
< max_address
)
17798 max_address
= ftmp
->address
+ 1;
17800 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17803 assign_minipool_offsets (last_barrier
);
17807 if (!BARRIER_P (ftmp
->insn
)
17808 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17815 /* Scan over the fixes we have identified for this pool, fixing them
17816 up and adding the constants to the pool itself. */
17817 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17818 this_fix
= this_fix
->next
)
17819 if (!BARRIER_P (this_fix
->insn
))
/* Rewrite the fixed-up operand as a pc-relative load from the pool label
   plus the constant's offset within the pool.  */
17822 = plus_constant (Pmode
,
17823 gen_rtx_LABEL_REF (VOIDmode
,
17824 minipool_vector_label
),
17825 this_fix
->minipool
->offset
);
17826 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17829 dump_minipool (last_barrier
->insn
);
17833 /* From now on we must synthesize any constants that we can't handle
17834 directly. This can happen if the RTL gets split during final
17835 instruction generation. */
17836 cfun
->machine
->after_arm_reorg
= 1;
17838 /* Free the minipool memory. */
17839 obstack_free (&minipool_obstack
, minipool_startobj
);
17842 /* Routines to output assembly language. */
/* NOTE(review): lossy extraction -- interior lines (incl. braces, the table
   initialization call and the return statement) are elided; code kept
   verbatim, comments only added.  */
17844 /* Return string representation of passed in real value. */
17845 static const char *
17846 fp_const_from_val (REAL_VALUE_TYPE
*r
)
/* Lazily initialize the FP constant table on first use -- the initializing
   call is on an elided line; TODO confirm against the original file.  */
17848 if (!fp_consts_inited
)
17851 gcc_assert (real_equal (r
, &value_fp0
));
/* NOTE(review): lossy extraction -- interior lines are elided (embedded
   line numbers jump) and logical lines are split; code kept verbatim,
   comments only added.  Builds the assembly text for a multi-register
   pop/ldm into a local buffer (`pattern`, declared on an elided line)
   and emits it via output_asm_insn.  */
17855 /* OPERANDS[0] is the entire list of insns that constitute pop,
17856 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17857 is in the list, UPDATE is true iff the list contains explicit
17858 update of base register. */
17860 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17866 const char *conditional
;
17867 int num_saves
= XVECLEN (operands
[0], 0);
17868 unsigned int regno
;
17869 unsigned int regno_base
= REGNO (operands
[1]);
17870 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
/* Skip the optional SP-update and return-PC elements of the PARALLEL.  */
17873 offset
+= update
? 1 : 0;
17874 offset
+= return_pc
? 1 : 0;
17876 /* Is the base register in the list? */
17877 for (i
= offset
; i
< num_saves
; i
++)
17879 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17880 /* If SP is in the list, then the base register must be SP. */
17881 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17882 /* If base register is in the list, there must be no explicit update. */
17883 if (regno
== regno_base
)
17884 gcc_assert (!update
);
/* %d0 / %D0 print the (possibly reversed) condition of operand 0.  */
17887 conditional
= reverse
? "%?%D0" : "%?%d0";
17888 /* Can't use POP if returning from an interrupt. */
17889 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
17890 sprintf (pattern
, "pop%s\t{", conditional
);
17893 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17894 It's just a convention, their semantics are identical. */
17895 if (regno_base
== SP_REGNUM
)
17896 sprintf (pattern
, "ldmfd%s\t", conditional
);
17898 sprintf (pattern
, "ldmia%s\t", conditional
);
17900 sprintf (pattern
, "ldm%s\t", conditional
);
17902 strcat (pattern
, reg_names
[regno_base
]);
17904 strcat (pattern
, "!, {");
17906 strcat (pattern
, ", {");
17909 /* Output the first destination register. */
17911 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17913 /* Output the rest of the destination registers. */
17914 for (i
= offset
+ 1; i
< num_saves
; i
++)
17916 strcat (pattern
, ", ");
17918 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17921 strcat (pattern
, "}");
/* "^" requests the exception-return form when popping PC in an ISR.  */
17923 if (interrupt_p
&& return_pc
)
17924 strcat (pattern
, "^");
17926 output_asm_insn (pattern
, &cond
);
/* NOTE(review): lossy extraction -- interior lines elided, logical lines
   split; code kept verbatim, comments only added.  Emits vpush (when the
   base is SP) or vstmdb for a block of D registers; the register list is
   appended as ", d<n>" for each extra pair.  */
17930 /* Output the assembly for a store multiple. */
17933 vfp_output_vstmd (rtx
* operands
)
/* Base address register: either the MEM's address directly, or the base
   of its (elided) auto-modify address form.  */
17939 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
17940 ? XEXP (operands
[0], 0)
17941 : XEXP (XEXP (operands
[0], 0), 0);
17942 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
17945 strcpy (pattern
, "vpush%?.64\t{%P1");
17947 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
17949 p
= strlen (pattern
);
17951 gcc_assert (REG_P (operands
[1]));
/* Convert the VFP register number to a D-register index.  */
17953 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17954 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17956 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17958 strcpy (&pattern
[p
], "}");
17960 output_asm_insn (pattern
, operands
);
/* NOTE(review): lossy extraction -- interior lines elided (e.g. the
   recursion guard braces and parts of the PRE_MODIFY address build),
   logical lines split; code kept verbatim, comments only added.  Builds a
   PARALLEL push of COUNT D registers plus a parallel DWARF CFI sequence
   attached via REG_FRAME_RELATED_EXPR.  */
17965 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17966 number of bytes pushed. */
17969 vfp_emit_fstmd (int base_reg
, int count
)
17976 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17977 register pairs are stored by a store multiple insn. We avoid this
17978 by pushing an extra pair. */
17979 if (count
== 2 && !arm_arch6
)
17981 if (base_reg
== LAST_VFP_REGNUM
- 3)
17986 /* FSTMD may not store more than 16 doubleword registers at once. Split
17987 larger stores into multiple parts (up to a maximum of two, in
17992 /* NOTE: base_reg is an internal register number, so each D register
/* Recurse: store the upper part first, then the lower 16 registers.  */
17994 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17995 saved
+= vfp_emit_fstmd (base_reg
, 16);
17999 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
18000 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
18002 reg
= gen_rtx_REG (DFmode
, base_reg
);
/* Element 0: the store-multiple itself, expressed as an UNSPEC push with
   a pre-modify of the stack pointer (parts of this build are elided).  */
18005 XVECEXP (par
, 0, 0)
18006 = gen_rtx_SET (gen_frame_mem
18008 gen_rtx_PRE_MODIFY (Pmode
,
18011 (Pmode
, stack_pointer_rtx
,
18014 gen_rtx_UNSPEC (BLKmode
,
18015 gen_rtvec (1, reg
),
18016 UNSPEC_PUSH_MULT
));
/* DWARF element 0: SP adjustment by 8 bytes per D register.  */
18018 tmp
= gen_rtx_SET (stack_pointer_rtx
,
18019 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
18020 RTX_FRAME_RELATED_P (tmp
) = 1;
18021 XVECEXP (dwarf
, 0, 0) = tmp
;
18023 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
18024 RTX_FRAME_RELATED_P (tmp
) = 1;
18025 XVECEXP (dwarf
, 0, 1) = tmp
;
/* Remaining registers: USEs in the insn pattern, stores in the CFI.  */
18027 for (i
= 1; i
< count
; i
++)
18029 reg
= gen_rtx_REG (DFmode
, base_reg
);
18031 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
18033 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
18034 plus_constant (Pmode
,
18038 RTX_FRAME_RELATED_P (tmp
) = 1;
18039 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
18042 par
= emit_insn (par
);
18043 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
18044 RTX_FRAME_RELATED_P (par
) = 1;
/* NOTE(review): lossy extraction -- interior lines (braces, early NULL
   check, return statements) are elided; code kept verbatim, comments only
   added.  */
18049 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
18050 has the cmse_nonsecure_call attribute and returns false otherwise. */
18053 detect_cmse_nonsecure_call (tree addr
)
18058 tree fntype
= TREE_TYPE (addr
);
/* use_cmse reflects -mcmse; the attribute lives on the function type.  */
18059 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_call",
18060 TYPE_ATTRIBUTES (fntype
)))
/* NOTE(review): lossy extraction -- interior lines elided (incl. part of
   the VxWorks condition at 18079-18082), logical lines split; code kept
   verbatim, comments only added.  Emits the call insn, then augments its
   CALL_INSN_FUNCTION_USAGE with a PIC-register use (VxWorks RTP) and an
   IP clobber (AAPCS) as needed.  */
18066 /* Emit a call instruction with pattern PAT. ADDR is the address of
18067 the call target. */
18070 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
18074 insn
= emit_call_insn (pat
);
18076 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18077 If the call might use such an entry, add a use of the PIC register
18078 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18079 if (TARGET_VXWORKS_RTP
18082 && GET_CODE (addr
) == SYMBOL_REF
18083 && (SYMBOL_REF_DECL (addr
)
18084 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
18085 : !SYMBOL_REF_LOCAL_P (addr
)))
18087 require_pic_register ();
18088 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
18091 if (TARGET_AAPCS_BASED
)
18093 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18094 linker. We need to add an IP clobber to allow setting
18095 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18096 is not needed since it's a fixed register. */
18097 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
18098 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
/* NOTE(review): lossy extraction -- interior lines (braces, return) are
   elided; code kept verbatim, comments only added.  Pre-ARMv5 call
   sequence: save the return address into LR manually, then branch via
   bx (interworking/v4t) or a mov to PC.  */
18102 /* Output a 'call' insn. */
18104 output_call (rtx
*operands
)
18106 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
18108 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18109 if (REGNO (operands
[0]) == LR_REGNUM
)
18111 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
18112 output_asm_insn ("mov%?\t%0, %|lr", operands
);
/* Manually set LR to the return address (PC reads as current insn + 8
   in ARM state, which the pattern accounts for).  */
18115 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
18117 if (TARGET_INTERWORK
|| arm_arch4t
)
18118 output_asm_insn ("bx%?\t%0", operands
);
18120 output_asm_insn ("mov%?\t%|pc, %0", operands
);
/* NOTE(review): lossy extraction -- interior lines (declarations of i/ops,
   braces, return) are elided; code kept verbatim, comments only added.
   Moves three SImode core registers; copy direction is chosen so that an
   overlapping source is never clobbered before it is read.  */
18125 /* Output a move from arm registers to arm registers of a long double
18126 OPERANDS[0] is the destination.
18127 OPERANDS[1] is the source. */
18129 output_mov_long_double_arm_from_arm (rtx
*operands
)
18131 /* We have to be careful here because the two might overlap. */
18132 int dest_start
= REGNO (operands
[0]);
18133 int src_start
= REGNO (operands
[1]);
/* dest below src: copy low-to-high so overlapped regs are read first.  */
18137 if (dest_start
< src_start
)
18139 for (i
= 0; i
< 3; i
++)
18141 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
18142 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
18143 output_asm_insn ("mov%?\t%0, %1", ops
);
/* Otherwise copy high-to-low for the same reason.  */
18148 for (i
= 2; i
>= 0; i
--)
18150 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
18151 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
18152 output_asm_insn ("mov%?\t%0, %1", ops
);
/* NOTE(review): lossy extraction -- the function's leading comment,
   return type and some braces are elided; code kept verbatim, comments
   only added.  Emits a movw/movt-style pair: for immediates, set the low
   halfword then (if nonzero) insert the high halfword via ZERO_EXTRACT;
   for symbolic values, HIGH followed by LO_SUM.  A REG_EQUAL note records
   the full value on the final insn.  */
18160 arm_emit_movpair (rtx dest
, rtx src
)
18162 /* If the src is an immediate, simplify it. */
18163 if (CONST_INT_P (src
))
18165 HOST_WIDE_INT val
= INTVAL (src
);
/* Low 16 bits first.  */
18166 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
18167 if ((val
>> 16) & 0x0000ffff)
/* Insert bits 16..31 without disturbing the low half (movt).  */
18169 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
18171 GEN_INT ((val
>> 16) & 0x0000ffff));
18172 rtx_insn
*insn
= get_last_insn ();
18173 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
18177 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
18178 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
18179 rtx_insn
*insn
= get_last_insn ();
18180 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
/* NOTE(review): lossy extraction -- this large routine is missing most of
   its scaffolding (case labels, braces, `if (emit)` guards, count updates,
   returns); the embedded original line numbers jump constantly.  Code kept
   verbatim, comments only added.  Selects ldrd/strd, ldm/stm variants, or
   ldr/str pairs for a double-word move depending on the address form;
   when EMIT is false it only computes lengths (via *count, per the
   visible gcc_assert (!emit) path).  Do not edit without the original.  */
18183 /* Output a move between double words. It must be REG<-MEM
18186 output_move_double (rtx
*operands
, bool emit
, int *count
)
18188 enum rtx_code code0
= GET_CODE (operands
[0]);
18189 enum rtx_code code1
= GET_CODE (operands
[1]);
18194 /* The only case when this might happen is when
18195 you are looking at the length of a DImode instruction
18196 that has an invalid constant in it. */
18197 if (code0
== REG
&& code1
!= MEM
)
18199 gcc_assert (!emit
);
18206 unsigned int reg0
= REGNO (operands
[0]);
/* otherops[0] names the second (odd) register of the pair.  */
18208 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
18210 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
/* Load side: dispatch on the address form of the source MEM.  */
18212 switch (GET_CODE (XEXP (operands
[1], 0)))
18219 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
18220 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
18222 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18227 gcc_assert (TARGET_LDRD
);
18229 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
18236 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
18238 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
18246 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
18248 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
18253 gcc_assert (TARGET_LDRD
);
18255 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
18260 /* Autoicrement addressing modes should never have overlapping
18261 base and destination registers, and overlapping index registers
18262 are already prohibited, so this doesn't need to worry about
18264 otherops
[0] = operands
[0];
18265 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
18266 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
18268 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
18270 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
18272 /* Registers overlap so split out the increment. */
18275 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
18276 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
18283 /* Use a single insn if we can.
18284 FIXME: IWMMXT allows offsets larger than ldrd can
18285 handle, fix these up with a pair of ldr. */
18287 || !CONST_INT_P (otherops
[2])
18288 || (INTVAL (otherops
[2]) > -256
18289 && INTVAL (otherops
[2]) < 256))
18292 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
18298 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
18299 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18309 /* Use a single insn if we can.
18310 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18311 fix these up with a pair of ldr. */
18313 || !CONST_INT_P (otherops
[2])
18314 || (INTVAL (otherops
[2]) > -256
18315 && INTVAL (otherops
[2]) < 256))
18318 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
18324 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18325 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
18335 /* We might be able to use ldrd %0, %1 here. However the range is
18336 different to ldr/adr, and it is broken on some ARMv7-M
18337 implementations. */
18338 /* Use the second register of the pair to avoid problematic
18340 otherops
[1] = operands
[1];
18342 output_asm_insn ("adr%?\t%0, %1", otherops
);
18343 operands
[1] = otherops
[0];
18347 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18349 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
18356 /* ??? This needs checking for thumb2. */
18358 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18359 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18361 otherops
[0] = operands
[0];
18362 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18363 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18365 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18367 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
/* Small constant offsets map onto ldm addressing variants.  */
18369 switch ((int) INTVAL (otherops
[2]))
18373 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
18379 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
18385 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
18389 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18390 operands
[1] = otherops
[0];
18392 && (REG_P (otherops
[2])
18394 || (CONST_INT_P (otherops
[2])
18395 && INTVAL (otherops
[2]) > -256
18396 && INTVAL (otherops
[2]) < 256)))
18398 if (reg_overlap_mentioned_p (operands
[0],
18401 /* Swap base and index registers over to
18402 avoid a conflict. */
18403 std::swap (otherops
[1], otherops
[2]);
18405 /* If both registers conflict, it will usually
18406 have been fixed by a splitter. */
18407 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18408 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18412 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18413 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18420 otherops
[0] = operands
[0];
18422 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
18427 if (CONST_INT_P (otherops
[2]))
18431 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18432 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18434 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18440 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18446 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18453 return "ldrd%?\t%0, [%1]";
18455 return "ldmia%?\t%1, %M0";
/* Fallback: two single-word loads, ordered to survive base/dest overlap.  */
18459 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18460 /* Take care of overlapping base/data reg. */
18461 if (reg_mentioned_p (operands
[0], operands
[1]))
18465 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18466 output_asm_insn ("ldr%?\t%0, %1", operands
);
18476 output_asm_insn ("ldr%?\t%0, %1", operands
);
18477 output_asm_insn ("ldr%?\t%0, %1", otherops
);
/* Store side: MEM <- REG.  */
18487 /* Constraints should ensure this. */
18488 gcc_assert (code0
== MEM
&& code1
== REG
);
18489 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18490 || (TARGET_ARM
&& TARGET_LDRD
));
18492 switch (GET_CODE (XEXP (operands
[0], 0)))
18498 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
18500 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18505 gcc_assert (TARGET_LDRD
);
18507 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
18514 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
18516 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
18524 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
18526 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
18531 gcc_assert (TARGET_LDRD
);
18533 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
18538 otherops
[0] = operands
[1];
18539 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18540 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18542 /* IWMMXT allows offsets larger than ldrd can handle,
18543 fix these up with a pair of ldr. */
18545 && CONST_INT_P (otherops
[2])
18546 && (INTVAL(otherops
[2]) <= -256
18547 || INTVAL(otherops
[2]) >= 256))
18549 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18553 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18554 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18563 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18564 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18570 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18573 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
18578 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
18583 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18584 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18586 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18590 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
18597 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
18604 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
18609 && (REG_P (otherops
[2])
18611 || (CONST_INT_P (otherops
[2])
18612 && INTVAL (otherops
[2]) > -256
18613 && INTVAL (otherops
[2]) < 256)))
18615 otherops
[0] = operands
[1];
18616 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18618 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
/* Fallback: two single-word stores.  */
18624 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18625 otherops
[1] = operands
[1];
18628 output_asm_insn ("str%?\t%1, %0", operands
);
18629 output_asm_insn ("str%?\t%H1, %0", otherops
);
/* NOTE(review): lossy extraction -- case labels, braces and declarations
   (dest/src/ops/i) are elided; code kept verbatim, comments only added.
   Handles quad-word (4 x SImode) moves in core registers: ldm for loads,
   ordered mov sequences for reg->reg, stm for stores.  */
18639 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18640 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18643 output_move_quad (rtx
*operands
)
18645 if (REG_P (operands
[0]))
18647 /* Load, or reg->reg move. */
18649 if (MEM_P (operands
[1]))
18651 switch (GET_CODE (XEXP (operands
[1], 0)))
18654 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18659 output_asm_insn ("adr%?\t%0, %1", operands
);
18660 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
18664 gcc_unreachable ();
18672 gcc_assert (REG_P (operands
[1]));
18674 dest
= REGNO (operands
[0]);
18675 src
= REGNO (operands
[1]);
18677 /* This seems pretty dumb, but hopefully GCC won't try to do it
/* Copy direction chosen (by an elided condition) to survive overlap.  */
18680 for (i
= 0; i
< 4; i
++)
18682 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18683 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18684 output_asm_insn ("mov%?\t%0, %1", ops
);
18687 for (i
= 3; i
>= 0; i
--)
18689 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18690 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18691 output_asm_insn ("mov%?\t%0, %1", ops
);
/* Store side.  */
18697 gcc_assert (MEM_P (operands
[0]));
18698 gcc_assert (REG_P (operands
[1]));
18699 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18701 switch (GET_CODE (XEXP (operands
[0], 0)))
18704 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18708 gcc_unreachable ();
/* NOTE(review): lossy extraction -- case labels, some asserts and the
   `buff`/`templ`/`mode` declarations are elided; code kept verbatim,
   comments only added.  Chooses between vldm/vstm-db, -ia and plain
   vldr/vstr templates by address form, then fills in ld/st direction,
   operand width (64/32/16) and an "@ int" annotation for integer modes.  */
18715 /* Output a VFP load or store instruction. */
18718 output_move_vfp (rtx
*operands
)
18720 rtx reg
, mem
, addr
, ops
[2];
/* load != 0 when operand 0 is the register (i.e. this is a load).  */
18721 int load
= REG_P (operands
[0]);
18722 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18723 int sp
= (!TARGET_VFP_FP16INST
18724 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
18725 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18730 reg
= operands
[!load
];
18731 mem
= operands
[load
];
18733 mode
= GET_MODE (reg
);
18735 gcc_assert (REG_P (reg
));
18736 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18737 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
18743 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18744 gcc_assert (MEM_P (mem
));
18746 addr
= XEXP (mem
, 0);
18748 switch (GET_CODE (addr
))
18751 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18752 ops
[0] = XEXP (addr
, 0);
18757 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18758 ops
[0] = XEXP (addr
, 0);
18763 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18769 sprintf (buff
, templ
,
18770 load
? "ld" : "st",
18771 dp
? "64" : sp
? "32" : "16",
18773 integer_p
? "\t%@ int" : "");
18774 output_asm_insn (buff
, ops
);
/* NOTE(review): lossy extraction -- case labels, the nregs>4 conditions,
   the `overlap` initialization and several braces are elided; code kept
   verbatim, comments only added.  Chooses vldm/vstm or vld1.64/vst1.64 by
   address form and size; the LABEL_REF/PLUS path falls back to per-D-reg
   vldr/vstr, deferring at most one overlapping destination register.  */
18779 /* Output a Neon double-word or quad-word load or store, or a load
18780 or store for larger structure modes.
18782 WARNING: The ordering of elements is weird in big-endian mode,
18783 because the EABI requires that vectors stored in memory appear
18784 as though they were stored by a VSTM, as required by the EABI.
18785 GCC RTL defines element ordering based on in-memory order.
18786 This can be different from the architectural ordering of elements
18787 within a NEON register. The intrinsics defined in arm_neon.h use the
18788 NEON register element ordering, not the GCC RTL element ordering.
18790 For example, the in-memory ordering of a big-endian a quadword
18791 vector with 16-bit elements when stored from register pair {d0,d1}
18792 will be (lowest address first, d0[N] is NEON register element N):
18794 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18796 When necessary, quadword registers (dN, dN+1) are moved to ARM
18797 registers from rN in the order:
18799 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18801 So that STM/LDM can be used on vectors in ARM registers, and the
18802 same memory layout will result as if VSTM/VLDM were used.
18804 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18805 possible, which allows use of appropriate alignment tags.
18806 Note that the choice of "64" is independent of the actual vector
18807 element size; this size simply ensures that the behavior is
18808 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18810 Due to limitations of those instructions, use of VST1.64/VLD1.64
18811 is not possible if:
18812 - the address contains PRE_DEC, or
18813 - the mode refers to more than 4 double-word registers
18815 In those cases, it would be possible to replace VSTM/VLDM by a
18816 sequence of instructions; this is not currently implemented since
18817 this is not certain to actually improve performance. */
18820 output_move_neon (rtx
*operands
)
18822 rtx reg
, mem
, addr
, ops
[2];
18823 int regno
, nregs
, load
= REG_P (operands
[0]);
18828 reg
= operands
[!load
];
18829 mem
= operands
[load
];
18831 mode
= GET_MODE (reg
);
18833 gcc_assert (REG_P (reg
));
18834 regno
= REGNO (reg
);
/* REG_NREGS counts SImode registers; halve for D-register count.  */
18835 nregs
= REG_NREGS (reg
) / 2;
18836 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18837 || NEON_REGNO_OK_FOR_QUAD (regno
));
18838 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18839 || VALID_NEON_QREG_MODE (mode
)
18840 || VALID_NEON_STRUCT_MODE (mode
));
18841 gcc_assert (MEM_P (mem
));
18843 addr
= XEXP (mem
, 0);
18845 /* Strip off const from addresses like (const (plus (...))). */
18846 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18847 addr
= XEXP (addr
, 0);
18849 switch (GET_CODE (addr
))
18852 /* We have to use vldm / vstm for too-large modes. */
18855 templ
= "v%smia%%?\t%%0!, %%h1";
18856 ops
[0] = XEXP (addr
, 0);
18860 templ
= "v%s1.64\t%%h1, %%A0";
18867 /* We have to use vldm / vstm in this case, since there is no
18868 pre-decrement form of the vld1 / vst1 instructions. */
18869 templ
= "v%smdb%%?\t%%0!, %%h1";
18870 ops
[0] = XEXP (addr
, 0);
18875 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18876 gcc_unreachable ();
18879 /* We have to use vldm / vstm for too-large modes. */
18883 templ
= "v%smia%%?\t%%m0, %%h1";
18885 templ
= "v%s1.64\t%%h1, %%A0";
18891 /* Fall through. */
/* Per-D-register fallback: one vldr/vstr per register, with a single
   overlapping destination register deferred until the end.  */
18897 for (i
= 0; i
< nregs
; i
++)
18899 /* We're only using DImode here because it's a convenient size. */
18900 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18901 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18902 if (reg_overlap_mentioned_p (ops
[0], mem
))
18904 gcc_assert (overlap
== -1);
18909 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18910 output_asm_insn (buff
, ops
);
18915 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18916 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18917 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18918 output_asm_insn (buff
, ops
);
18925 gcc_unreachable ();
18928 sprintf (buff
, templ
, load
? "ld" : "st");
18929 output_asm_insn (buff
, ops
);
/* NOTE(review): lossy extraction -- the reg-reg length table, braces and
   return statements are elided; code kept verbatim, comments only added.
   Computes insn length for struct-mode Neon moves; memory forms needing
   the per-register fallback cost one 4-byte insn per D register.  */
18934 /* Compute and return the length of neon_mov<mode>, where <mode> is
18935 one of VSTRUCT modes: EI, OI, CI or XI. */
18937 arm_attr_length_move_neon (rtx_insn
*insn
)
18939 rtx reg
, mem
, addr
;
18943 extract_insn_cached (insn
);
18945 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18947 mode
= GET_MODE (recog_data
.operand
[0]);
18958 gcc_unreachable ();
18962 load
= REG_P (recog_data
.operand
[0]);
18963 reg
= recog_data
.operand
[!load
];
18964 mem
= recog_data
.operand
[load
];
18966 gcc_assert (MEM_P (mem
));
18968 addr
= XEXP (mem
, 0);
18970 /* Strip off const from addresses like (const (plus (...))). */
18971 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18972 addr
= XEXP (addr
, 0);
/* These address forms take the multi-insn path in output_move_neon.  */
18974 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18976 int insns
= REG_NREGS (reg
) / 2;
/* NOTE(review): lossy extraction -- part of the leading comment, the
   `mem`/`addr` declarations, the REG_P(addr) disjunct and the returns are
   elided; code kept verbatim, comments only added.  Tests whether the
   memory operand's address is a plain register or register + constant.  */
18983 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18987 arm_address_offset_is_imm (rtx_insn
*insn
)
18991 extract_insn_cached (insn
);
18993 if (REG_P (recog_data
.operand
[0]))
18996 mem
= recog_data
.operand
[0];
18998 gcc_assert (MEM_P (mem
));
19000 addr
= XEXP (mem
, 0);
19003 || (GET_CODE (addr
) == PLUS
19004 && REG_P (XEXP (addr
, 0))
19005 && CONST_INT_P (XEXP (addr
, 1))))
/* NOTE(review): lossy extraction -- braces, the n<0 negation and the
   return are elided; code kept verbatim, comments only added.  Delegates
   to output_multi_immediate with sub templates for negative N and add
   templates otherwise.  */
19011 /* Output an ADD r, s, #n where n may be too big for one instruction.
19012 If adding zero to one register, output nothing. */
19014 output_add_immediate (rtx
*operands
)
19016 HOST_WIDE_INT n
= INTVAL (operands
[2]);
/* Adding 0 to the same register needs no code at all.  */
19018 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
19021 output_multi_immediate (operands
,
19022 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19025 output_multi_immediate (operands
,
19026 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
/* NOTE(review): lossy extraction -- the #if body, braces, the switch to
   INSTR2 after the first emission and the return are elided; code kept
   verbatim, comments only added.  Splits N into 8-bit chunks at even bit
   positions (the ARM rotated-immediate encoding) and emits one insn per
   nonzero chunk.  */
19033 /* Output a multiple immediate operation.
19034 OPERANDS is the vector of operands referred to in the output patterns.
19035 INSTR1 is the output pattern to use for the first constant.
19036 INSTR2 is the output pattern to use for subsequent constants.
19037 IMMED_OP is the index of the constant slot in OPERANDS.
19038 N is the constant value. */
19039 static const char *
19040 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
19041 int immed_op
, HOST_WIDE_INT n
)
19043 #if HOST_BITS_PER_WIDE_INT > 32
19049 /* Quick and easy output. */
19050 operands
[immed_op
] = const0_rtx
;
19051 output_asm_insn (instr1
, operands
);
19056 const char * instr
= instr1
;
19058 /* Note that n is never zero here (which would give no output). */
19059 for (i
= 0; i
< 32; i
+= 2)
19063 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
19064 output_asm_insn (instr
, operands
);
/* NOTE(review): lossy extraction -- the switch over CODE and the other
   mnemonic returns ("asr", "lsr", "ror") are elided; code kept verbatim,
   comments only added.  */
19074 /* Return the name of a shifter operation. */
19075 static const char *
19076 arm_shift_nmem(enum rtx_code code
)
/* Only the ASHIFT arm of the (elided) switch is visible here.  */
19081 return ARM_LSL_NAME
;
/* NOTE(review): lossy extraction -- most case labels and returns ("add",
   "and", etc.) are elided; code kept verbatim, comments only added.  Maps
   an rtx operation code to its ARM mnemonic.  */
19097 /* Return the appropriate ARM instruction for the operation code.
19098 The returned result should not be overwritten. OP is the rtx of the
19099 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19102 arithmetic_instr (rtx op
, int shift_first_arg
)
19104 switch (GET_CODE (op
))
/* Reverse-subtract when the shifted operand is the first argument.  */
19110 return shift_first_arg
? "rsb" : "sub";
19125 return arm_shift_nmem(GET_CODE(op
));
19128 gcc_unreachable ();
/* NOTE(review): lossy extraction -- the switch scaffolding, several case
   labels (ROTATE/ROTATERT/MULT), NULL returns and final return are
   elided; code kept verbatim, comments only added.  Validates a shift rtx
   and returns its mnemonic; MULT by a power of two is rewritten as lsl by
   exact_log2 of the multiplier.  */
19132 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19133 for the operation code. The returned result should not be overwritten.
19134 OP is the rtx code of the shift.
19135 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
19137 static const char *
19138 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
19141 enum rtx_code code
= GET_CODE (op
);
19146 if (!CONST_INT_P (XEXP (op
, 1)))
19148 output_operand_lossage ("invalid shift operand");
/* ROTATE is emitted as ROTATERT of the complementary amount.  */
19153 *amountp
= 32 - INTVAL (XEXP (op
, 1));
19161 mnem
= arm_shift_nmem(code
);
19162 if (CONST_INT_P (XEXP (op
, 1)))
19164 *amountp
= INTVAL (XEXP (op
, 1));
19166 else if (REG_P (XEXP (op
, 1)))
19173 output_operand_lossage ("invalid shift operand");
19179 /* We never have to worry about the amount being other than a
19180 power of 2, since this case can never be reloaded from a reg. */
19181 if (!CONST_INT_P (XEXP (op
, 1)))
19183 output_operand_lossage ("invalid shift operand");
19187 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
19189 /* Amount must be a power of two. */
19190 if (*amountp
& (*amountp
- 1))
19192 output_operand_lossage ("invalid shift operand");
19196 *amountp
= exact_log2 (*amountp
);
19197 gcc_assert (IN_RANGE (*amountp
, 0, 31));
19198 return ARM_LSL_NAME
;
19201 output_operand_lossage ("invalid shift operand");
19205 /* This is not 100% correct, but follows from the desire to merge
19206 multiplication by a power of 2 with the recognizer for a
19207 shift. >=32 is not a valid shift for "lsl", so we must try and
19208 output a shift that produces the correct arithmetical result.
19209 Using lsr #32 is identical except for the fact that the carry bit
19210 is not set correctly if we set the flags; but we never use the
19211 carry bit from such an operation, so we can ignore that. */
19212 if (code
== ROTATERT
)
19213 /* Rotate is just modulo 32. */
19215 else if (*amountp
!= (*amountp
& 31))
19217 if (code
== ASHIFT
)
19222 /* Shifts of 0 are no-ops. */
19229 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19230 because /bin/as is horribly restrictive. The judgement about
19231 whether or not each character is 'printable' (and can be output as
19232 is) or not (and must be printed with an octal escape) must be made
19233 with reference to the *host* character set -- the situation is
19234 similar to that discussed in the comments above pp_c_char in
19235 c-pretty-print.c. */
/* Maximum characters emitted per .ascii directive before starting a new one
   (works around restrictive assemblers, per the comment above).  */
19237 #define MAX_ASCII_LEN 51
/* Write LEN bytes of P to STREAM as one or more .ascii directives,
   escaping backslash/quote and emitting non-printable bytes as octal
   escapes.  NOTE(review): lossy extraction -- loop body lines (including
   where C is read from P and LEN_SO_FAR is updated) are missing; code
   preserved byte-for-byte, comments only added.  */
19240 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
19243 int len_so_far
= 0;
19245 fputs ("\t.ascii\t\"", stream
);
19247 for (i
= 0; i
< len
; i
++)
/* Start a fresh .ascii directive once the current one is long enough.  */
19251 if (len_so_far
>= MAX_ASCII_LEN
)
19253 fputs ("\"\n\t.ascii\t\"", stream
);
/* Backslash and double-quote must be escaped inside the string.  */
19259 if (c
== '\\' || c
== '\"')
19261 putc ('\\', stream
);
/* Non-printable byte: emit as 3-digit octal escape.  */
19269 fprintf (stream
, "\\%03o", c
);
19274 fputs ("\"\n", stream
);
19277 /* Whether a register is callee saved or not. This is necessary because high
19278 registers are marked as caller saved when optimizing for size on Thumb-1
19279 targets despite being callee saved in order to avoid using them. */
/* True iff REG must be preserved across calls: either it is not in
   call_used_regs, or it is a Thumb-1 high register while optimizing for
   size (marked caller-saved only to discourage use -- see comment above).  */
19280 #define callee_saved_reg_p(reg) \
19281 (!call_used_regs[reg] \
19282 || (TARGET_THUMB1 && optimize_size \
19283 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19285 /* Compute the register save mask for registers 0 through 12
19286 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
/* Compute the save mask for core registers r0-r12 for the current function:
   interrupt handlers save everything potentially clobbered, noreturn
   ("volatile") functions save only frame pointers for debuggability, and
   normal functions save live callee-saved registers plus FP/PIC/EH-data
   registers as needed.  NOTE(review): lossy extraction -- braces, some
   conditions and loop headers are missing; code preserved byte-for-byte,
   comments only added.  */
19288 static unsigned long
19289 arm_compute_save_reg0_reg12_mask (void)
19291 unsigned long func_type
= arm_current_func_type ();
19292 unsigned long save_reg_mask
= 0;
19295 if (IS_INTERRUPT (func_type
))
19297 unsigned int max_reg
;
19298 /* Interrupt functions must not corrupt any registers,
19299 even call clobbered ones. If this is a leaf function
19300 we can just examine the registers used by the RTL, but
19301 otherwise we have to assume that whatever function is
19302 called might clobber anything, and so we have to save
19303 all the call-clobbered registers as well. */
19304 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
19305 /* FIQ handlers have registers r8 - r12 banked, so
19306 we only need to check r0 - r7, Normal ISRs only
19307 bank r14 and r15, so we must check up to r12.
19308 r13 is the stack pointer which is always preserved,
19309 so we do not need to consider it here. */
19314 for (reg
= 0; reg
<= max_reg
; reg
++)
19315 if (df_regs_ever_live_p (reg
)
19316 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
19317 save_reg_mask
|= (1 << reg
);
19319 /* Also save the pic base register if necessary. */
19321 && !TARGET_SINGLE_PIC_BASE
19322 && arm_pic_register
!= INVALID_REGNUM
19323 && crtl
->uses_pic_offset_table
)
19324 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19326 else if (IS_VOLATILE(func_type
))
19328 /* For noreturn functions we historically omitted register saves
19329 altogether. However this really messes up debugging. As a
19330 compromise save just the frame pointers. Combined with the link
19331 register saved elsewhere this should be sufficient to get
19333 if (frame_pointer_needed
)
19334 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19335 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
19336 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19337 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
19338 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
19342 /* In the normal case we only need to save those registers
19343 which are call saved and which are used by this function. */
19344 for (reg
= 0; reg
<= 11; reg
++)
19345 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19346 save_reg_mask
|= (1 << reg
);
19348 /* Handle the frame pointer as a special case. */
19349 if (frame_pointer_needed
)
19350 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19352 /* If we aren't loading the PIC register,
19353 don't stack it even though it may be live. */
19355 && !TARGET_SINGLE_PIC_BASE
19356 && arm_pic_register
!= INVALID_REGNUM
19357 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19358 || crtl
->uses_pic_offset_table
))
19359 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19361 /* The prologue will copy SP into R0, so save it. */
19362 if (IS_STACKALIGN (func_type
))
19363 save_reg_mask
|= 1;
19366 /* Save registers so the exception handler can modify them. */
19367 if (crtl
->calls_eh_return
)
/* Loop over EH data registers until the INVALID_REGNUM sentinel.  */
19373 reg
= EH_RETURN_DATA_REGNO (i
);
19374 if (reg
== INVALID_REGNUM
)
19376 save_reg_mask
|= 1 << reg
;
19380 return save_reg_mask
;
19383 /* Return true if r3 is live at the start of the function. */
/* Return true if r3 is live on entry to the current function, judged from
   the CFG live-out set of the entry block (see the comment below about
   acceptable false positives).  NOTE(review): the return-type line is
   missing from this extraction; code preserved byte-for-byte.  */
19386 arm_r3_live_at_start_p (void)
19388 /* Just look at cfg info, which is still close enough to correct at this
19389 point. This gives false positives for broken functions that might use
19390 uninitialized data that happens to be allocated in r3, but who cares? */
19391 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
19394 /* Compute the number of bytes used to store the static chain register on the
19395 stack, above the stack frame. We need to know this accurately to get the
19396 alignment of the rest of the stack frame correct. */
/* Return the number of bytes used to spill the static chain register above
   the stack frame (needed for nested functions when r3 is live at entry
   and there are no pretend args).  NOTE(review): the return statements
   are missing from this extraction; code preserved byte-for-byte.  */
19399 arm_compute_static_chain_stack_bytes (void)
19401 /* See the defining assertion in arm_expand_prologue. */
19402 if (IS_NESTED (arm_current_func_type ())
19403 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19404 || ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
19405 || flag_stack_clash_protection
)
19406 && !df_regs_ever_live_p (LR_REGNUM
)))
19407 && arm_r3_live_at_start_p ()
19408 && crtl
->args
.pretend_args_size
== 0)
19414 /* Compute a bit mask of which core registers need to be
19415 saved on the stack for the current function.
19416 This is used by arm_compute_frame_layout, which may add extra registers. */
/* Compute the full core-register save mask for the current (32-bit) function:
   the r0-r12 mask, plus FP/IP/LR/PC for APCS frames, LR when profitable, an
   extra register for iWMMXt 64-bit stack alignment, and a work register for
   Thumb-2 nested PIC functions.  NOTE(review): lossy extraction -- braces
   and several condition lines are missing; code preserved byte-for-byte,
   comments only added.  */
19418 static unsigned long
19419 arm_compute_save_core_reg_mask (void)
19421 unsigned int save_reg_mask
= 0;
19422 unsigned long func_type
= arm_current_func_type ();
19425 if (IS_NAKED (func_type
))
19426 /* This should never really happen. */
19429 /* If we are creating a stack frame, then we must save the frame pointer,
19430 IP (which will hold the old stack pointer), LR and the PC. */
19431 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19433 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
19436 | (1 << PC_REGNUM
);
19438 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
19440 /* Decide if we need to save the link register.
19441 Interrupt routines have their own banked link register,
19442 so they never need to save it.
19443 Otherwise if we do not use the link register we do not need to save
19444 it. If we are pushing other registers onto the stack however, we
19445 can save an instruction in the epilogue by pushing the link register
19446 now and then popping it back into the PC. This incurs extra memory
19447 accesses though, so we only do it when optimizing for size, and only
19448 if we know that we will not need a fancy return sequence. */
19449 if (df_regs_ever_live_p (LR_REGNUM
)
19452 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19453 && !crtl
->tail_call_emit
19454 && !crtl
->calls_eh_return
))
19455 save_reg_mask
|= 1 << LR_REGNUM
;
19457 if (cfun
->machine
->lr_save_eliminated
)
19458 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19460 if (TARGET_REALLY_IWMMXT
19461 && ((bit_count (save_reg_mask
)
19462 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19463 arm_compute_static_chain_stack_bytes())
19466 /* The total number of registers that are going to be pushed
19467 onto the stack is odd. We need to ensure that the stack
19468 is 64-bit aligned before we start to save iWMMXt registers,
19469 and also before we start to create locals. (A local variable
19470 might be a double or long long which we will load/store using
19471 an iWMMXt instruction). Therefore we need to push another
19472 ARM register, so that the stack will be 64-bit aligned. We
19473 try to avoid using the arg registers (r0 -r3) as they might be
19474 used to pass values in a tail call. */
19475 for (reg
= 4; reg
<= 12; reg
++)
19476 if ((save_reg_mask
& (1 << reg
)) == 0)
19480 save_reg_mask
|= (1 << reg
);
/* Fallback: pad with r3, which blocks sibling calls.  */
19483 cfun
->machine
->sibcall_blocked
= 1;
19484 save_reg_mask
|= (1 << 3);
19488 /* We may need to push an additional register for use initializing the
19489 PIC base register. */
19490 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19491 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19493 reg
= thumb_find_work_register (1 << 4);
19494 if (!call_used_regs
[reg
])
19495 save_reg_mask
|= (1 << reg
);
19498 return save_reg_mask
;
19501 /* Compute a bit mask of which core registers need to be
19502 saved on the stack for the current function. */
/* Thumb-1 variant of the core-register save mask: live callee-saved low
   regs, frame pointer, PIC base, interworking r11 slot, LR, plus a low
   work register when only high registers need pushing, and a spare low
   register for large-frame stack decrements.  NOTE(review): lossy
   extraction -- braces and some statements are missing; code preserved
   byte-for-byte, comments only added.  */
19503 static unsigned long
19504 thumb1_compute_save_core_reg_mask (void)
19506 unsigned long mask
;
19510 for (reg
= 0; reg
< 12; reg
++)
19511 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19514 /* Handle the frame pointer as a special case. */
19515 if (frame_pointer_needed
)
19516 mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19519 && !TARGET_SINGLE_PIC_BASE
19520 && arm_pic_register
!= INVALID_REGNUM
19521 && crtl
->uses_pic_offset_table
)
19522 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19524 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19525 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19526 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19528 /* LR will also be pushed if any lo regs are pushed. */
19529 if (mask
& 0xff || thumb_force_lr_save ())
19530 mask
|= (1 << LR_REGNUM
);
19532 /* Make sure we have a low work register if we need one.
19533 We will need one if we are going to push a high register,
19534 but we are not currently intending to push a low register. */
19535 if ((mask
& 0xff) == 0
19536 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19538 /* Use thumb_find_work_register to choose which register
19539 we will use. If the register is live then we will
19540 have to push it. Use LAST_LO_REGNUM as our fallback
19541 choice for the register to select. */
19542 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19543 /* Make sure the register returned by thumb_find_work_register is
19544 not part of the return value. */
19545 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19546 reg
= LAST_LO_REGNUM
;
19548 if (callee_saved_reg_p (reg
))
19552 /* The 504 below is 8 bytes less than 512 because there are two possible
19553 alignment words. We can't tell here if they will be present or not so we
19554 have to play it safe and assume that they are. */
19555 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19556 ROUND_UP_WORD (get_frame_size ()) +
19557 crtl
->outgoing_args_size
) >= 504)
19559 /* This is the same as the code in thumb1_expand_prologue() which
19560 determines which register to use for stack decrement. */
19561 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19562 if (mask
& (1 << reg
))
19565 if (reg
> LAST_LO_REGNUM
)
19567 /* Make sure we have a register available for stack decrement. */
19568 mask
|= 1 << LAST_LO_REGNUM
;
19576 /* Return the number of bytes required to save VFP registers. */
/* Return the number of stack bytes needed to save the live VFP register
   pairs (8 bytes per D register), splitting save blocks of exactly two
   registers on pre-v6 cores to work around the ARM10 VFPr1 errata.
   NOTE(review): lossy extraction -- the return type, loop increment, and
   several statements are missing; code preserved byte-for-byte.  */
19578 arm_get_vfp_saved_size (void)
19580 unsigned int regno
;
19585 /* Space for saved VFP registers. */
19586 if (TARGET_HARD_FLOAT
)
/* Scan D registers in pairs; a pair is skipped only if both halves are
   dead or call-used.  */
19589 for (regno
= FIRST_VFP_REGNUM
;
19590 regno
< LAST_VFP_REGNUM
;
19593 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19594 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19598 /* Workaround ARM10 VFPr1 bug. */
19599 if (count
== 2 && !arm_arch6
)
19601 saved
+= count
* 8;
19610 if (count
== 2 && !arm_arch6
)
19612 saved
+= count
* 8;
19619 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19620 everything bar the final return instruction. If simple_return is true,
19621 then do not output epilogue, because it has already been emitted in RTL.
19623 Note: do not forget to update length attribute of corresponding insn pattern
19624 when changing assembly output (eg. length attribute of
19625 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19626 register clearing sequences). */
/* Emit the assembly for a function-exit sequence: restore saved registers
   (via LDR, LDMIB/LDMFD or POP as appropriate), then emit the return
   proper, with special forms for interrupt/FIQ handlers, interworking,
   exceptions, and ARMv8-M CMSE non-secure entry (register/FPSCR clearing
   followed by BXNS).  See the header comment above about REALLY_RETURN and
   SIMPLE_RETURN.  NOTE(review): lossy extraction -- the return type,
   braces, and many condition/assignment lines are missing; code below is
   preserved byte-for-byte, only comments were added.  */
19628 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19629 bool simple_return
)
19631 char conditional
[10];
19634 unsigned long live_regs_mask
;
19635 unsigned long func_type
;
19636 arm_stack_offsets
*offsets
;
19638 func_type
= arm_current_func_type ();
19640 if (IS_NAKED (func_type
))
19643 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19645 /* If this function was declared non-returning, and we have
19646 found a tail call, then we have to trust that the called
19647 function won't return. */
19652 /* Otherwise, trap an attempted return by aborting. */
19654 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19656 assemble_external_libcall (ops
[1]);
19657 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19663 gcc_assert (!cfun
->calls_alloca
|| really_return
);
/* Build the condition suffix ("%?%d0" or "%?%D0") once.  */
19665 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19667 cfun
->machine
->return_used_this_function
= 1;
19669 offsets
= arm_get_frame_offsets ();
19670 live_regs_mask
= offsets
->saved_regs_mask
;
19672 if (!simple_return
&& live_regs_mask
)
19674 const char * return_reg
;
19676 /* If we do not have any special requirements for function exit
19677 (e.g. interworking) then we can load the return address
19678 directly into the PC. Otherwise we must load it into LR. */
19680 && !IS_CMSE_ENTRY (func_type
)
19681 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19682 return_reg
= reg_names
[PC_REGNUM
];
19684 return_reg
= reg_names
[LR_REGNUM
];
19686 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19688 /* There are three possible reasons for the IP register
19689 being saved. 1) a stack frame was created, in which case
19690 IP contains the old stack pointer, or 2) an ISR routine
19691 corrupted it, or 3) it was saved to align the stack on
19692 iWMMXt. In case 1, restore IP into SP, otherwise just
19694 if (frame_pointer_needed
)
19696 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19697 live_regs_mask
|= (1 << SP_REGNUM
);
19700 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19703 /* On some ARM architectures it is faster to use LDR rather than
19704 LDM to load a single register. On other architectures, the
19705 cost is the same. In 26 bit mode, or for exception handlers,
19706 we have to use LDM to load the PC so that the CPSR is also
19708 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19709 if (live_regs_mask
== (1U << reg
))
19712 if (reg
<= LAST_ARM_REGNUM
19713 && (reg
!= LR_REGNUM
19715 || ! IS_INTERRUPT (func_type
)))
/* Single saved register: restore it with a post-indexed LDR.  */
19717 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19718 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19725 /* Generate the load multiple instruction to restore the
19726 registers. Note we can get here, even if
19727 frame_pointer_needed is true, but only if sp already
19728 points to the base of the saved core registers. */
19729 if (live_regs_mask
& (1 << SP_REGNUM
))
19731 unsigned HOST_WIDE_INT stack_adjust
;
19733 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19734 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19736 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19737 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19740 /* If we can't use ldmib (SA110 bug),
19741 then try to pop r3 instead. */
19743 live_regs_mask
|= 1 << 3;
19745 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19748 /* For interrupt returns we have to use an LDM rather than
19749 a POP so that we can use the exception return variant. */
19750 else if (IS_INTERRUPT (func_type
))
19751 sprintf (instr
, "ldmfd%s\t%%|sp!, {", conditional
);
19753 sprintf (instr
, "pop%s\t{", conditional
);
/* Append the register list to the opcode built so far.  */
19755 p
= instr
+ strlen (instr
);
19757 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19758 if (live_regs_mask
& (1 << reg
))
19760 int l
= strlen (reg_names
[reg
]);
19766 memcpy (p
, ", ", 2);
19770 memcpy (p
, "%|", 2);
19771 memcpy (p
+ 2, reg_names
[reg
], l
);
19775 if (live_regs_mask
& (1 << LR_REGNUM
))
19777 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19778 /* If returning from an interrupt, restore the CPSR. */
19779 if (IS_INTERRUPT (func_type
))
19786 output_asm_insn (instr
, & operand
);
19788 /* See if we need to generate an extra instruction to
19789 perform the actual function return. */
19791 && func_type
!= ARM_FT_INTERWORKED
19792 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19794 /* The return has already been handled
19795 by loading the LR into the PC. */
19802 switch ((int) ARM_FUNC_TYPE (func_type
))
19806 /* ??? This is wrong for unified assembly syntax. */
19807 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19810 case ARM_FT_INTERWORKED
:
19811 gcc_assert (arm_arch5
|| arm_arch4t
);
19812 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19815 case ARM_FT_EXCEPTION
:
19816 /* ??? This is wrong for unified assembly syntax. */
19817 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19821 if (IS_CMSE_ENTRY (func_type
))
19823 /* Check if we have to clear the 'GE bits' which is only used if
19824 parallel add and subtraction instructions are available. */
19825 if (TARGET_INT_SIMD
)
19826 snprintf (instr
, sizeof (instr
),
19827 "msr%s\tAPSR_nzcvqg, %%|lr", conditional
);
19829 snprintf (instr
, sizeof (instr
),
19830 "msr%s\tAPSR_nzcvq, %%|lr", conditional
);
19832 output_asm_insn (instr
, & operand
);
19833 if (TARGET_HARD_FLOAT
&& !TARGET_THUMB1
)
19835 /* Clear the cumulative exception-status bits (0-4,7) and the
19836 condition code bits (28-31) of the FPSCR. We need to
19837 remember to clear the first scratch register used (IP) and
19838 save and restore the second (r4). */
19839 snprintf (instr
, sizeof (instr
), "push\t{%%|r4}");
19840 output_asm_insn (instr
, & operand
);
19841 snprintf (instr
, sizeof (instr
), "vmrs\t%%|ip, fpscr");
19842 output_asm_insn (instr
, & operand
);
19843 snprintf (instr
, sizeof (instr
), "movw\t%%|r4, #65376");
19844 output_asm_insn (instr
, & operand
);
19845 snprintf (instr
, sizeof (instr
), "movt\t%%|r4, #4095");
19846 output_asm_insn (instr
, & operand
);
19847 snprintf (instr
, sizeof (instr
), "and\t%%|ip, %%|r4");
19848 output_asm_insn (instr
, & operand
);
19849 snprintf (instr
, sizeof (instr
), "vmsr\tfpscr, %%|ip");
19850 output_asm_insn (instr
, & operand
);
19851 snprintf (instr
, sizeof (instr
), "pop\t{%%|r4}");
19852 output_asm_insn (instr
, & operand
);
19853 snprintf (instr
, sizeof (instr
), "mov\t%%|ip, %%|lr");
19854 output_asm_insn (instr
, & operand
);
/* Non-secure return: BXNS per ARMv8-M Security Extensions.  */
19856 snprintf (instr
, sizeof (instr
), "bxns\t%%|lr");
19858 /* Use bx if it's available. */
19859 else if (arm_arch5
|| arm_arch4t
)
19860 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19862 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19866 output_asm_insn (instr
, & operand
);
19872 /* Output in FILE asm statements needed to declare the NAME of the function
19873 defined by its DECL node. */
/* Emit the asm declaring function NAME for DECL into FILE; for CMSE
   non-secure entry functions additionally emit and globalize an
   "__acle_se_"-prefixed label so the linker creates secure gateway veneers
   (see comment below).  NOTE(review): lossy extraction -- the return type
   and braces are missing; code preserved byte-for-byte.  */
19876 arm_asm_declare_function_name (FILE *file
, const char *name
, tree decl
)
19878 size_t cmse_name_len
;
19879 char *cmse_name
= 0;
19880 char cmse_prefix
[] = "__acle_se_";
19882 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19883 extra function label for each function with the 'cmse_nonsecure_entry'
19884 attribute. This extra function label should be prepended with
19885 '__acle_se_', telling the linker that it needs to create secure gateway
19886 veneers for this function. */
19887 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_entry",
19888 DECL_ATTRIBUTES (decl
)))
19890 cmse_name_len
= sizeof (cmse_prefix
) + strlen (name
);
19891 cmse_name
= XALLOCAVEC (char, cmse_name_len
);
19892 snprintf (cmse_name
, cmse_name_len
, "%s%s", cmse_prefix
, name
);
19893 targetm
.asm_out
.globalize_label (file
, cmse_name
);
19895 ARM_DECLARE_FUNCTION_NAME (file
, cmse_name
, decl
);
19896 ASM_OUTPUT_TYPE_DIRECTIVE (file
, cmse_name
, "function");
19899 ARM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
19900 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
19901 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
19902 ASM_OUTPUT_LABEL (file
, name
);
/* Presumably guarded by "if (cmse_name)" in the full source -- TODO confirm.  */
19905 ASM_OUTPUT_LABEL (file
, cmse_name
);
19907 ARM_OUTPUT_FN_UNWIND (file
, TRUE
);
19910 /* Write the function name into the code section, directly preceding
19911 the function prologue.
19913 Code will be output similar to this:
19915 .ascii "arm_poke_function_name", 0
19918 .word 0xff000000 + (t1 - t0)
19919 arm_poke_function_name
19921 stmfd sp!, {fp, ip, lr, pc}
19924 When performing a stack backtrace, code can inspect the value
19925 of 'pc' stored at 'fp' + 0. If the trace function then looks
19926 at location pc - 12 and the top 8 bits are set, then we know
19927 that there is a function name embedded immediately preceding this
19928 location and has length ((pc[-3]) & 0xff000000).
19930 We assume that pc is declared as a pointer to an unsigned long.
19932 It is of no benefit to output the function name if we are assembling
19933 a leaf function. These function types will not contain a stack
19934 backtrace structure, therefore it is not possible to determine the
/* Embed NAME in the code section directly before the function prologue,
   followed by a word of 0xff000000 + padded-length, so stack backtracers
   can recover the function name (format described in the comment above).
   NOTE(review): lossy extraction -- return type and declarations for X are
   missing; code preserved byte-for-byte.  */
19937 arm_poke_function_name (FILE *stream
, const char *name
)
19939 unsigned long alignlength
;
19940 unsigned long length
;
/* Include the NUL terminator, then round up to a word boundary.  */
19943 length
= strlen (name
) + 1;
19944 alignlength
= ROUND_UP_WORD (length
);
19946 ASM_OUTPUT_ASCII (stream
, name
, length
);
19947 ASM_OUTPUT_ALIGN (stream
, 2);
/* Marker word: top byte 0xff flags the name, low bits give its length.  */
19948 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19949 assemble_aligned_integer (UNITS_PER_WORD
, x
);
19952 /* Place some comments into the assembler stream
19953 describing the current function. */
/* Emit assembler comments into F describing the current function: its type
   (interworked / ISR / FIQ / exception / naked / volatile / nested /
   stack-align / CMSE entry) and its frame statistics.  NOTE(review):
   lossy extraction -- return type, braces, and some case labels are
   missing; code preserved byte-for-byte.  */
19955 arm_output_function_prologue (FILE *f
)
19957 unsigned long func_type
;
19959 /* Sanity check. */
19960 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19962 func_type
= arm_current_func_type ();
19964 switch ((int) ARM_FUNC_TYPE (func_type
))
19967 case ARM_FT_NORMAL
:
19969 case ARM_FT_INTERWORKED
:
19970 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19973 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19976 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19978 case ARM_FT_EXCEPTION
:
19979 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19983 if (IS_NAKED (func_type
))
19984 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19986 if (IS_VOLATILE (func_type
))
19987 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19989 if (IS_NESTED (func_type
))
19990 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19991 if (IS_STACKALIGN (func_type
))
19992 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19993 if (IS_CMSE_ENTRY (func_type
))
19994 asm_fprintf (f
, "\t%@ Non-secure entry function: called from non-secure code.\n");
19996 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19998 crtl
->args
.pretend_args_size
,
19999 (HOST_WIDE_INT
) get_frame_size ());
20001 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
20002 frame_pointer_needed
,
20003 cfun
->machine
->uses_anonymous_args
);
20005 if (cfun
->machine
->lr_save_eliminated
)
20006 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
20008 if (crtl
->calls_eh_return
)
20009 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
/* Function-epilogue hook: on Thumb targets (presumably -- the guarding
   condition is missing from this extraction; TODO confirm) emit any
   pending v4t call-via-register trampolines; on 32-bit targets sanity
   check the frame-offset bookkeeping against use_return_insn.
   NOTE(review): lossy extraction -- return type, braces, and guard
   conditions are missing; code preserved byte-for-byte.  */
20014 arm_output_function_epilogue (FILE *)
20016 arm_stack_offsets
*offsets
;
20022 /* Emit any call-via-reg trampolines that are needed for v4t support
20023 of call_reg and call_value_reg type insns. */
20024 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
20026 rtx label
= cfun
->machine
->call_via
[regno
];
20030 switch_to_section (function_section (current_function_decl
));
20031 targetm
.asm_out
.internal_label (asm_out_file
, "L",
20032 CODE_LABEL_NUMBER (label
));
20033 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
20037 /* ??? Probably not safe to set this here, since it assumes that a
20038 function will be emitted as assembly immediately after we generate
20039 RTL for it. This does not happen for inline functions. */
20040 cfun
->machine
->return_used_this_function
= 0;
20042 else /* TARGET_32BIT */
20044 /* We need to take into account any stack-frame rounding. */
20045 offsets
= arm_get_frame_offsets ();
20047 gcc_assert (!use_return_insn (FALSE
, NULL
)
20048 || (cfun
->machine
->return_used_this_function
!= 0)
20049 || offsets
->saved_regs
== offsets
->outgoing_args
20050 || frame_pointer_needed
);
20054 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20055 STR and STRD. If an even number of registers are being pushed, one
20056 or more STRD patterns are created for each register pair. If an
20057 odd number of registers are pushed, emit an initial STR followed by
20058 as many STRD instructions as are needed. This works best when the
20059 stack is initially 64-bit aligned (the normal case), since it
20060 ensures that each STRD is also 64-bit aligned. */
/* Emit a push of the registers in SAVED_REGS_MASK using STR/STRD pairs
   (Thumb-2): an initial single STR when the count is odd keeps subsequent
   STRDs dword-aligned; a REG_FRAME_RELATED_EXPR note carrying an explicit
   SP adjustment plus per-register stores provides the DWARF CFI.  See the
   header comment above.  NOTE(review): lossy extraction -- return type,
   braces, and several expression lines are missing; code below is
   preserved byte-for-byte, only comments were added.  */
20062 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
20067 rtx par
= NULL_RTX
;
20068 rtx dwarf
= NULL_RTX
;
20072 num_regs
= bit_count (saved_regs_mask
);
20074 /* Must be at least one register to save, and can't save SP or PC. */
20075 gcc_assert (num_regs
> 0 && num_regs
<= 14);
20076 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20077 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
20079 /* Create sequence for DWARF info. All the frame-related data for
20080 debugging is held in this wrapper. */
20081 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20083 /* Describe the stack adjustment. */
20084 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20085 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20086 RTX_FRAME_RELATED_P (tmp
) = 1;
20087 XVECEXP (dwarf
, 0, 0) = tmp
;
20089 /* Find the first register. */
20090 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
20095 /* If there's an odd number of registers to push. Start off by
20096 pushing a single register. This ensures that subsequent strd
20097 operations are dword aligned (assuming that SP was originally
20098 64-bit aligned). */
20099 if ((num_regs
& 1) != 0)
20101 rtx reg
, mem
, insn
;
20103 reg
= gen_rtx_REG (SImode
, regno
);
20105 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
20106 stack_pointer_rtx
));
20108 mem
= gen_frame_mem (Pmode
,
20110 (Pmode
, stack_pointer_rtx
,
20111 plus_constant (Pmode
, stack_pointer_rtx
,
20114 tmp
= gen_rtx_SET (mem
, reg
);
20115 RTX_FRAME_RELATED_P (tmp
) = 1;
20116 insn
= emit_insn (tmp
);
20117 RTX_FRAME_RELATED_P (insn
) = 1;
20118 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20119 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
20120 RTX_FRAME_RELATED_P (tmp
) = 1;
20123 XVECEXP (dwarf
, 0, i
) = tmp
;
/* Pair up the remaining registers and emit one STRD parallel per pair.  */
20127 while (i
< num_regs
)
20128 if (saved_regs_mask
& (1 << regno
))
20130 rtx reg1
, reg2
, mem1
, mem2
;
20131 rtx tmp0
, tmp1
, tmp2
;
20134 /* Find the register to pair with this one. */
20135 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
20139 reg1
= gen_rtx_REG (SImode
, regno
);
20140 reg2
= gen_rtx_REG (SImode
, regno2
);
/* First pair also performs the full SP adjustment (tmp0).  */
20147 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
20150 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
20152 -4 * (num_regs
- 1)));
20153 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
20154 plus_constant (Pmode
, stack_pointer_rtx
,
20156 tmp1
= gen_rtx_SET (mem1
, reg1
);
20157 tmp2
= gen_rtx_SET (mem2
, reg2
);
20158 RTX_FRAME_RELATED_P (tmp0
) = 1;
20159 RTX_FRAME_RELATED_P (tmp1
) = 1;
20160 RTX_FRAME_RELATED_P (tmp2
) = 1;
20161 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
20162 XVECEXP (par
, 0, 0) = tmp0
;
20163 XVECEXP (par
, 0, 1) = tmp1
;
20164 XVECEXP (par
, 0, 2) = tmp2
;
20165 insn
= emit_insn (par
);
20166 RTX_FRAME_RELATED_P (insn
) = 1;
20167 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
/* Subsequent pairs store at fixed offsets; no SP update needed.  */
20171 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
20174 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
20177 tmp1
= gen_rtx_SET (mem1
, reg1
);
20178 tmp2
= gen_rtx_SET (mem2
, reg2
);
20179 RTX_FRAME_RELATED_P (tmp1
) = 1;
20180 RTX_FRAME_RELATED_P (tmp2
) = 1;
20181 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20182 XVECEXP (par
, 0, 0) = tmp1
;
20183 XVECEXP (par
, 0, 1) = tmp2
;
20187 /* Create unwind information. This is an approximation. */
20188 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
20189 plus_constant (Pmode
,
20193 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
20194 plus_constant (Pmode
,
20199 RTX_FRAME_RELATED_P (tmp1
) = 1;
20200 RTX_FRAME_RELATED_P (tmp2
) = 1;
20201 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
20202 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
20204 regno
= regno2
+ 1;
20212 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20213 whenever possible, otherwise it emits single-word stores. The first store
20214 also allocates stack space for all saved registers, using writeback with
20215 post-addressing mode. All other stores use offset addressing. If no STRD
20216 can be emitted, this function emits a sequence of single-word stores,
20217 and not an STM as before, because single-word stores provide more freedom
20218 scheduling and can be turned into an STM by peephole optimizations. */
/* ARM-mode STRD push: emit STRD for consecutive even/odd register pairs
   and single-word STRs otherwise; the first store allocates the whole
   stack area via PRE_MODIFY writeback, later stores use plain offsets,
   and the DWARF note is attached to that first store.  See the header
   comment above.  NOTE(review): lossy extraction -- return type, braces,
   loop increments, and some expression lines are missing; code below is
   preserved byte-for-byte, only comments were added.  */
20220 arm_emit_strd_push (unsigned long saved_regs_mask
)
20223 int i
, j
, dwarf_index
= 0;
20225 rtx dwarf
= NULL_RTX
;
20226 rtx insn
= NULL_RTX
;
20229 /* TODO: A more efficient code can be emitted by changing the
20230 layout, e.g., first push all pairs that can use STRD to keep the
20231 stack aligned, and then push all other registers. */
20232 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20233 if (saved_regs_mask
& (1 << i
))
20236 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20237 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
20238 gcc_assert (num_regs
> 0);
20240 /* Create sequence for DWARF info. */
20241 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20243 /* For dwarf info, we generate explicit stack update. */
20244 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20245 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20246 RTX_FRAME_RELATED_P (tmp
) = 1;
20247 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20249 /* Save registers. */
20250 offset
= - 4 * num_regs
;
20252 while (j
<= LAST_ARM_REGNUM
)
20253 if (saved_regs_mask
& (1 << j
))
/* ARM STRD needs an even/odd consecutive pair -- presumably the missing
   condition here checks (j % 2 == 0) as well; TODO confirm.  */
20256 && (saved_regs_mask
& (1 << (j
+ 1))))
20258 /* Current register and previous register form register pair for
20259 which STRD can be generated. */
20262 /* Allocate stack space for all saved registers. */
20263 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20264 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20265 mem
= gen_frame_mem (DImode
, tmp
);
20268 else if (offset
> 0)
20269 mem
= gen_frame_mem (DImode
,
20270 plus_constant (Pmode
,
20274 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20276 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
20277 RTX_FRAME_RELATED_P (tmp
) = 1;
20278 tmp
= emit_insn (tmp
);
20280 /* Record the first store insn. */
20281 if (dwarf_index
== 1)
20284 /* Generate dwarf info. */
20285 mem
= gen_frame_mem (SImode
,
20286 plus_constant (Pmode
,
20289 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20290 RTX_FRAME_RELATED_P (tmp
) = 1;
20291 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20293 mem
= gen_frame_mem (SImode
,
20294 plus_constant (Pmode
,
20297 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
20298 RTX_FRAME_RELATED_P (tmp
) = 1;
20299 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20306 /* Emit a single word store. */
20309 /* Allocate stack space for all saved registers. */
20310 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20311 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20312 mem
= gen_frame_mem (SImode
, tmp
);
20315 else if (offset
> 0)
20316 mem
= gen_frame_mem (SImode
,
20317 plus_constant (Pmode
,
20321 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20323 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20324 RTX_FRAME_RELATED_P (tmp
) = 1;
20325 tmp
= emit_insn (tmp
);
20327 /* Record the first store insn. */
20328 if (dwarf_index
== 1)
20331 /* Generate dwarf info. */
20332 mem
= gen_frame_mem (SImode
,
20333 plus_constant(Pmode
,
20336 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20337 RTX_FRAME_RELATED_P (tmp
) = 1;
20338 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20347 /* Attach dwarf info to the first insn we generate. */
20348 gcc_assert (insn
!= NULL_RTX
);
20349 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20350 RTX_FRAME_RELATED_P (insn
) = 1;
20353 /* Generate and emit an insn that we will recognize as a push_multi.
20354 Unfortunately, since this insn does not reflect very well the actual
20355 semantics of the operation, we need to annotate the insn for the benefit
20356 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20357 MASK for registers that should be annotated for DWARF2 frame unwind
20360 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
20363 int num_dwarf_regs
= 0;
20367 int dwarf_par_index
;
20370 /* We don't record the PC in the dwarf frame information. */
20371 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
20373 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20375 if (mask
& (1 << i
))
20377 if (dwarf_regs_mask
& (1 << i
))
20381 gcc_assert (num_regs
&& num_regs
<= 16);
20382 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
20384 /* For the body of the insn we are going to generate an UNSPEC in
20385 parallel with several USEs. This allows the insn to be recognized
20386 by the push_multi pattern in the arm.md file.
20388 The body of the insn looks something like this:
20391 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20392 (const_int:SI <num>)))
20393 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20399 For the frame note however, we try to be more explicit and actually
20400 show each register being stored into the stack frame, plus a (single)
20401 decrement of the stack pointer. We do it this way in order to be
20402 friendly to the stack unwinding code, which only wants to see a single
20403 stack decrement per instruction. The RTL we generate for the note looks
20404 something like this:
20407 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20408 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20409 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20410 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20414 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20415 instead we'd have a parallel expression detailing all
20416 the stores to the various memory addresses so that debug
20417 information is more up-to-date. Remember however while writing
20418 this to take care of the constraints with the push instruction.
20420 Note also that this has to be taken care of for the VFP registers.
20422 For more see PR43399. */
20424 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
20425 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
20426 dwarf_par_index
= 1;
20428 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20430 if (mask
& (1 << i
))
20432 reg
= gen_rtx_REG (SImode
, i
);
20434 XVECEXP (par
, 0, 0)
20435 = gen_rtx_SET (gen_frame_mem
20437 gen_rtx_PRE_MODIFY (Pmode
,
20440 (Pmode
, stack_pointer_rtx
,
20443 gen_rtx_UNSPEC (BLKmode
,
20444 gen_rtvec (1, reg
),
20445 UNSPEC_PUSH_MULT
));
20447 if (dwarf_regs_mask
& (1 << i
))
20449 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
20451 RTX_FRAME_RELATED_P (tmp
) = 1;
20452 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20459 for (j
= 1, i
++; j
< num_regs
; i
++)
20461 if (mask
& (1 << i
))
20463 reg
= gen_rtx_REG (SImode
, i
);
20465 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
20467 if (dwarf_regs_mask
& (1 << i
))
20470 = gen_rtx_SET (gen_frame_mem
20472 plus_constant (Pmode
, stack_pointer_rtx
,
20475 RTX_FRAME_RELATED_P (tmp
) = 1;
20476 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20483 par
= emit_insn (par
);
20485 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20486 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20487 RTX_FRAME_RELATED_P (tmp
) = 1;
20488 XVECEXP (dwarf
, 0, 0) = tmp
;
20490 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
20495 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20496 SIZE is the offset to be adjusted.
20497 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20499 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
20503 RTX_FRAME_RELATED_P (insn
) = 1;
20504 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
20505 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
20508 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20509 SAVED_REGS_MASK shows which registers need to be restored.
20511 Unfortunately, since this insn does not reflect very well the actual
20512 semantics of the operation, we need to annotate the insn for the benefit
20513 of DWARF2 frame unwind information. */
20515 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20520 rtx dwarf
= NULL_RTX
;
20522 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20526 offset_adj
= return_in_pc
? 1 : 0;
20527 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20528 if (saved_regs_mask
& (1 << i
))
20531 gcc_assert (num_regs
&& num_regs
<= 16);
20533 /* If SP is in reglist, then we don't emit SP update insn. */
20534 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20536 /* The parallel needs to hold num_regs SETs
20537 and one SET for the stack update. */
20538 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20541 XVECEXP (par
, 0, 0) = ret_rtx
;
20545 /* Increment the stack pointer, based on there being
20546 num_regs 4-byte registers to restore. */
20547 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20548 plus_constant (Pmode
,
20551 RTX_FRAME_RELATED_P (tmp
) = 1;
20552 XVECEXP (par
, 0, offset_adj
) = tmp
;
20555 /* Now restore every reg, which may include PC. */
20556 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20557 if (saved_regs_mask
& (1 << i
))
20559 reg
= gen_rtx_REG (SImode
, i
);
20560 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20562 /* Emit single load with writeback. */
20563 tmp
= gen_frame_mem (SImode
,
20564 gen_rtx_POST_INC (Pmode
,
20565 stack_pointer_rtx
));
20566 tmp
= emit_insn (gen_rtx_SET (reg
, tmp
));
20567 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20571 tmp
= gen_rtx_SET (reg
,
20574 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20575 RTX_FRAME_RELATED_P (tmp
) = 1;
20576 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20578 /* We need to maintain a sequence for DWARF info too. As dwarf info
20579 should not have PC, skip PC. */
20580 if (i
!= PC_REGNUM
)
20581 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20587 par
= emit_jump_insn (par
);
20589 par
= emit_insn (par
);
20591 REG_NOTES (par
) = dwarf
;
20593 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20594 stack_pointer_rtx
, stack_pointer_rtx
);
20597 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20598 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20600 Unfortunately, since this insn does not reflect very well the actual
20601 semantics of the operation, we need to annotate the insn for the benefit
20602 of DWARF2 frame unwind information. */
20604 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20608 rtx dwarf
= NULL_RTX
;
20611 gcc_assert (num_regs
&& num_regs
<= 32);
20613 /* Workaround ARM10 VFPr1 bug. */
20614 if (num_regs
== 2 && !arm_arch6
)
20616 if (first_reg
== 15)
20622 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20623 there could be up to 32 D-registers to restore.
20624 If there are more than 16 D-registers, make two recursive calls,
20625 each of which emits one pop_multi instruction. */
20628 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20629 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20633 /* The parallel needs to hold num_regs SETs
20634 and one SET for the stack update. */
20635 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20637 /* Increment the stack pointer, based on there being
20638 num_regs 8-byte registers to restore. */
20639 tmp
= gen_rtx_SET (base_reg
, plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20640 RTX_FRAME_RELATED_P (tmp
) = 1;
20641 XVECEXP (par
, 0, 0) = tmp
;
20643 /* Now show every reg that will be restored, using a SET for each. */
20644 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20646 reg
= gen_rtx_REG (DFmode
, i
);
20648 tmp
= gen_rtx_SET (reg
,
20651 plus_constant (Pmode
, base_reg
, 8 * j
)));
20652 RTX_FRAME_RELATED_P (tmp
) = 1;
20653 XVECEXP (par
, 0, j
+ 1) = tmp
;
20655 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20660 par
= emit_insn (par
);
20661 REG_NOTES (par
) = dwarf
;
20663 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20664 if (REGNO (base_reg
) == IP_REGNUM
)
20666 RTX_FRAME_RELATED_P (par
) = 1;
20667 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20670 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20671 base_reg
, base_reg
);
20674 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20675 number of registers are being popped, multiple LDRD patterns are created for
20676 all register pairs. If odd number of registers are popped, last register is
20677 loaded by using LDR pattern. */
20679 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20683 rtx par
= NULL_RTX
;
20684 rtx dwarf
= NULL_RTX
;
20685 rtx tmp
, reg
, tmp1
;
20686 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20688 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20689 if (saved_regs_mask
& (1 << i
))
20692 gcc_assert (num_regs
&& num_regs
<= 16);
20694 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20695 to be popped. So, if num_regs is even, now it will become odd,
20696 and we can generate pop with PC. If num_regs is odd, it will be
20697 even now, and ldr with return can be generated for PC. */
20701 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20703 /* Var j iterates over all the registers to gather all the registers in
20704 saved_regs_mask. Var i gives index of saved registers in stack frame.
20705 A PARALLEL RTX of register-pair is created here, so that pattern for
20706 LDRD can be matched. As PC is always last register to be popped, and
20707 we have already decremented num_regs if PC, we don't have to worry
20708 about PC in this loop. */
20709 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20710 if (saved_regs_mask
& (1 << j
))
20712 /* Create RTX for memory load. */
20713 reg
= gen_rtx_REG (SImode
, j
);
20714 tmp
= gen_rtx_SET (reg
,
20715 gen_frame_mem (SImode
,
20716 plus_constant (Pmode
,
20717 stack_pointer_rtx
, 4 * i
)));
20718 RTX_FRAME_RELATED_P (tmp
) = 1;
20722 /* When saved-register index (i) is even, the RTX to be emitted is
20723 yet to be created. Hence create it first. The LDRD pattern we
20724 are generating is :
20725 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20726 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20727 where target registers need not be consecutive. */
20728 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20732 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20733 added as 0th element and if i is odd, reg_i is added as 1st element
20734 of LDRD pattern shown above. */
20735 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20736 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20740 /* When saved-register index (i) is odd, RTXs for both the registers
20741 to be loaded are generated in above given LDRD pattern, and the
20742 pattern can be emitted now. */
20743 par
= emit_insn (par
);
20744 REG_NOTES (par
) = dwarf
;
20745 RTX_FRAME_RELATED_P (par
) = 1;
20751 /* If the number of registers pushed is odd AND return_in_pc is false OR
20752 number of registers are even AND return_in_pc is true, last register is
20753 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20754 then LDR with post increment. */
20756 /* Increment the stack pointer, based on there being
20757 num_regs 4-byte registers to restore. */
20758 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20759 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20760 RTX_FRAME_RELATED_P (tmp
) = 1;
20761 tmp
= emit_insn (tmp
);
20764 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20765 stack_pointer_rtx
, stack_pointer_rtx
);
20770 if (((num_regs
% 2) == 1 && !return_in_pc
)
20771 || ((num_regs
% 2) == 0 && return_in_pc
))
20773 /* Scan for the single register to be popped. Skip until the saved
20774 register is found. */
20775 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20777 /* Gen LDR with post increment here. */
20778 tmp1
= gen_rtx_MEM (SImode
,
20779 gen_rtx_POST_INC (SImode
,
20780 stack_pointer_rtx
));
20781 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20783 reg
= gen_rtx_REG (SImode
, j
);
20784 tmp
= gen_rtx_SET (reg
, tmp1
);
20785 RTX_FRAME_RELATED_P (tmp
) = 1;
20786 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20790 /* If return_in_pc, j must be PC_REGNUM. */
20791 gcc_assert (j
== PC_REGNUM
);
20792 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20793 XVECEXP (par
, 0, 0) = ret_rtx
;
20794 XVECEXP (par
, 0, 1) = tmp
;
20795 par
= emit_jump_insn (par
);
20799 par
= emit_insn (tmp
);
20800 REG_NOTES (par
) = dwarf
;
20801 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20802 stack_pointer_rtx
, stack_pointer_rtx
);
20806 else if ((num_regs
% 2) == 1 && return_in_pc
)
20808 /* There are 2 registers to be popped. So, generate the pattern
20809 pop_multiple_with_stack_update_and_return to pop in PC. */
20810 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20816 /* LDRD in ARM mode needs consecutive registers as operands. This function
20817 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20818 offset addressing and then generates one separate stack udpate. This provides
20819 more scheduling freedom, compared to writeback on every load. However,
20820 if the function returns using load into PC directly
20821 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20822 before the last load. TODO: Add a peephole optimization to recognize
20823 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20824 peephole optimization to merge the load at stack-offset zero
20825 with the stack update instruction using load with writeback
20826 in post-index addressing mode. */
20828 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20832 rtx par
= NULL_RTX
;
20833 rtx dwarf
= NULL_RTX
;
20836 /* Restore saved registers. */
20837 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20839 while (j
<= LAST_ARM_REGNUM
)
20840 if (saved_regs_mask
& (1 << j
))
20843 && (saved_regs_mask
& (1 << (j
+ 1)))
20844 && (j
+ 1) != PC_REGNUM
)
20846 /* Current register and next register form register pair for which
20847 LDRD can be generated. PC is always the last register popped, and
20848 we handle it separately. */
20850 mem
= gen_frame_mem (DImode
,
20851 plus_constant (Pmode
,
20855 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20857 tmp
= gen_rtx_SET (gen_rtx_REG (DImode
, j
), mem
);
20858 tmp
= emit_insn (tmp
);
20859 RTX_FRAME_RELATED_P (tmp
) = 1;
20861 /* Generate dwarf info. */
20863 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20864 gen_rtx_REG (SImode
, j
),
20866 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20867 gen_rtx_REG (SImode
, j
+ 1),
20870 REG_NOTES (tmp
) = dwarf
;
20875 else if (j
!= PC_REGNUM
)
20877 /* Emit a single word load. */
20879 mem
= gen_frame_mem (SImode
,
20880 plus_constant (Pmode
,
20884 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20886 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, j
), mem
);
20887 tmp
= emit_insn (tmp
);
20888 RTX_FRAME_RELATED_P (tmp
) = 1;
20890 /* Generate dwarf info. */
20891 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20892 gen_rtx_REG (SImode
, j
),
20898 else /* j == PC_REGNUM */
20904 /* Update the stack. */
20907 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20908 plus_constant (Pmode
,
20911 tmp
= emit_insn (tmp
);
20912 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20913 stack_pointer_rtx
, stack_pointer_rtx
);
20917 if (saved_regs_mask
& (1 << PC_REGNUM
))
20919 /* Only PC is to be popped. */
20920 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20921 XVECEXP (par
, 0, 0) = ret_rtx
;
20922 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, PC_REGNUM
),
20923 gen_frame_mem (SImode
,
20924 gen_rtx_POST_INC (SImode
,
20925 stack_pointer_rtx
)));
20926 RTX_FRAME_RELATED_P (tmp
) = 1;
20927 XVECEXP (par
, 0, 1) = tmp
;
20928 par
= emit_jump_insn (par
);
20930 /* Generate dwarf info. */
20931 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20932 gen_rtx_REG (SImode
, PC_REGNUM
),
20934 REG_NOTES (par
) = dwarf
;
20935 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20936 stack_pointer_rtx
, stack_pointer_rtx
);
20940 /* Calculate the size of the return value that is passed in registers. */
20942 arm_size_return_regs (void)
20946 if (crtl
->return_rtx
!= 0)
20947 mode
= GET_MODE (crtl
->return_rtx
);
20949 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20951 return GET_MODE_SIZE (mode
);
20954 /* Return true if the current function needs to save/restore LR. */
20956 thumb_force_lr_save (void)
20958 return !cfun
->machine
->lr_save_eliminated
20960 || thumb_far_jump_used_p ()
20961 || df_regs_ever_live_p (LR_REGNUM
));
20964 /* We do not know if r3 will be available because
20965 we do have an indirect tailcall happening in this
20966 particular case. */
20968 is_indirect_tailcall_p (rtx call
)
20970 rtx pat
= PATTERN (call
);
20972 /* Indirect tail call. */
20973 pat
= XVECEXP (pat
, 0, 0);
20974 if (GET_CODE (pat
) == SET
)
20975 pat
= SET_SRC (pat
);
20977 pat
= XEXP (XEXP (pat
, 0), 0);
20978 return REG_P (pat
);
20981 /* Return true if r3 is used by any of the tail call insns in the
20982 current function. */
20984 any_sibcall_could_use_r3 (void)
20989 if (!crtl
->tail_call_emit
)
20991 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20992 if (e
->flags
& EDGE_SIBCALL
)
20994 rtx_insn
*call
= BB_END (e
->src
);
20995 if (!CALL_P (call
))
20996 call
= prev_nonnote_nondebug_insn (call
);
20997 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20998 if (find_regno_fusage (call
, USE
, 3)
20999 || is_indirect_tailcall_p (call
))
21006 /* Compute the distance from register FROM to register TO.
21007 These can be the arg pointer (26), the soft frame pointer (25),
21008 the stack pointer (13) or the hard frame pointer (11).
21009 In thumb mode r7 is used as the soft frame pointer, if needed.
21010 Typical stack layout looks like this:
21012 old stack pointer -> | |
21015 | | saved arguments for
21016 | | vararg functions
21019 hard FP & arg pointer -> | | \
21027 soft frame pointer -> | | /
21032 locals base pointer -> | | /
21037 current stack pointer -> | | /
21040 For a given function some or all of these stack components
21041 may not be needed, giving rise to the possibility of
21042 eliminating some of the registers.
21044 The values returned by this function must reflect the behavior
21045 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21047 The sign of the number returned reflects the direction of stack
21048 growth, so the values are positive for all eliminations except
21049 from the soft frame pointer to the hard frame pointer.
21051 SFP may point just inside the local variables block to ensure correct
21055 /* Return cached stack offsets. */
21057 static arm_stack_offsets
*
21058 arm_get_frame_offsets (void)
21060 struct arm_stack_offsets
*offsets
;
21062 offsets
= &cfun
->machine
->stack_offsets
;
21068 /* Calculate stack offsets. These are used to calculate register elimination
21069 offsets and in prologue/epilogue code. Also calculates which registers
21070 should be saved. */
21073 arm_compute_frame_layout (void)
21075 struct arm_stack_offsets
*offsets
;
21076 unsigned long func_type
;
21079 HOST_WIDE_INT frame_size
;
21082 offsets
= &cfun
->machine
->stack_offsets
;
21084 /* Initially this is the size of the local variables. It will translated
21085 into an offset once we have determined the size of preceding data. */
21086 frame_size
= ROUND_UP_WORD (get_frame_size ());
21088 /* Space for variadic functions. */
21089 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
21091 /* In Thumb mode this is incorrect, but never used. */
21093 = (offsets
->saved_args
21094 + arm_compute_static_chain_stack_bytes ()
21095 + (frame_pointer_needed
? 4 : 0));
21099 unsigned int regno
;
21101 offsets
->saved_regs_mask
= arm_compute_save_core_reg_mask ();
21102 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
21103 saved
= core_saved
;
21105 /* We know that SP will be doubleword aligned on entry, and we must
21106 preserve that condition at any subroutine call. We also require the
21107 soft frame pointer to be doubleword aligned. */
21109 if (TARGET_REALLY_IWMMXT
)
21111 /* Check for the call-saved iWMMXt registers. */
21112 for (regno
= FIRST_IWMMXT_REGNUM
;
21113 regno
<= LAST_IWMMXT_REGNUM
;
21115 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
21119 func_type
= arm_current_func_type ();
21120 /* Space for saved VFP registers. */
21121 if (! IS_VOLATILE (func_type
)
21122 && TARGET_HARD_FLOAT
)
21123 saved
+= arm_get_vfp_saved_size ();
21125 else /* TARGET_THUMB1 */
21127 offsets
->saved_regs_mask
= thumb1_compute_save_core_reg_mask ();
21128 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
21129 saved
= core_saved
;
21130 if (TARGET_BACKTRACE
)
21134 /* Saved registers include the stack frame. */
21135 offsets
->saved_regs
21136 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
21137 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
21139 /* A leaf function does not need any stack alignment if it has nothing
21141 if (crtl
->is_leaf
&& frame_size
== 0
21142 /* However if it calls alloca(), we have a dynamically allocated
21143 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21144 && ! cfun
->calls_alloca
)
21146 offsets
->outgoing_args
= offsets
->soft_frame
;
21147 offsets
->locals_base
= offsets
->soft_frame
;
21151 /* Ensure SFP has the correct alignment. */
21152 if (ARM_DOUBLEWORD_ALIGN
21153 && (offsets
->soft_frame
& 7))
21155 offsets
->soft_frame
+= 4;
21156 /* Try to align stack by pushing an extra reg. Don't bother doing this
21157 when there is a stack frame as the alignment will be rolled into
21158 the normal stack adjustment. */
21159 if (frame_size
+ crtl
->outgoing_args_size
== 0)
21163 /* Register r3 is caller-saved. Normally it does not need to be
21164 saved on entry by the prologue. However if we choose to save
21165 it for padding then we may confuse the compiler into thinking
21166 a prologue sequence is required when in fact it is not. This
21167 will occur when shrink-wrapping if r3 is used as a scratch
21168 register and there are no other callee-saved writes.
21170 This situation can be avoided when other callee-saved registers
21171 are available and r3 is not mandatory if we choose a callee-saved
21172 register for padding. */
21173 bool prefer_callee_reg_p
= false;
21175 /* If it is safe to use r3, then do so. This sometimes
21176 generates better code on Thumb-2 by avoiding the need to
21177 use 32-bit push/pop instructions. */
21178 if (! any_sibcall_could_use_r3 ()
21179 && arm_size_return_regs () <= 12
21180 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
21182 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
21185 if (!TARGET_THUMB2
)
21186 prefer_callee_reg_p
= true;
21189 || prefer_callee_reg_p
)
21191 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
21193 /* Avoid fixed registers; they may be changed at
21194 arbitrary times so it's unsafe to restore them
21195 during the epilogue. */
21197 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
21207 offsets
->saved_regs
+= 4;
21208 offsets
->saved_regs_mask
|= (1 << reg
);
21213 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
21214 offsets
->outgoing_args
= (offsets
->locals_base
21215 + crtl
->outgoing_args_size
);
21217 if (ARM_DOUBLEWORD_ALIGN
)
21219 /* Ensure SP remains doubleword aligned. */
21220 if (offsets
->outgoing_args
& 7)
21221 offsets
->outgoing_args
+= 4;
21222 gcc_assert (!(offsets
->outgoing_args
& 7));
21227 /* Calculate the relative offsets for the different stack pointers. Positive
21228 offsets are in the direction of stack growth. */
21231 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
21233 arm_stack_offsets
*offsets
;
21235 offsets
= arm_get_frame_offsets ();
21237 /* OK, now we have enough information to compute the distances.
21238 There must be an entry in these switch tables for each pair
21239 of registers in ELIMINABLE_REGS, even if some of the entries
21240 seem to be redundant or useless. */
21243 case ARG_POINTER_REGNUM
:
21246 case THUMB_HARD_FRAME_POINTER_REGNUM
:
21249 case FRAME_POINTER_REGNUM
:
21250 /* This is the reverse of the soft frame pointer
21251 to hard frame pointer elimination below. */
21252 return offsets
->soft_frame
- offsets
->saved_args
;
21254 case ARM_HARD_FRAME_POINTER_REGNUM
:
21255 /* This is only non-zero in the case where the static chain register
21256 is stored above the frame. */
21257 return offsets
->frame
- offsets
->saved_args
- 4;
21259 case STACK_POINTER_REGNUM
:
21260 /* If nothing has been pushed on the stack at all
21261 then this will return -4. This *is* correct! */
21262 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
21265 gcc_unreachable ();
21267 gcc_unreachable ();
21269 case FRAME_POINTER_REGNUM
:
21272 case THUMB_HARD_FRAME_POINTER_REGNUM
:
21275 case ARM_HARD_FRAME_POINTER_REGNUM
:
21276 /* The hard frame pointer points to the top entry in the
21277 stack frame. The soft frame pointer to the bottom entry
21278 in the stack frame. If there is no stack frame at all,
21279 then they are identical. */
21281 return offsets
->frame
- offsets
->soft_frame
;
21283 case STACK_POINTER_REGNUM
:
21284 return offsets
->outgoing_args
- offsets
->soft_frame
;
21287 gcc_unreachable ();
21289 gcc_unreachable ();
21292 /* You cannot eliminate from the stack pointer.
21293 In theory you could eliminate from the hard frame
21294 pointer to the stack pointer, but this will never
21295 happen, since if a stack frame is not needed the
21296 hard frame pointer will never be used. */
21297 gcc_unreachable ();
21301 /* Given FROM and TO register numbers, say whether this elimination is
21302 allowed. Frame pointer elimination is automatically handled.
21304 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21305 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21306 pointer, we must eliminate FRAME_POINTER_REGNUM into
21307 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21308 ARG_POINTER_REGNUM. */
21311 arm_can_eliminate (const int from
, const int to
)
21313 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
21314 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
21315 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
21316 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
21320 /* Emit RTL to save coprocessor registers on function entry. Returns the
21321 number of bytes pushed. */
21324 arm_save_coproc_regs(void)
21326 int saved_size
= 0;
21328 unsigned start_reg
;
21331 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
21332 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
21334 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21335 insn
= gen_rtx_MEM (V2SImode
, insn
);
21336 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
21337 RTX_FRAME_RELATED_P (insn
) = 1;
21341 if (TARGET_HARD_FLOAT
)
21343 start_reg
= FIRST_VFP_REGNUM
;
21345 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
21347 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
21348 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
21350 if (start_reg
!= reg
)
21351 saved_size
+= vfp_emit_fstmd (start_reg
,
21352 (reg
- start_reg
) / 2);
21353 start_reg
= reg
+ 2;
21356 if (start_reg
!= reg
)
21357 saved_size
+= vfp_emit_fstmd (start_reg
,
21358 (reg
- start_reg
) / 2);
21364 /* Set the Thumb frame pointer from the stack pointer. */
21367 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
21369 HOST_WIDE_INT amount
;
21372 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
21374 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21375 stack_pointer_rtx
, GEN_INT (amount
)));
21378 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21379 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21380 expects the first two operands to be the same. */
21383 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21385 hard_frame_pointer_rtx
));
21389 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21390 hard_frame_pointer_rtx
,
21391 stack_pointer_rtx
));
21393 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
21394 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21395 RTX_FRAME_RELATED_P (dwarf
) = 1;
21396 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21399 RTX_FRAME_RELATED_P (insn
) = 1;
21402 struct scratch_reg
{
21407 /* Return a short-lived scratch register for use as a 2nd scratch register on
21408 function entry after the registers are saved in the prologue. This register
21409 must be released by means of release_scratch_register_on_entry. IP is not
21410 considered since it is always used as the 1st scratch register if available.
21412 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21413 mask of live registers. */
21416 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
21417 unsigned long live_regs
)
21423 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
21429 for (i
= 4; i
< 11; i
++)
21430 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
21438 /* If IP is used as the 1st scratch register for a nested function,
21439 then either r3 wasn't available or is used to preserve IP. */
21440 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
21442 regno
= (regno1
== 3 ? 2 : 3);
21444 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
21449 sr
->reg
= gen_rtx_REG (SImode
, regno
);
21452 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21453 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
21454 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21455 plus_constant (Pmode
, stack_pointer_rtx
, -4));
21456 RTX_FRAME_RELATED_P (insn
) = 1;
21457 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21461 /* Release a scratch register obtained from the preceding function. */
21464 release_scratch_register_on_entry (struct scratch_reg
*sr
)
21468 rtx addr
= gen_rtx_POST_INC (Pmode
, stack_pointer_rtx
);
21469 rtx insn
= emit_set_insn (sr
->reg
, gen_frame_mem (SImode
, addr
));
21470 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21471 plus_constant (Pmode
, stack_pointer_rtx
, 4));
21472 RTX_FRAME_RELATED_P (insn
) = 1;
21473 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21477 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21479 #if PROBE_INTERVAL > 4096
21480 #error Cannot use indexed addressing mode for stack probing
21483 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21484 inclusive. These are offsets from the current stack pointer. REGNO1
21485 is the index number of the 1st scratch register and LIVE_REGS is the
21486 mask of live registers. */
21489 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
21490 unsigned int regno1
, unsigned long live_regs
)
21492 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
21494 /* See if we have a constant small number of probes to generate. If so,
21495 that's the easy case. */
21496 if (size
<= PROBE_INTERVAL
)
21498 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21499 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21500 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
21503 /* The run-time loop is made up of 10 insns in the generic case while the
21504 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21505 else if (size
<= 5 * PROBE_INTERVAL
)
21507 HOST_WIDE_INT i
, rem
;
21509 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21510 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21511 emit_stack_probe (reg1
);
21513 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21514 it exceeds SIZE. If only two probes are needed, this will not
21515 generate any code. Then probe at FIRST + SIZE. */
21516 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
21518 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21519 emit_stack_probe (reg1
);
21522 rem
= size
- (i
- PROBE_INTERVAL
);
21523 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21525 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21526 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
21529 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
21532 /* Otherwise, do the same as above, but in a loop. Note that we must be
21533 extra careful with variables wrapping around because we might be at
21534 the very top (or the very bottom) of the address space and we have
21535 to be able to handle this case properly; in particular, we use an
21536 equality test for the loop condition. */
21539 HOST_WIDE_INT rounded_size
;
21540 struct scratch_reg sr
;
21542 get_scratch_register_on_entry (&sr
, regno1
, live_regs
);
21544 emit_move_insn (reg1
, GEN_INT (first
));
21547 /* Step 1: round SIZE to the previous multiple of the interval. */
21549 rounded_size
= size
& -PROBE_INTERVAL
;
21550 emit_move_insn (sr
.reg
, GEN_INT (rounded_size
));
21553 /* Step 2: compute initial and final value of the loop counter. */
21555 /* TEST_ADDR = SP + FIRST. */
21556 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21558 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21559 emit_set_insn (sr
.reg
, gen_rtx_MINUS (Pmode
, reg1
, sr
.reg
));
21562 /* Step 3: the loop
21566 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21569 while (TEST_ADDR != LAST_ADDR)
21571 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21572 until it is equal to ROUNDED_SIZE. */
21574 emit_insn (gen_probe_stack_range (reg1
, reg1
, sr
.reg
));
21577 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21578 that SIZE is equal to ROUNDED_SIZE. */
21580 if (size
!= rounded_size
)
21582 HOST_WIDE_INT rem
= size
- rounded_size
;
21584 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21586 emit_set_insn (sr
.reg
,
21587 plus_constant (Pmode
, sr
.reg
, -PROBE_INTERVAL
));
21588 emit_stack_probe (plus_constant (Pmode
, sr
.reg
,
21589 PROBE_INTERVAL
- rem
));
21592 emit_stack_probe (plus_constant (Pmode
, sr
.reg
, -rem
));
21595 release_scratch_register_on_entry (&sr
);
21598 /* Make sure nothing is scheduled before we are done. */
21599 emit_insn (gen_blockage ());
21602 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21603 absolute addresses. */
21606 output_probe_stack_range (rtx reg1
, rtx reg2
)
21608 static int labelno
= 0;
21612 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
21615 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
21617 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21619 xops
[1] = GEN_INT (PROBE_INTERVAL
);
21620 output_asm_insn ("sub\t%0, %0, %1", xops
);
21622 /* Probe at TEST_ADDR. */
21623 output_asm_insn ("str\tr0, [%0, #0]", xops
);
21625 /* Test if TEST_ADDR == LAST_ADDR. */
21627 output_asm_insn ("cmp\t%0, %1", xops
);
21630 fputs ("\tbne\t", asm_out_file
);
21631 assemble_name_raw (asm_out_file
, loop_lab
);
21632 fputc ('\n', asm_out_file
);
21637 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21640 arm_expand_prologue (void)
21645 unsigned long live_regs_mask
;
21646 unsigned long func_type
;
21648 int saved_pretend_args
= 0;
21649 int saved_regs
= 0;
21650 unsigned HOST_WIDE_INT args_to_push
;
21651 HOST_WIDE_INT size
;
21652 arm_stack_offsets
*offsets
;
21655 func_type
= arm_current_func_type ();
21657 /* Naked functions don't have prologues. */
21658 if (IS_NAKED (func_type
))
21660 if (flag_stack_usage_info
)
21661 current_function_static_stack_size
= 0;
21665 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21666 args_to_push
= crtl
->args
.pretend_args_size
;
21668 /* Compute which register we will have to save onto the stack. */
21669 offsets
= arm_get_frame_offsets ();
21670 live_regs_mask
= offsets
->saved_regs_mask
;
21672 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
21674 if (IS_STACKALIGN (func_type
))
21678 /* Handle a word-aligned stack pointer. We generate the following:
21683 <save and restore r0 in normal prologue/epilogue>
21687 The unwinder doesn't need to know about the stack realignment.
21688 Just tell it we saved SP in r0. */
21689 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
21691 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
21692 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
21694 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
21695 RTX_FRAME_RELATED_P (insn
) = 1;
21696 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
21698 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
21700 /* ??? The CFA changes here, which may cause GDB to conclude that it
21701 has entered a different function. That said, the unwind info is
21702 correct, individually, before and after this instruction because
21703 we've described the save of SP, which will override the default
21704 handling of SP as restoring from the CFA. */
21705 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
21708 /* The static chain register is the same as the IP register. If it is
21709 clobbered when creating the frame, we need to save and restore it. */
21710 clobber_ip
= IS_NESTED (func_type
)
21711 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21712 || ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
21713 || flag_stack_clash_protection
)
21714 && !df_regs_ever_live_p (LR_REGNUM
)
21715 && arm_r3_live_at_start_p ()));
21717 /* Find somewhere to store IP whilst the frame is being created.
21718 We try the following places in order:
21720 1. The last argument register r3 if it is available.
21721 2. A slot on the stack above the frame if there are no
21722 arguments to push onto the stack.
21723 3. Register r3 again, after pushing the argument registers
21724 onto the stack, if this is a varargs function.
21725 4. The last slot on the stack created for the arguments to
21726 push, if this isn't a varargs function.
21728 Note - we only need to tell the dwarf2 backend about the SP
21729 adjustment in the second variant; the static chain register
21730 doesn't need to be unwound, as it doesn't contain a value
21731 inherited from the caller. */
21734 if (!arm_r3_live_at_start_p ())
21735 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21736 else if (args_to_push
== 0)
21740 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21743 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21744 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21747 /* Just tell the dwarf backend that we adjusted SP. */
21748 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21749 plus_constant (Pmode
, stack_pointer_rtx
,
21751 RTX_FRAME_RELATED_P (insn
) = 1;
21752 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21756 /* Store the args on the stack. */
21757 if (cfun
->machine
->uses_anonymous_args
)
21759 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21760 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21761 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21762 saved_pretend_args
= 1;
21768 if (args_to_push
== 4)
21769 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21771 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21772 plus_constant (Pmode
,
21776 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21778 /* Just tell the dwarf backend that we adjusted SP. */
21779 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21780 plus_constant (Pmode
, stack_pointer_rtx
,
21782 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21785 RTX_FRAME_RELATED_P (insn
) = 1;
21786 fp_offset
= args_to_push
;
21791 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21793 if (IS_INTERRUPT (func_type
))
21795 /* Interrupt functions must not corrupt any registers.
21796 Creating a frame pointer however, corrupts the IP
21797 register, so we must push it first. */
21798 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
21800 /* Do not set RTX_FRAME_RELATED_P on this insn.
21801 The dwarf stack unwinding code only wants to see one
21802 stack decrement per function, and this is not it. If
21803 this instruction is labeled as being part of the frame
21804 creation sequence then dwarf2out_frame_debug_expr will
21805 die when it encounters the assignment of IP to FP
21806 later on, since the use of SP here establishes SP as
21807 the CFA register and not IP.
21809 Anyway this instruction is not really part of the stack
21810 frame creation although it is part of the prologue. */
21813 insn
= emit_set_insn (ip_rtx
,
21814 plus_constant (Pmode
, stack_pointer_rtx
,
21816 RTX_FRAME_RELATED_P (insn
) = 1;
21821 /* Push the argument registers, or reserve space for them. */
21822 if (cfun
->machine
->uses_anonymous_args
)
21823 insn
= emit_multi_reg_push
21824 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21825 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21828 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21829 GEN_INT (- args_to_push
)));
21830 RTX_FRAME_RELATED_P (insn
) = 1;
21833 /* If this is an interrupt service routine, and the link register
21834 is going to be pushed, and we're not generating extra
21835 push of IP (needed when frame is needed and frame layout if apcs),
21836 subtracting four from LR now will mean that the function return
21837 can be done with a single instruction. */
21838 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21839 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21840 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21843 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21845 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
21848 if (live_regs_mask
)
21850 unsigned long dwarf_regs_mask
= live_regs_mask
;
21852 saved_regs
+= bit_count (live_regs_mask
) * 4;
21853 if (optimize_size
&& !frame_pointer_needed
21854 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
21856 /* If no coprocessor registers are being pushed and we don't have
21857 to worry about a frame pointer then push extra registers to
21858 create the stack frame. This is done in a way that does not
21859 alter the frame layout, so is independent of the epilogue. */
21863 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21865 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21866 if (frame
&& n
* 4 >= frame
)
21869 live_regs_mask
|= (1 << n
) - 1;
21870 saved_regs
+= frame
;
21875 && current_tune
->prefer_ldrd_strd
21876 && !optimize_function_for_size_p (cfun
))
21878 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21880 thumb2_emit_strd_push (live_regs_mask
);
21881 else if (TARGET_ARM
21882 && !TARGET_APCS_FRAME
21883 && !IS_INTERRUPT (func_type
))
21884 arm_emit_strd_push (live_regs_mask
);
21887 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21888 RTX_FRAME_RELATED_P (insn
) = 1;
21893 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21894 RTX_FRAME_RELATED_P (insn
) = 1;
21898 if (! IS_VOLATILE (func_type
))
21899 saved_regs
+= arm_save_coproc_regs ();
21901 if (frame_pointer_needed
&& TARGET_ARM
)
21903 /* Create the new frame pointer. */
21904 if (TARGET_APCS_FRAME
)
21906 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21907 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21908 RTX_FRAME_RELATED_P (insn
) = 1;
21912 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
21913 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21914 stack_pointer_rtx
, insn
));
21915 RTX_FRAME_RELATED_P (insn
) = 1;
21919 size
= offsets
->outgoing_args
- offsets
->saved_args
;
21920 if (flag_stack_usage_info
)
21921 current_function_static_stack_size
= size
;
21923 /* If this isn't an interrupt service routine and we have a frame, then do
21924 stack checking. We use IP as the first scratch register, except for the
21925 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21926 if (!IS_INTERRUPT (func_type
)
21927 && (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
21928 || flag_stack_clash_protection
))
21930 unsigned int regno
;
21932 if (!IS_NESTED (func_type
) || clobber_ip
)
21934 else if (df_regs_ever_live_p (LR_REGNUM
))
21939 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
21941 if (size
> PROBE_INTERVAL
&& size
> get_stack_check_protect ())
21942 arm_emit_probe_stack_range (get_stack_check_protect (),
21943 size
- get_stack_check_protect (),
21944 regno
, live_regs_mask
);
21947 arm_emit_probe_stack_range (get_stack_check_protect (), size
,
21948 regno
, live_regs_mask
);
21951 /* Recover the static chain register. */
21954 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21955 insn
= gen_rtx_REG (SImode
, 3);
21958 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21959 insn
= gen_frame_mem (SImode
, insn
);
21961 emit_set_insn (ip_rtx
, insn
);
21962 emit_insn (gen_force_register_use (ip_rtx
));
21965 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21967 /* This add can produce multiple insns for a large constant, so we
21968 need to get tricky. */
21969 rtx_insn
*last
= get_last_insn ();
21971 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21972 - offsets
->outgoing_args
);
21974 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21978 last
= last
? NEXT_INSN (last
) : get_insns ();
21979 RTX_FRAME_RELATED_P (last
) = 1;
21981 while (last
!= insn
);
21983 /* If the frame pointer is needed, emit a special barrier that
21984 will prevent the scheduler from moving stores to the frame
21985 before the stack adjustment. */
21986 if (frame_pointer_needed
)
21987 emit_insn (gen_stack_tie (stack_pointer_rtx
,
21988 hard_frame_pointer_rtx
));
21992 if (frame_pointer_needed
&& TARGET_THUMB2
)
21993 thumb_set_frame_pointer (offsets
);
21995 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21997 unsigned long mask
;
21999 mask
= live_regs_mask
;
22000 mask
&= THUMB2_WORK_REGS
;
22001 if (!IS_NESTED (func_type
))
22002 mask
|= (1 << IP_REGNUM
);
22003 arm_load_pic_register (mask
);
22006 /* If we are profiling, make sure no instructions are scheduled before
22007 the call to mcount. Similarly if the user has requested no
22008 scheduling in the prolog. Similarly if we want non-call exceptions
22009 using the EABI unwinder, to prevent faulting instructions from being
22010 swapped with a stack adjustment. */
22011 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
22012 || (arm_except_unwind_info (&global_options
) == UI_TARGET
22013 && cfun
->can_throw_non_call_exceptions
))
22014 emit_insn (gen_blockage ());
22016 /* If the link register is being kept alive, with the return address in it,
22017 then make sure that it does not get reused by the ce2 pass. */
22018 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
22019 cfun
->machine
->lr_save_eliminated
= 1;
22022 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22024 arm_print_condition (FILE *stream
)
22026 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
22028 /* Branch conversion is not implemented for Thumb-2. */
22031 output_operand_lossage ("predicated Thumb instruction");
22034 if (current_insn_predicate
!= NULL
)
22036 output_operand_lossage
22037 ("predicated instruction in conditional sequence");
22041 fputs (arm_condition_codes
[arm_current_cc
], stream
);
22043 else if (current_insn_predicate
)
22045 enum arm_cond_code code
;
22049 output_operand_lossage ("predicated Thumb instruction");
22053 code
= get_arm_condition_code (current_insn_predicate
);
22054 fputs (arm_condition_codes
[code
], stream
);
22059 /* Globally reserved letters: acln
22060 Puncutation letters currently used: @_|?().!#
22061 Lower case letters currently used: bcdefhimpqtvwxyz
22062 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22063 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22065 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22067 If CODE is 'd', then the X is a condition operand and the instruction
22068 should only be executed if the condition is true.
22069 if CODE is 'D', then the X is a condition operand and the instruction
22070 should only be executed if the condition is false: however, if the mode
22071 of the comparison is CCFPEmode, then always execute the instruction -- we
22072 do this because in these circumstances !GE does not necessarily imply LT;
22073 in these cases the instruction pattern will take care to make sure that
22074 an instruction containing %d will follow, thereby undoing the effects of
22075 doing this instruction unconditionally.
22076 If CODE is 'N' then X is a floating point operand that must be negated
22078 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22079 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
22081 arm_print_operand (FILE *stream
, rtx x
, int code
)
22086 fputs (ASM_COMMENT_START
, stream
);
22090 fputs (user_label_prefix
, stream
);
22094 fputs (REGISTER_PREFIX
, stream
);
22098 arm_print_condition (stream
);
22102 /* The current condition code for a condition code setting instruction.
22103 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22104 fputc('s', stream
);
22105 arm_print_condition (stream
);
22109 /* If the instruction is conditionally executed then print
22110 the current condition code, otherwise print 's'. */
22111 gcc_assert (TARGET_THUMB2
);
22112 if (current_insn_predicate
)
22113 arm_print_condition (stream
);
22115 fputc('s', stream
);
22118 /* %# is a "break" sequence. It doesn't output anything, but is used to
22119 separate e.g. operand numbers from following text, if that text consists
22120 of further digits which we don't want to be part of the operand
22128 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
22129 fprintf (stream
, "%s", fp_const_from_val (&r
));
22133 /* An integer or symbol address without a preceding # sign. */
22135 switch (GET_CODE (x
))
22138 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
22142 output_addr_const (stream
, x
);
22146 if (GET_CODE (XEXP (x
, 0)) == PLUS
22147 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
22149 output_addr_const (stream
, x
);
22152 /* Fall through. */
22155 output_operand_lossage ("Unsupported operand for code '%c'", code
);
22159 /* An integer that we want to print in HEX. */
22161 switch (GET_CODE (x
))
22164 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
22168 output_operand_lossage ("Unsupported operand for code '%c'", code
);
22173 if (CONST_INT_P (x
))
22176 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
22177 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
22181 putc ('~', stream
);
22182 output_addr_const (stream
, x
);
22187 /* Print the log2 of a CONST_INT. */
22191 if (!CONST_INT_P (x
)
22192 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
22193 output_operand_lossage ("Unsupported operand for code '%c'", code
);
22195 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
22200 /* The low 16 bits of an immediate constant. */
22201 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
22205 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
22209 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
22217 shift
= shift_op (x
, &val
);
22221 fprintf (stream
, ", %s ", shift
);
22223 arm_print_operand (stream
, XEXP (x
, 1), 0);
22225 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
22230 /* An explanation of the 'Q', 'R' and 'H' register operands:
22232 In a pair of registers containing a DI or DF value the 'Q'
22233 operand returns the register number of the register containing
22234 the least significant part of the value. The 'R' operand returns
22235 the register number of the register containing the most
22236 significant part of the value.
22238 The 'H' operand returns the higher of the two register numbers.
22239 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22240 same as the 'Q' operand, since the most significant part of the
22241 value is held in the lower number register. The reverse is true
22242 on systems where WORDS_BIG_ENDIAN is false.
22244 The purpose of these operands is to distinguish between cases
22245 where the endian-ness of the values is important (for example
22246 when they are added together), and cases where the endian-ness
22247 is irrelevant, but the order of register operations is important.
22248 For example when loading a value from memory into a register
22249 pair, the endian-ness does not matter. Provided that the value
22250 from the lower memory address is put into the lower numbered
22251 register, and the value from the higher address is put into the
22252 higher numbered register, the load will work regardless of whether
22253 the value being loaded is big-wordian or little-wordian. The
22254 order of the two register loads can matter however, if the address
22255 of the memory location is actually held in one of the registers
22256 being overwritten by the load.
22258 The 'Q' and 'R' constraints are also available for 64-bit
22261 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
22263 rtx part
= gen_lowpart (SImode
, x
);
22264 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
22268 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22270 output_operand_lossage ("invalid operand for code '%c'", code
);
22274 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
22278 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
22280 machine_mode mode
= GET_MODE (x
);
22283 if (mode
== VOIDmode
)
22285 part
= gen_highpart_mode (SImode
, mode
, x
);
22286 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
22290 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22292 output_operand_lossage ("invalid operand for code '%c'", code
);
22296 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
22300 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22302 output_operand_lossage ("invalid operand for code '%c'", code
);
22306 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
22310 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22312 output_operand_lossage ("invalid operand for code '%c'", code
);
22316 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
22320 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22322 output_operand_lossage ("invalid operand for code '%c'", code
);
22326 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
22330 asm_fprintf (stream
, "%r",
22331 REG_P (XEXP (x
, 0))
22332 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
22336 asm_fprintf (stream
, "{%r-%r}",
22338 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
22341 /* Like 'M', but writing doubleword vector registers, for use by Neon
22345 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
22346 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
22348 asm_fprintf (stream
, "{d%d}", regno
);
22350 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
22355 /* CONST_TRUE_RTX means always -- that's the default. */
22356 if (x
== const_true_rtx
)
22359 if (!COMPARISON_P (x
))
22361 output_operand_lossage ("invalid operand for code '%c'", code
);
22365 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
22370 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22371 want to do that. */
22372 if (x
== const_true_rtx
)
22374 output_operand_lossage ("instruction never executed");
22377 if (!COMPARISON_P (x
))
22379 output_operand_lossage ("invalid operand for code '%c'", code
);
22383 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
22384 (get_arm_condition_code (x
))],
22394 /* Former Maverick support, removed after GCC-4.7. */
22395 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
22400 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
22401 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
22402 /* Bad value for wCG register number. */
22404 output_operand_lossage ("invalid operand for code '%c'", code
);
22409 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
22412 /* Print an iWMMXt control register name. */
22414 if (!CONST_INT_P (x
)
22416 || INTVAL (x
) >= 16)
22417 /* Bad value for wC register number. */
22419 output_operand_lossage ("invalid operand for code '%c'", code
);
22425 static const char * wc_reg_names
[16] =
22427 "wCID", "wCon", "wCSSF", "wCASF",
22428 "wC4", "wC5", "wC6", "wC7",
22429 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22430 "wC12", "wC13", "wC14", "wC15"
22433 fputs (wc_reg_names
[INTVAL (x
)], stream
);
22437 /* Print the high single-precision register of a VFP double-precision
22441 machine_mode mode
= GET_MODE (x
);
22444 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
22446 output_operand_lossage ("invalid operand for code '%c'", code
);
22451 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
22453 output_operand_lossage ("invalid operand for code '%c'", code
);
22457 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
22461 /* Print a VFP/Neon double precision or quad precision register name. */
22465 machine_mode mode
= GET_MODE (x
);
22466 int is_quad
= (code
== 'q');
22469 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
22471 output_operand_lossage ("invalid operand for code '%c'", code
);
22476 || !IS_VFP_REGNUM (REGNO (x
)))
22478 output_operand_lossage ("invalid operand for code '%c'", code
);
22483 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
22484 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
22486 output_operand_lossage ("invalid operand for code '%c'", code
);
22490 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
22491 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
22495 /* These two codes print the low/high doubleword register of a Neon quad
22496 register, respectively. For pair-structure types, can also print
22497 low/high quadword registers. */
22501 machine_mode mode
= GET_MODE (x
);
22504 if ((GET_MODE_SIZE (mode
) != 16
22505 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
22507 output_operand_lossage ("invalid operand for code '%c'", code
);
22512 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
22514 output_operand_lossage ("invalid operand for code '%c'", code
);
22518 if (GET_MODE_SIZE (mode
) == 16)
22519 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
22520 + (code
== 'f' ? 1 : 0));
22522 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
22523 + (code
== 'f' ? 1 : 0));
22527 /* Print a VFPv3 floating-point constant, represented as an integer
22531 int index
= vfp3_const_double_index (x
);
22532 gcc_assert (index
!= -1);
22533 fprintf (stream
, "%d", index
);
22537 /* Print bits representing opcode features for Neon.
22539 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22540 and polynomials as unsigned.
22542 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22544 Bit 2 is 1 for rounding functions, 0 otherwise. */
22546 /* Identify the type as 's', 'u', 'p' or 'f'. */
22549 HOST_WIDE_INT bits
= INTVAL (x
);
22550 fputc ("uspf"[bits
& 3], stream
);
22554 /* Likewise, but signed and unsigned integers are both 'i'. */
22557 HOST_WIDE_INT bits
= INTVAL (x
);
22558 fputc ("iipf"[bits
& 3], stream
);
22562 /* As for 'T', but emit 'u' instead of 'p'. */
22565 HOST_WIDE_INT bits
= INTVAL (x
);
22566 fputc ("usuf"[bits
& 3], stream
);
22570 /* Bit 2: rounding (vs none). */
22573 HOST_WIDE_INT bits
= INTVAL (x
);
22574 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
22578 /* Memory operand for vld1/vst1 instruction. */
22582 bool postinc
= FALSE
;
22583 rtx postinc_reg
= NULL
;
22584 unsigned align
, memsize
, align_bits
;
22586 gcc_assert (MEM_P (x
));
22587 addr
= XEXP (x
, 0);
22588 if (GET_CODE (addr
) == POST_INC
)
22591 addr
= XEXP (addr
, 0);
22593 if (GET_CODE (addr
) == POST_MODIFY
)
22595 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
22596 addr
= XEXP (addr
, 0);
22598 asm_fprintf (stream
, "[%r", REGNO (addr
));
22600 /* We know the alignment of this access, so we can emit a hint in the
22601 instruction (for some alignments) as an aid to the memory subsystem
22603 align
= MEM_ALIGN (x
) >> 3;
22604 memsize
= MEM_SIZE (x
);
22606 /* Only certain alignment specifiers are supported by the hardware. */
22607 if (memsize
== 32 && (align
% 32) == 0)
22609 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
22611 else if (memsize
>= 8 && (align
% 8) == 0)
22616 if (align_bits
!= 0)
22617 asm_fprintf (stream
, ":%d", align_bits
);
22619 asm_fprintf (stream
, "]");
22622 fputs("!", stream
);
22624 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
22632 gcc_assert (MEM_P (x
));
22633 addr
= XEXP (x
, 0);
22634 gcc_assert (REG_P (addr
));
22635 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22639 /* Translate an S register number into a D register number and element index. */
22642 machine_mode mode
= GET_MODE (x
);
22645 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22647 output_operand_lossage ("invalid operand for code '%c'", code
);
22652 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22654 output_operand_lossage ("invalid operand for code '%c'", code
);
22658 regno
= regno
- FIRST_VFP_REGNUM
;
22659 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
22664 gcc_assert (CONST_DOUBLE_P (x
));
22666 result
= vfp3_const_double_for_fract_bits (x
);
22668 result
= vfp3_const_double_for_bits (x
);
22669 fprintf (stream
, "#%d", result
);
22672 /* Register specifier for vld1.16/vst1.16. Translate the S register
22673 number into a D register number and element index. */
22676 machine_mode mode
= GET_MODE (x
);
22679 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
22681 output_operand_lossage ("invalid operand for code '%c'", code
);
22686 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22688 output_operand_lossage ("invalid operand for code '%c'", code
);
22692 regno
= regno
- FIRST_VFP_REGNUM
;
22693 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
22700 output_operand_lossage ("missing operand");
22704 switch (GET_CODE (x
))
22707 asm_fprintf (stream
, "%r", REGNO (x
));
22711 output_address (GET_MODE (x
), XEXP (x
, 0));
22717 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
22718 sizeof (fpstr
), 0, 1);
22719 fprintf (stream
, "#%s", fpstr
);
22724 gcc_assert (GET_CODE (x
) != NEG
);
22725 fputc ('#', stream
);
22726 if (GET_CODE (x
) == HIGH
)
22728 fputs (":lower16:", stream
);
22732 output_addr_const (stream
, x
);
22738 /* Target hook for printing a memory address. */
22740 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
22744 int is_minus
= GET_CODE (x
) == MINUS
;
22747 asm_fprintf (stream
, "[%r]", REGNO (x
));
22748 else if (GET_CODE (x
) == PLUS
|| is_minus
)
22750 rtx base
= XEXP (x
, 0);
22751 rtx index
= XEXP (x
, 1);
22752 HOST_WIDE_INT offset
= 0;
22754 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
22756 /* Ensure that BASE is a register. */
22757 /* (one of them must be). */
22758 /* Also ensure the SP is not used as in index register. */
22759 std::swap (base
, index
);
22761 switch (GET_CODE (index
))
22764 offset
= INTVAL (index
);
22767 asm_fprintf (stream
, "[%r, #%wd]",
22768 REGNO (base
), offset
);
22772 asm_fprintf (stream
, "[%r, %s%r]",
22773 REGNO (base
), is_minus
? "-" : "",
22783 asm_fprintf (stream
, "[%r, %s%r",
22784 REGNO (base
), is_minus
? "-" : "",
22785 REGNO (XEXP (index
, 0)));
22786 arm_print_operand (stream
, index
, 'S');
22787 fputs ("]", stream
);
22792 gcc_unreachable ();
22795 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22796 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22798 gcc_assert (REG_P (XEXP (x
, 0)));
22800 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22801 asm_fprintf (stream
, "[%r, #%s%d]!",
22802 REGNO (XEXP (x
, 0)),
22803 GET_CODE (x
) == PRE_DEC
? "-" : "",
22804 GET_MODE_SIZE (mode
));
22806 asm_fprintf (stream
, "[%r], #%s%d",
22807 REGNO (XEXP (x
, 0)),
22808 GET_CODE (x
) == POST_DEC
? "-" : "",
22809 GET_MODE_SIZE (mode
));
22811 else if (GET_CODE (x
) == PRE_MODIFY
)
22813 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22814 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22815 asm_fprintf (stream
, "#%wd]!",
22816 INTVAL (XEXP (XEXP (x
, 1), 1)));
22818 asm_fprintf (stream
, "%r]!",
22819 REGNO (XEXP (XEXP (x
, 1), 1)));
22821 else if (GET_CODE (x
) == POST_MODIFY
)
22823 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22824 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22825 asm_fprintf (stream
, "#%wd",
22826 INTVAL (XEXP (XEXP (x
, 1), 1)));
22828 asm_fprintf (stream
, "%r",
22829 REGNO (XEXP (XEXP (x
, 1), 1)));
22831 else output_addr_const (stream
, x
);
22836 asm_fprintf (stream
, "[%r]", REGNO (x
));
22837 else if (GET_CODE (x
) == POST_INC
)
22838 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22839 else if (GET_CODE (x
) == PLUS
)
22841 gcc_assert (REG_P (XEXP (x
, 0)));
22842 if (CONST_INT_P (XEXP (x
, 1)))
22843 asm_fprintf (stream
, "[%r, #%wd]",
22844 REGNO (XEXP (x
, 0)),
22845 INTVAL (XEXP (x
, 1)));
22847 asm_fprintf (stream
, "[%r, %r]",
22848 REGNO (XEXP (x
, 0)),
22849 REGNO (XEXP (x
, 1)));
22852 output_addr_const (stream
, x
);
22856 /* Target hook for indicating whether a punctuation character for
22857 TARGET_PRINT_OPERAND is valid. */
22859 arm_print_operand_punct_valid_p (unsigned char code
)
22861 return (code
== '@' || code
== '|' || code
== '.'
22862 || code
== '(' || code
== ')' || code
== '#'
22863 || (TARGET_32BIT
&& (code
== '?'))
22864 || (TARGET_THUMB2
&& (code
== '!'))
22865 || (TARGET_THUMB
&& (code
== '_')));
22868 /* Target hook for assembling integer objects. The ARM version needs to
22869 handle word-sized values specially. */
22871 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
22875 if (size
== UNITS_PER_WORD
&& aligned_p
)
22877 fputs ("\t.word\t", asm_out_file
);
22878 output_addr_const (asm_out_file
, x
);
22880 /* Mark symbols as position independent. We only do this in the
22881 .text segment, not in the .data segment. */
22882 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
22883 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
22885 /* See legitimize_pic_address for an explanation of the
22886 TARGET_VXWORKS_RTP check. */
22887 /* References to weak symbols cannot be resolved locally:
22888 they may be overridden by a non-weak definition at link
22890 if (!arm_pic_data_is_text_relative
22891 || (GET_CODE (x
) == SYMBOL_REF
22892 && (!SYMBOL_REF_LOCAL_P (x
)
22893 || (SYMBOL_REF_DECL (x
)
22894 ? DECL_WEAK (SYMBOL_REF_DECL (x
)) : 0))))
22895 fputs ("(GOT)", asm_out_file
);
22897 fputs ("(GOTOFF)", asm_out_file
);
22899 fputc ('\n', asm_out_file
);
22903 mode
= GET_MODE (x
);
22905 if (arm_vector_mode_supported_p (mode
))
22909 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22911 units
= CONST_VECTOR_NUNITS (x
);
22912 size
= GET_MODE_UNIT_SIZE (mode
);
22914 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22915 for (i
= 0; i
< units
; i
++)
22917 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22919 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
22922 for (i
= 0; i
< units
; i
++)
22924 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22926 (*CONST_DOUBLE_REAL_VALUE (elt
),
22927 as_a
<scalar_float_mode
> (GET_MODE_INNER (mode
)),
22928 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
22934 return default_assemble_integer (x
, size
, aligned_p
);
22938 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
22942 if (!TARGET_AAPCS_BASED
)
22945 default_named_section_asm_out_constructor
22946 : default_named_section_asm_out_destructor
) (symbol
, priority
);
22950 /* Put these in the .init_array section, using a special relocation. */
22951 if (priority
!= DEFAULT_INIT_PRIORITY
)
22954 sprintf (buf
, "%s.%.5u",
22955 is_ctor
? ".init_array" : ".fini_array",
22957 s
= get_section (buf
, SECTION_WRITE
| SECTION_NOTYPE
, NULL_TREE
);
22964 switch_to_section (s
);
22965 assemble_align (POINTER_SIZE
);
22966 fputs ("\t.word\t", asm_out_file
);
22967 output_addr_const (asm_out_file
, symbol
);
22968 fputs ("(target1)\n", asm_out_file
);
22971 /* Add a function to the list of static constructors. */
22974 arm_elf_asm_constructor (rtx symbol
, int priority
)
22976 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
22979 /* Add a function to the list of static destructors. */
22982 arm_elf_asm_destructor (rtx symbol
, int priority
)
22984 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
22987 /* A finite state machine takes care of noticing whether or not instructions
22988 can be conditionally executed, and thus decrease execution time and code
22989 size by deleting branch instructions. The fsm is controlled by
22990 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22992 /* The state of the fsm controlling condition codes are:
22993 0: normal, do nothing special
22994 1: make ASM_OUTPUT_OPCODE not output this instruction
22995 2: make ASM_OUTPUT_OPCODE not output this instruction
22996 3: make instructions conditional
22997 4: make instructions conditional
22999 State transitions (state->state by whom under condition):
23000 0 -> 1 final_prescan_insn if the `target' is a label
23001 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23002 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23003 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23004 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23005 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23006 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23007 (the target insn is arm_target_insn).
23009 If the jump clobbers the conditions then we use states 2 and 4.
23011 A similar thing can be done with conditional return insns.
23013 XXX In case the `target' is an unconditional branch, this conditionalising
23014 of the instructions always reduces code size, but not always execution
23015 time. But then, I want to reduce the code size to somewhere near what
23016 /bin/cc produces. */
23018 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23019 instructions. When a COND_EXEC instruction is seen the subsequent
23020 instructions are scanned so that multiple conditional instructions can be
23021 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23022 specify the length and true/false mask for the IT block. These will be
23023 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
23025 /* Returns the index of the ARM condition code string in
23026 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23027 COMPARISON should be an rtx like `(eq (...) (...))'. */
23030 maybe_get_arm_condition_code (rtx comparison
)
23032 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
23033 enum arm_cond_code code
;
23034 enum rtx_code comp_code
= GET_CODE (comparison
);
23036 if (GET_MODE_CLASS (mode
) != MODE_CC
)
23037 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
23038 XEXP (comparison
, 1));
23042 case E_CC_DNEmode
: code
= ARM_NE
; goto dominance
;
23043 case E_CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
23044 case E_CC_DGEmode
: code
= ARM_GE
; goto dominance
;
23045 case E_CC_DGTmode
: code
= ARM_GT
; goto dominance
;
23046 case E_CC_DLEmode
: code
= ARM_LE
; goto dominance
;
23047 case E_CC_DLTmode
: code
= ARM_LT
; goto dominance
;
23048 case E_CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
23049 case E_CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
23050 case E_CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
23051 case E_CC_DLTUmode
: code
= ARM_CC
;
23054 if (comp_code
== EQ
)
23055 return ARM_INVERSE_CONDITION_CODE (code
);
23056 if (comp_code
== NE
)
23060 case E_CC_NOOVmode
:
23063 case NE
: return ARM_NE
;
23064 case EQ
: return ARM_EQ
;
23065 case GE
: return ARM_PL
;
23066 case LT
: return ARM_MI
;
23067 default: return ARM_NV
;
23073 case NE
: return ARM_NE
;
23074 case EQ
: return ARM_EQ
;
23075 default: return ARM_NV
;
23081 case NE
: return ARM_MI
;
23082 case EQ
: return ARM_PL
;
23083 default: return ARM_NV
;
23088 /* We can handle all cases except UNEQ and LTGT. */
23091 case GE
: return ARM_GE
;
23092 case GT
: return ARM_GT
;
23093 case LE
: return ARM_LS
;
23094 case LT
: return ARM_MI
;
23095 case NE
: return ARM_NE
;
23096 case EQ
: return ARM_EQ
;
23097 case ORDERED
: return ARM_VC
;
23098 case UNORDERED
: return ARM_VS
;
23099 case UNLT
: return ARM_LT
;
23100 case UNLE
: return ARM_LE
;
23101 case UNGT
: return ARM_HI
;
23102 case UNGE
: return ARM_PL
;
23103 /* UNEQ and LTGT do not have a representation. */
23104 case UNEQ
: /* Fall through. */
23105 case LTGT
: /* Fall through. */
23106 default: return ARM_NV
;
23112 case NE
: return ARM_NE
;
23113 case EQ
: return ARM_EQ
;
23114 case GE
: return ARM_LE
;
23115 case GT
: return ARM_LT
;
23116 case LE
: return ARM_GE
;
23117 case LT
: return ARM_GT
;
23118 case GEU
: return ARM_LS
;
23119 case GTU
: return ARM_CC
;
23120 case LEU
: return ARM_CS
;
23121 case LTU
: return ARM_HI
;
23122 default: return ARM_NV
;
23128 case LTU
: return ARM_CS
;
23129 case GEU
: return ARM_CC
;
23130 case NE
: return ARM_CS
;
23131 case EQ
: return ARM_CC
;
23132 default: return ARM_NV
;
23138 case NE
: return ARM_NE
;
23139 case EQ
: return ARM_EQ
;
23140 case GEU
: return ARM_CS
;
23141 case GTU
: return ARM_HI
;
23142 case LEU
: return ARM_LS
;
23143 case LTU
: return ARM_CC
;
23144 default: return ARM_NV
;
23150 case GE
: return ARM_GE
;
23151 case LT
: return ARM_LT
;
23152 case GEU
: return ARM_CS
;
23153 case LTU
: return ARM_CC
;
23154 default: return ARM_NV
;
23160 case NE
: return ARM_VS
;
23161 case EQ
: return ARM_VC
;
23162 default: return ARM_NV
;
23168 case NE
: return ARM_NE
;
23169 case EQ
: return ARM_EQ
;
23170 case GE
: return ARM_GE
;
23171 case GT
: return ARM_GT
;
23172 case LE
: return ARM_LE
;
23173 case LT
: return ARM_LT
;
23174 case GEU
: return ARM_CS
;
23175 case GTU
: return ARM_HI
;
23176 case LEU
: return ARM_LS
;
23177 case LTU
: return ARM_CC
;
23178 default: return ARM_NV
;
23181 default: gcc_unreachable ();
23185 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23186 static enum arm_cond_code
23187 get_arm_condition_code (rtx comparison
)
23189 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
23190 gcc_assert (code
!= ARM_NV
);
23194 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
23195 code registers when not targetting Thumb1. The VFP condition register
23196 only exists when generating hard-float code. */
23198 arm_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
23204 *p2
= TARGET_HARD_FLOAT
? VFPCC_REGNUM
: INVALID_REGNUM
;
23208 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23211 thumb2_final_prescan_insn (rtx_insn
*insn
)
23213 rtx_insn
*first_insn
= insn
;
23214 rtx body
= PATTERN (insn
);
23216 enum arm_cond_code code
;
23221 /* max_insns_skipped in the tune was already taken into account in the
23222 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
23223 just emit the IT blocks as we can. It does not make sense to split
23225 max
= MAX_INSN_PER_IT_BLOCK
;
23227 /* Remove the previous insn from the count of insns to be output. */
23228 if (arm_condexec_count
)
23229 arm_condexec_count
--;
23231 /* Nothing to do if we are already inside a conditional block. */
23232 if (arm_condexec_count
)
23235 if (GET_CODE (body
) != COND_EXEC
)
23238 /* Conditional jumps are implemented directly. */
23242 predicate
= COND_EXEC_TEST (body
);
23243 arm_current_cc
= get_arm_condition_code (predicate
);
23245 n
= get_attr_ce_count (insn
);
23246 arm_condexec_count
= 1;
23247 arm_condexec_mask
= (1 << n
) - 1;
23248 arm_condexec_masklen
= n
;
23249 /* See if subsequent instructions can be combined into the same block. */
23252 insn
= next_nonnote_insn (insn
);
23254 /* Jumping into the middle of an IT block is illegal, so a label or
23255 barrier terminates the block. */
23256 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
23259 body
= PATTERN (insn
);
23260 /* USE and CLOBBER aren't really insns, so just skip them. */
23261 if (GET_CODE (body
) == USE
23262 || GET_CODE (body
) == CLOBBER
)
23265 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23266 if (GET_CODE (body
) != COND_EXEC
)
23268 /* Maximum number of conditionally executed instructions in a block. */
23269 n
= get_attr_ce_count (insn
);
23270 if (arm_condexec_masklen
+ n
> max
)
23273 predicate
= COND_EXEC_TEST (body
);
23274 code
= get_arm_condition_code (predicate
);
23275 mask
= (1 << n
) - 1;
23276 if (arm_current_cc
== code
)
23277 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
23278 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
23281 arm_condexec_count
++;
23282 arm_condexec_masklen
+= n
;
23284 /* A jump must be the last instruction in a conditional block. */
23288 /* Restore recog_data (getting the attributes of other insns can
23289 destroy this array, but final.c assumes that it remains intact
23290 across this call). */
23291 extract_constrain_insn_cached (first_insn
);
23295 arm_final_prescan_insn (rtx_insn
*insn
)
23297 /* BODY will hold the body of INSN. */
23298 rtx body
= PATTERN (insn
);
23300 /* This will be 1 if trying to repeat the trick, and things need to be
23301 reversed if it appears to fail. */
23304 /* If we start with a return insn, we only succeed if we find another one. */
23305 int seeking_return
= 0;
23306 enum rtx_code return_code
= UNKNOWN
;
23308 /* START_INSN will hold the insn from where we start looking. This is the
23309 first insn after the following code_label if REVERSE is true. */
23310 rtx_insn
*start_insn
= insn
;
23312 /* If in state 4, check if the target branch is reached, in order to
23313 change back to state 0. */
23314 if (arm_ccfsm_state
== 4)
23316 if (insn
== arm_target_insn
)
23318 arm_target_insn
= NULL
;
23319 arm_ccfsm_state
= 0;
23324 /* If in state 3, it is possible to repeat the trick, if this insn is an
23325 unconditional branch to a label, and immediately following this branch
23326 is the previous target label which is only used once, and the label this
23327 branch jumps to is not too far off. */
23328 if (arm_ccfsm_state
== 3)
23330 if (simplejump_p (insn
))
23332 start_insn
= next_nonnote_insn (start_insn
);
23333 if (BARRIER_P (start_insn
))
23335 /* XXX Isn't this always a barrier? */
23336 start_insn
= next_nonnote_insn (start_insn
);
23338 if (LABEL_P (start_insn
)
23339 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23340 && LABEL_NUSES (start_insn
) == 1)
23345 else if (ANY_RETURN_P (body
))
23347 start_insn
= next_nonnote_insn (start_insn
);
23348 if (BARRIER_P (start_insn
))
23349 start_insn
= next_nonnote_insn (start_insn
);
23350 if (LABEL_P (start_insn
)
23351 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23352 && LABEL_NUSES (start_insn
) == 1)
23355 seeking_return
= 1;
23356 return_code
= GET_CODE (body
);
23365 gcc_assert (!arm_ccfsm_state
|| reverse
);
23366 if (!JUMP_P (insn
))
23369 /* This jump might be paralleled with a clobber of the condition codes
23370 the jump should always come first */
23371 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
23372 body
= XVECEXP (body
, 0, 0);
23375 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
23376 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
23379 int fail
= FALSE
, succeed
= FALSE
;
23380 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23381 int then_not_else
= TRUE
;
23382 rtx_insn
*this_insn
= start_insn
;
23385 /* Register the insn jumped to. */
23388 if (!seeking_return
)
23389 label
= XEXP (SET_SRC (body
), 0);
23391 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
23392 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
23393 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
23395 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
23396 then_not_else
= FALSE
;
23398 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
23400 seeking_return
= 1;
23401 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
23403 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
23405 seeking_return
= 1;
23406 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
23407 then_not_else
= FALSE
;
23410 gcc_unreachable ();
23412 /* See how many insns this branch skips, and what kind of insns. If all
23413 insns are okay, and the label or unconditional branch to the same
23414 label is not too far away, succeed. */
23415 for (insns_skipped
= 0;
23416 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
23420 this_insn
= next_nonnote_insn (this_insn
);
23424 switch (GET_CODE (this_insn
))
23427 /* Succeed if it is the target label, otherwise fail since
23428 control falls in from somewhere else. */
23429 if (this_insn
== label
)
23431 arm_ccfsm_state
= 1;
23439 /* Succeed if the following insn is the target label.
23441 If return insns are used then the last insn in a function
23442 will be a barrier. */
23443 this_insn
= next_nonnote_insn (this_insn
);
23444 if (this_insn
&& this_insn
== label
)
23446 arm_ccfsm_state
= 1;
23454 /* The AAPCS says that conditional calls should not be
23455 used since they make interworking inefficient (the
23456 linker can't transform BL<cond> into BLX). That's
23457 only a problem if the machine has BLX. */
23464 /* Succeed if the following insn is the target label, or
23465 if the following two insns are a barrier and the
23467 this_insn
= next_nonnote_insn (this_insn
);
23468 if (this_insn
&& BARRIER_P (this_insn
))
23469 this_insn
= next_nonnote_insn (this_insn
);
23471 if (this_insn
&& this_insn
== label
23472 && insns_skipped
< max_insns_skipped
)
23474 arm_ccfsm_state
= 1;
23482 /* If this is an unconditional branch to the same label, succeed.
23483 If it is to another label, do nothing. If it is conditional,
23485 /* XXX Probably, the tests for SET and the PC are
23488 scanbody
= PATTERN (this_insn
);
23489 if (GET_CODE (scanbody
) == SET
23490 && GET_CODE (SET_DEST (scanbody
)) == PC
)
23492 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
23493 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
23495 arm_ccfsm_state
= 2;
23498 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
23501 /* Fail if a conditional return is undesirable (e.g. on a
23502 StrongARM), but still allow this if optimizing for size. */
23503 else if (GET_CODE (scanbody
) == return_code
23504 && !use_return_insn (TRUE
, NULL
)
23507 else if (GET_CODE (scanbody
) == return_code
)
23509 arm_ccfsm_state
= 2;
23512 else if (GET_CODE (scanbody
) == PARALLEL
)
23514 switch (get_attr_conds (this_insn
))
23524 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
23529 /* Instructions using or affecting the condition codes make it
23531 scanbody
= PATTERN (this_insn
);
23532 if (!(GET_CODE (scanbody
) == SET
23533 || GET_CODE (scanbody
) == PARALLEL
)
23534 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
23544 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
23545 arm_target_label
= CODE_LABEL_NUMBER (label
);
23548 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
23550 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
23552 this_insn
= next_nonnote_insn (this_insn
);
23553 gcc_assert (!this_insn
23554 || (!BARRIER_P (this_insn
)
23555 && !LABEL_P (this_insn
)));
23559 /* Oh, dear! we ran off the end.. give up. */
23560 extract_constrain_insn_cached (insn
);
23561 arm_ccfsm_state
= 0;
23562 arm_target_insn
= NULL
;
23565 arm_target_insn
= this_insn
;
23568 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23571 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
23573 if (reverse
|| then_not_else
)
23574 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
23577 /* Restore recog_data (getting the attributes of other insns can
23578 destroy this array, but final.c assumes that it remains intact
23579 across this call. */
23580 extract_constrain_insn_cached (insn
);
23584 /* Output IT instructions. */
23586 thumb2_asm_output_opcode (FILE * stream
)
23591 if (arm_condexec_mask
)
23593 for (n
= 0; n
< arm_condexec_masklen
; n
++)
23594 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
23596 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
23597 arm_condition_codes
[arm_current_cc
]);
23598 arm_condexec_mask
= 0;
23602 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23603 UNITS_PER_WORD bytes wide. */
23604 static unsigned int
23605 arm_hard_regno_nregs (unsigned int regno
, machine_mode mode
)
23608 && regno
> PC_REGNUM
23609 && regno
!= FRAME_POINTER_REGNUM
23610 && regno
!= ARG_POINTER_REGNUM
23611 && !IS_VFP_REGNUM (regno
))
23614 return ARM_NUM_REGS (mode
);
23617 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23619 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
23621 if (GET_MODE_CLASS (mode
) == MODE_CC
)
23622 return (regno
== CC_REGNUM
23623 || (TARGET_HARD_FLOAT
23624 && regno
== VFPCC_REGNUM
));
23626 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
23630 /* For the Thumb we only allow values bigger than SImode in
23631 registers 0 - 6, so that there is always a second low
23632 register available to hold the upper part of the value.
23633 We probably we ought to ensure that the register is the
23634 start of an even numbered register pair. */
23635 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
23637 if (TARGET_HARD_FLOAT
&& IS_VFP_REGNUM (regno
))
23639 if (mode
== SFmode
|| mode
== SImode
)
23640 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23642 if (mode
== DFmode
)
23643 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
23645 if (mode
== HFmode
)
23646 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23648 /* VFP registers can hold HImode values. */
23649 if (mode
== HImode
)
23650 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23653 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
23654 || (VALID_NEON_QREG_MODE (mode
)
23655 && NEON_REGNO_OK_FOR_QUAD (regno
))
23656 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
23657 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
23658 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
23659 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
23660 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
23665 if (TARGET_REALLY_IWMMXT
)
23667 if (IS_IWMMXT_GR_REGNUM (regno
))
23668 return mode
== SImode
;
23670 if (IS_IWMMXT_REGNUM (regno
))
23671 return VALID_IWMMXT_REG_MODE (mode
);
23674 /* We allow almost any value to be stored in the general registers.
23675 Restrict doubleword quantities to even register pairs in ARM state
23676 so that we can use ldrd. Do not allow very large Neon structure
23677 opaque modes in general registers; they would use too many. */
23678 if (regno
<= LAST_ARM_REGNUM
)
23680 if (ARM_NUM_REGS (mode
) > 4)
23686 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
23689 if (regno
== FRAME_POINTER_REGNUM
23690 || regno
== ARG_POINTER_REGNUM
)
23691 /* We only allow integers in the fake hard registers. */
23692 return GET_MODE_CLASS (mode
) == MODE_INT
;
23697 /* Implement TARGET_MODES_TIEABLE_P. */
23700 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
23702 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
23705 /* We specifically want to allow elements of "structure" modes to
23706 be tieable to the structure. This more general condition allows
23707 other rarer situations too. */
23709 && (VALID_NEON_DREG_MODE (mode1
)
23710 || VALID_NEON_QREG_MODE (mode1
)
23711 || VALID_NEON_STRUCT_MODE (mode1
))
23712 && (VALID_NEON_DREG_MODE (mode2
)
23713 || VALID_NEON_QREG_MODE (mode2
)
23714 || VALID_NEON_STRUCT_MODE (mode2
)))
23720 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23721 not used in arm mode. */
23724 arm_regno_class (int regno
)
23726 if (regno
== PC_REGNUM
)
23731 if (regno
== STACK_POINTER_REGNUM
)
23733 if (regno
== CC_REGNUM
)
23740 if (TARGET_THUMB2
&& regno
< 8)
23743 if ( regno
<= LAST_ARM_REGNUM
23744 || regno
== FRAME_POINTER_REGNUM
23745 || regno
== ARG_POINTER_REGNUM
)
23746 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
23748 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
23749 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
23751 if (IS_VFP_REGNUM (regno
))
23753 if (regno
<= D7_VFP_REGNUM
)
23754 return VFP_D0_D7_REGS
;
23755 else if (regno
<= LAST_LO_VFP_REGNUM
)
23756 return VFP_LO_REGS
;
23758 return VFP_HI_REGS
;
23761 if (IS_IWMMXT_REGNUM (regno
))
23762 return IWMMXT_REGS
;
23764 if (IS_IWMMXT_GR_REGNUM (regno
))
23765 return IWMMXT_GR_REGS
;
23770 /* Handle a special case when computing the offset
23771 of an argument from the frame pointer. */
23773 arm_debugger_arg_offset (int value
, rtx addr
)
23777 /* We are only interested if dbxout_parms() failed to compute the offset. */
23781 /* We can only cope with the case where the address is held in a register. */
23785 /* If we are using the frame pointer to point at the argument, then
23786 an offset of 0 is correct. */
23787 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
23790 /* If we are using the stack pointer to point at the
23791 argument, then an offset of 0 is correct. */
23792 /* ??? Check this is consistent with thumb2 frame layout. */
23793 if ((TARGET_THUMB
|| !frame_pointer_needed
)
23794 && REGNO (addr
) == SP_REGNUM
)
23797 /* Oh dear. The argument is pointed to by a register rather
23798 than being held in a register, or being stored at a known
23799 offset from the frame pointer. Since GDB only understands
23800 those two kinds of argument we must translate the address
23801 held in the register into an offset from the frame pointer.
23802 We do this by searching through the insns for the function
23803 looking to see where this register gets its value. If the
23804 register is initialized from the frame pointer plus an offset
23805 then we are in luck and we can continue, otherwise we give up.
23807 This code is exercised by producing debugging information
23808 for a function with arguments like this:
23810 double func (double a, double b, int c, double d) {return d;}
23812 Without this code the stab for parameter 'd' will be set to
23813 an offset of 0 from the frame pointer, rather than 8. */
23815 /* The if() statement says:
23817 If the insn is a normal instruction
23818 and if the insn is setting the value in a register
23819 and if the register being set is the register holding the address of the argument
23820 and if the address is computing by an addition
23821 that involves adding to a register
23822 which is the frame pointer
23827 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23829 if ( NONJUMP_INSN_P (insn
)
23830 && GET_CODE (PATTERN (insn
)) == SET
23831 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23832 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23833 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23834 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23835 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23838 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23847 warning (0, "unable to compute real location of stacked parameter");
23848 value
= 8; /* XXX magic hack */
23854 /* Implement TARGET_PROMOTED_TYPE. */
23857 arm_promoted_type (const_tree t
)
23859 if (SCALAR_FLOAT_TYPE_P (t
)
23860 && TYPE_PRECISION (t
) == 16
23861 && TYPE_MAIN_VARIANT (t
) == arm_fp16_type_node
)
23862 return float_type_node
;
23866 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23867 This simply adds HFmode as a supported mode; even though we don't
23868 implement arithmetic on this type directly, it's supported by
23869 optabs conversions, much the way the double-word arithmetic is
23870 special-cased in the default hook. */
23873 arm_scalar_mode_supported_p (scalar_mode mode
)
23875 if (mode
== HFmode
)
23876 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
23877 else if (ALL_FIXED_POINT_MODE_P (mode
))
23880 return default_scalar_mode_supported_p (mode
);
23883 /* Set the value of FLT_EVAL_METHOD.
23884 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23886 0: evaluate all operations and constants, whose semantic type has at
23887 most the range and precision of type float, to the range and
23888 precision of float; evaluate all other operations and constants to
23889 the range and precision of the semantic type;
23891 N, where _FloatN is a supported interchange floating type
23892 evaluate all operations and constants, whose semantic type has at
23893 most the range and precision of _FloatN type, to the range and
23894 precision of the _FloatN type; evaluate all other operations and
23895 constants to the range and precision of the semantic type;
23897 If we have the ARMv8.2-A extensions then we support _Float16 in native
23898 precision, so we should set this to 16. Otherwise, we support the type,
23899 but want to evaluate expressions in float precision, so set this to
23902 static enum flt_eval_method
23903 arm_excess_precision (enum excess_precision_type type
)
23907 case EXCESS_PRECISION_TYPE_FAST
:
23908 case EXCESS_PRECISION_TYPE_STANDARD
:
23909 /* We can calculate either in 16-bit range and precision or
23910 32-bit range and precision. Make that decision based on whether
23911 we have native support for the ARMv8.2-A 16-bit floating-point
23912 instructions or not. */
23913 return (TARGET_VFP_FP16INST
23914 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23915 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
);
23916 case EXCESS_PRECISION_TYPE_IMPLICIT
:
23917 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
;
23919 gcc_unreachable ();
23921 return FLT_EVAL_METHOD_UNPREDICTABLE
;
23925 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23926 _Float16 if we are using anything other than ieee format for 16-bit
23927 floating point. Otherwise, punt to the default implementation. */
23928 static opt_scalar_float_mode
23929 arm_floatn_mode (int n
, bool extended
)
23931 if (!extended
&& n
== 16)
23933 if (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
)
23935 return opt_scalar_float_mode ();
23938 return default_floatn_mode (n
, extended
);
23942 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23943 not to early-clobber SRC registers in the process.
23945 We assume that the operands described by SRC and DEST represent a
23946 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23947 number of components into which the copy has been decomposed. */
23949 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
23953 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
23954 || REGNO (operands
[0]) < REGNO (operands
[1]))
23956 for (i
= 0; i
< count
; i
++)
23958 operands
[2 * i
] = dest
[i
];
23959 operands
[2 * i
+ 1] = src
[i
];
23964 for (i
= 0; i
< count
; i
++)
23966 operands
[2 * i
] = dest
[count
- i
- 1];
23967 operands
[2 * i
+ 1] = src
[count
- i
- 1];
23972 /* Split operands into moves from op[1] + op[2] into op[0]. */
23975 neon_split_vcombine (rtx operands
[3])
23977 unsigned int dest
= REGNO (operands
[0]);
23978 unsigned int src1
= REGNO (operands
[1]);
23979 unsigned int src2
= REGNO (operands
[2]);
23980 machine_mode halfmode
= GET_MODE (operands
[1]);
23981 unsigned int halfregs
= REG_NREGS (operands
[1]);
23982 rtx destlo
, desthi
;
23984 if (src1
== dest
&& src2
== dest
+ halfregs
)
23986 /* No-op move. Can't split to nothing; emit something. */
23987 emit_note (NOTE_INSN_DELETED
);
23991 /* Preserve register attributes for variable tracking. */
23992 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
23993 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
23994 GET_MODE_SIZE (halfmode
));
23996 /* Special case of reversed high/low parts. Use VSWP. */
23997 if (src2
== dest
&& src1
== dest
+ halfregs
)
23999 rtx x
= gen_rtx_SET (destlo
, operands
[1]);
24000 rtx y
= gen_rtx_SET (desthi
, operands
[2]);
24001 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
24005 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
24007 /* Try to avoid unnecessary moves if part of the result
24008 is in the right place already. */
24010 emit_move_insn (destlo
, operands
[1]);
24011 if (src2
!= dest
+ halfregs
)
24012 emit_move_insn (desthi
, operands
[2]);
24016 if (src2
!= dest
+ halfregs
)
24017 emit_move_insn (desthi
, operands
[2]);
24019 emit_move_insn (destlo
, operands
[1]);
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static int
number_of_first_bit_set (unsigned mask)
{
  /* Count-trailing-zeros gives exactly the index of the lowest set bit.  */
  return ctz_hwi (mask);
}
24032 /* Like emit_multi_reg_push, but allowing for a different set of
24033 registers to be described as saved. MASK is the set of registers
24034 to be saved; REAL_REGS is the set of registers to be described as
24035 saved. If REAL_REGS is 0, only describe the stack adjustment. */
24038 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
24040 unsigned long regno
;
24041 rtx par
[10], tmp
, reg
;
24045 /* Build the parallel of the registers actually being stored. */
24046 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
24048 regno
= ctz_hwi (mask
);
24049 reg
= gen_rtx_REG (SImode
, regno
);
24052 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
24054 tmp
= gen_rtx_USE (VOIDmode
, reg
);
24059 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
24060 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
24061 tmp
= gen_frame_mem (BLKmode
, tmp
);
24062 tmp
= gen_rtx_SET (tmp
, par
[0]);
24065 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
24066 insn
= emit_insn (tmp
);
24068 /* Always build the stack adjustment note for unwind info. */
24069 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
24070 tmp
= gen_rtx_SET (stack_pointer_rtx
, tmp
);
24073 /* Build the parallel of the registers recorded as saved for unwind. */
24074 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
24076 regno
= ctz_hwi (real_regs
);
24077 reg
= gen_rtx_REG (SImode
, regno
);
24079 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
24080 tmp
= gen_frame_mem (SImode
, tmp
);
24081 tmp
= gen_rtx_SET (tmp
, reg
);
24082 RTX_FRAME_RELATED_P (tmp
) = 1;
24090 RTX_FRAME_RELATED_P (par
[0]) = 1;
24091 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
24094 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
24099 /* Emit code to push or pop registers to or from the stack. F is the
24100 assembly file. MASK is the registers to pop. */
24102 thumb_pop (FILE *f
, unsigned long mask
)
24105 int lo_mask
= mask
& 0xFF;
24109 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
24111 /* Special case. Do not generate a POP PC statement here, do it in
24113 thumb_exit (f
, -1);
24117 fprintf (f
, "\tpop\t{");
24119 /* Look at the low registers first. */
24120 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
24124 asm_fprintf (f
, "%r", regno
);
24126 if ((lo_mask
& ~1) != 0)
24131 if (mask
& (1 << PC_REGNUM
))
24133 /* Catch popping the PC. */
24134 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
|| crtl
->calls_eh_return
24135 || IS_CMSE_ENTRY (arm_current_func_type ()))
24137 /* The PC is never poped directly, instead
24138 it is popped into r3 and then BX is used. */
24139 fprintf (f
, "}\n");
24141 thumb_exit (f
, -1);
24150 asm_fprintf (f
, "%r", PC_REGNUM
);
24154 fprintf (f
, "}\n");
24157 /* Generate code to return from a thumb function.
24158 If 'reg_containing_return_addr' is -1, then the return address is
24159 actually on the stack, at the stack pointer.
24161 Note: do not forget to update length attribute of corresponding insn pattern
24162 when changing assembly output (eg. length attribute of epilogue_insns when
24163 updating Armv8-M Baseline Security Extensions register clearing
24166 thumb_exit (FILE *f
, int reg_containing_return_addr
)
24168 unsigned regs_available_for_popping
;
24169 unsigned regs_to_pop
;
24171 unsigned available
;
24175 int restore_a4
= FALSE
;
24177 /* Compute the registers we need to pop. */
24181 if (reg_containing_return_addr
== -1)
24183 regs_to_pop
|= 1 << LR_REGNUM
;
24187 if (TARGET_BACKTRACE
)
24189 /* Restore the (ARM) frame pointer and stack pointer. */
24190 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
24194 /* If there is nothing to pop then just emit the BX instruction and
24196 if (pops_needed
== 0)
24198 if (crtl
->calls_eh_return
)
24199 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
24201 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24203 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n",
24204 reg_containing_return_addr
);
24205 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
24208 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
24211 /* Otherwise if we are not supporting interworking and we have not created
24212 a backtrace structure and the function was not entered in ARM mode then
24213 just pop the return address straight into the PC. */
24214 else if (!TARGET_INTERWORK
24215 && !TARGET_BACKTRACE
24216 && !is_called_in_ARM_mode (current_function_decl
)
24217 && !crtl
->calls_eh_return
24218 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24220 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
24224 /* Find out how many of the (return) argument registers we can corrupt. */
24225 regs_available_for_popping
= 0;
24227 /* If returning via __builtin_eh_return, the bottom three registers
24228 all contain information needed for the return. */
24229 if (crtl
->calls_eh_return
)
24233 /* If we can deduce the registers used from the function's
24234 return value. This is more reliable that examining
24235 df_regs_ever_live_p () because that will be set if the register is
24236 ever used in the function, not just if the register is used
24237 to hold a return value. */
24239 if (crtl
->return_rtx
!= 0)
24240 mode
= GET_MODE (crtl
->return_rtx
);
24242 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
24244 size
= GET_MODE_SIZE (mode
);
24248 /* In a void function we can use any argument register.
24249 In a function that returns a structure on the stack
24250 we can use the second and third argument registers. */
24251 if (mode
== VOIDmode
)
24252 regs_available_for_popping
=
24253 (1 << ARG_REGISTER (1))
24254 | (1 << ARG_REGISTER (2))
24255 | (1 << ARG_REGISTER (3));
24257 regs_available_for_popping
=
24258 (1 << ARG_REGISTER (2))
24259 | (1 << ARG_REGISTER (3));
24261 else if (size
<= 4)
24262 regs_available_for_popping
=
24263 (1 << ARG_REGISTER (2))
24264 | (1 << ARG_REGISTER (3));
24265 else if (size
<= 8)
24266 regs_available_for_popping
=
24267 (1 << ARG_REGISTER (3));
24270 /* Match registers to be popped with registers into which we pop them. */
24271 for (available
= regs_available_for_popping
,
24272 required
= regs_to_pop
;
24273 required
!= 0 && available
!= 0;
24274 available
&= ~(available
& - available
),
24275 required
&= ~(required
& - required
))
24278 /* If we have any popping registers left over, remove them. */
24280 regs_available_for_popping
&= ~available
;
24282 /* Otherwise if we need another popping register we can use
24283 the fourth argument register. */
24284 else if (pops_needed
)
24286 /* If we have not found any free argument registers and
24287 reg a4 contains the return address, we must move it. */
24288 if (regs_available_for_popping
== 0
24289 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
24291 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24292 reg_containing_return_addr
= LR_REGNUM
;
24294 else if (size
> 12)
24296 /* Register a4 is being used to hold part of the return value,
24297 but we have dire need of a free, low register. */
24300 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
24303 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
24305 /* The fourth argument register is available. */
24306 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
24312 /* Pop as many registers as we can. */
24313 thumb_pop (f
, regs_available_for_popping
);
24315 /* Process the registers we popped. */
24316 if (reg_containing_return_addr
== -1)
24318 /* The return address was popped into the lowest numbered register. */
24319 regs_to_pop
&= ~(1 << LR_REGNUM
);
24321 reg_containing_return_addr
=
24322 number_of_first_bit_set (regs_available_for_popping
);
24324 /* Remove this register for the mask of available registers, so that
24325 the return address will not be corrupted by further pops. */
24326 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
24329 /* If we popped other registers then handle them here. */
24330 if (regs_available_for_popping
)
24334 /* Work out which register currently contains the frame pointer. */
24335 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24337 /* Move it into the correct place. */
24338 asm_fprintf (f
, "\tmov\t%r, %r\n",
24339 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
24341 /* (Temporarily) remove it from the mask of popped registers. */
24342 regs_available_for_popping
&= ~(1 << frame_pointer
);
24343 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
24345 if (regs_available_for_popping
)
24349 /* We popped the stack pointer as well,
24350 find the register that contains it. */
24351 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24353 /* Move it into the stack register. */
24354 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
24356 /* At this point we have popped all necessary registers, so
24357 do not worry about restoring regs_available_for_popping
24358 to its correct value:
24360 assert (pops_needed == 0)
24361 assert (regs_available_for_popping == (1 << frame_pointer))
24362 assert (regs_to_pop == (1 << STACK_POINTER)) */
24366 /* Since we have just move the popped value into the frame
24367 pointer, the popping register is available for reuse, and
24368 we know that we still have the stack pointer left to pop. */
24369 regs_available_for_popping
|= (1 << frame_pointer
);
24373 /* If we still have registers left on the stack, but we no longer have
24374 any registers into which we can pop them, then we must move the return
24375 address into the link register and make available the register that
24377 if (regs_available_for_popping
== 0 && pops_needed
> 0)
24379 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
24381 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
24382 reg_containing_return_addr
);
24384 reg_containing_return_addr
= LR_REGNUM
;
24387 /* If we have registers left on the stack then pop some more.
24388 We know that at most we will want to pop FP and SP. */
24389 if (pops_needed
> 0)
24394 thumb_pop (f
, regs_available_for_popping
);
24396 /* We have popped either FP or SP.
24397 Move whichever one it is into the correct register. */
24398 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24399 move_to
= number_of_first_bit_set (regs_to_pop
);
24401 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
24405 /* If we still have not popped everything then we must have only
24406 had one register available to us and we are now popping the SP. */
24407 if (pops_needed
> 0)
24411 thumb_pop (f
, regs_available_for_popping
);
24413 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24415 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
24417 assert (regs_to_pop == (1 << STACK_POINTER))
24418 assert (pops_needed == 1)
24422 /* If necessary restore the a4 register. */
24425 if (reg_containing_return_addr
!= LR_REGNUM
)
24427 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24428 reg_containing_return_addr
= LR_REGNUM
;
24431 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
24434 if (crtl
->calls_eh_return
)
24435 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
24437 /* Return to caller. */
24438 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24440 /* This is for the cases where LR is not being used to contain the return
24441 address. It may therefore contain information that we might not want
24442 to leak, hence it must be cleared. The value in R0 will never be a
24443 secret at this point, so it is safe to use it, see the clearing code
24444 in 'cmse_nonsecure_entry_clear_before_return'. */
24445 if (reg_containing_return_addr
!= LR_REGNUM
)
24446 asm_fprintf (f
, "\tmov\tlr, r0\n");
24448 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr
);
24449 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
24452 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
24455 /* Scan INSN just before assembler is output for it.
24456 For Thumb-1, we track the status of the condition codes; this
24457 information is used in the cbranchsi4_insn pattern. */
24459 thumb1_final_prescan_insn (rtx_insn
*insn
)
24461 if (flag_print_asm_name
)
24462 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
24463 INSN_ADDRESSES (INSN_UID (insn
)));
24464 /* Don't overwrite the previous setter when we get to a cbranch. */
24465 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
24467 enum attr_conds conds
;
24469 if (cfun
->machine
->thumb1_cc_insn
)
24471 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
24472 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
24475 conds
= get_attr_conds (insn
);
24476 if (conds
== CONDS_SET
)
24478 rtx set
= single_set (insn
);
24479 cfun
->machine
->thumb1_cc_insn
= insn
;
24480 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
24481 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
24482 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
24483 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
24485 rtx src1
= XEXP (SET_SRC (set
), 1);
24486 if (src1
== const0_rtx
)
24487 cfun
->machine
->thumb1_cc_mode
= CCmode
;
24489 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
24491 /* Record the src register operand instead of dest because
24492 cprop_hardreg pass propagates src. */
24493 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
24496 else if (conds
!= CONDS_NOCOND
)
24497 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
24500 /* Check if unexpected far jump is used. */
24501 if (cfun
->machine
->lr_save_eliminated
24502 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24503 internal_error("Unexpected thumb1 far jump");
24507 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
24509 unsigned HOST_WIDE_INT mask
= 0xff;
24512 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
24513 if (val
== 0) /* XXX */
24516 for (i
= 0; i
< 25; i
++)
24517 if ((val
& (mask
<< i
)) == val
)
24523 /* Returns nonzero if the current function contains,
24524 or might contain a far jump. */
24526 thumb_far_jump_used_p (void)
24529 bool far_jump
= false;
24530 unsigned int func_size
= 0;
24532 /* If we have already decided that far jumps may be used,
24533 do not bother checking again, and always return true even if
24534 it turns out that they are not being used. Once we have made
24535 the decision that far jumps are present (and that hence the link
24536 register will be pushed onto the stack) we cannot go back on it. */
24537 if (cfun
->machine
->far_jump_used
)
24540 /* If this function is not being called from the prologue/epilogue
24541 generation code then it must be being called from the
24542 INITIAL_ELIMINATION_OFFSET macro. */
24543 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
24545 /* In this case we know that we are being asked about the elimination
24546 of the arg pointer register. If that register is not being used,
24547 then there are no arguments on the stack, and we do not have to
24548 worry that a far jump might force the prologue to push the link
24549 register, changing the stack offsets. In this case we can just
24550 return false, since the presence of far jumps in the function will
24551 not affect stack offsets.
24553 If the arg pointer is live (or if it was live, but has now been
24554 eliminated and so set to dead) then we do have to test to see if
24555 the function might contain a far jump. This test can lead to some
24556 false negatives, since before reload is completed, then length of
24557 branch instructions is not known, so gcc defaults to returning their
24558 longest length, which in turn sets the far jump attribute to true.
24560 A false negative will not result in bad code being generated, but it
24561 will result in a needless push and pop of the link register. We
24562 hope that this does not occur too often.
24564 If we need doubleword stack alignment this could affect the other
24565 elimination offsets so we can't risk getting it wrong. */
24566 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
24567 cfun
->machine
->arg_pointer_live
= 1;
24568 else if (!cfun
->machine
->arg_pointer_live
)
24572 /* We should not change far_jump_used during or after reload, as there is
24573 no chance to change stack frame layout. */
24574 if (reload_in_progress
|| reload_completed
)
24577 /* Check to see if the function contains a branch
24578 insn with the far jump attribute set. */
24579 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
24581 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24585 func_size
+= get_attr_length (insn
);
24588 /* Attribute far_jump will always be true for thumb1 before
24589 shorten_branch pass. So checking far_jump attribute before
24590 shorten_branch isn't much useful.
24592 Following heuristic tries to estimate more accurately if a far jump
24593 may finally be used. The heuristic is very conservative as there is
24594 no chance to roll-back the decision of not to use far jump.
24596 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24597 2-byte insn is associated with a 4 byte constant pool. Using
24598 function size 2048/3 as the threshold is conservative enough. */
24601 if ((func_size
* 3) >= 2048)
24603 /* Record the fact that we have decided that
24604 the function does use far jumps. */
24605 cfun
->machine
->far_jump_used
= 1;
24613 /* Return nonzero if FUNC must be entered in ARM mode. */
24615 is_called_in_ARM_mode (tree func
)
24617 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
24619 /* Ignore the problem about functions whose address is taken. */
24620 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
24624 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
24630 /* Given the stack offsets and register mask in OFFSETS, decide how
24631 many additional registers to push instead of subtracting a constant
24632 from SP. For epilogues the principle is the same except we use pop.
24633 FOR_PROLOGUE indicates which we're generating. */
24635 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
24637 HOST_WIDE_INT amount
;
24638 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
24639 /* Extract a mask of the ones we can give to the Thumb's push/pop
24641 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
24642 /* Then count how many other high registers will need to be pushed. */
24643 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24644 int n_free
, reg_base
, size
;
24646 if (!for_prologue
&& frame_pointer_needed
)
24647 amount
= offsets
->locals_base
- offsets
->saved_regs
;
24649 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24651 /* If the stack frame size is 512 exactly, we can save one load
24652 instruction, which should make this a win even when optimizing
24654 if (!optimize_size
&& amount
!= 512)
24657 /* Can't do this if there are high registers to push. */
24658 if (high_regs_pushed
!= 0)
24661 /* Shouldn't do it in the prologue if no registers would normally
24662 be pushed at all. In the epilogue, also allow it if we'll have
24663 a pop insn for the PC. */
24666 || TARGET_BACKTRACE
24667 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
24668 || TARGET_INTERWORK
24669 || crtl
->args
.pretend_args_size
!= 0))
24672 /* Don't do this if thumb_expand_prologue wants to emit instructions
24673 between the push and the stack frame allocation. */
24675 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24676 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
24683 size
= arm_size_return_regs ();
24684 reg_base
= ARM_NUM_INTS (size
);
24685 live_regs_mask
>>= reg_base
;
24688 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
24689 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
24691 live_regs_mask
>>= 1;
24697 gcc_assert (amount
/ 4 * 4 == amount
);
24699 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
24700 return (amount
- 508) / 4;
24701 if (amount
<= n_free
* 4)
24706 /* The bits which aren't usefully expanded as rtl. */
24708 thumb1_unexpanded_epilogue (void)
24710 arm_stack_offsets
*offsets
;
24712 unsigned long live_regs_mask
= 0;
24713 int high_regs_pushed
= 0;
24715 int had_to_push_lr
;
24718 if (cfun
->machine
->return_used_this_function
!= 0)
24721 if (IS_NAKED (arm_current_func_type ()))
24724 offsets
= arm_get_frame_offsets ();
24725 live_regs_mask
= offsets
->saved_regs_mask
;
24726 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24728 /* If we can deduce the registers used from the function's return value.
24729 This is more reliable that examining df_regs_ever_live_p () because that
24730 will be set if the register is ever used in the function, not just if
24731 the register is used to hold a return value. */
24732 size
= arm_size_return_regs ();
24734 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
24737 unsigned long extra_mask
= (1 << extra_pop
) - 1;
24738 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
24741 /* The prolog may have pushed some high registers to use as
24742 work registers. e.g. the testsuite file:
24743 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24744 compiles to produce:
24745 push {r4, r5, r6, r7, lr}
24749 as part of the prolog. We have to undo that pushing here. */
24751 if (high_regs_pushed
)
24753 unsigned long mask
= live_regs_mask
& 0xff;
24756 /* The available low registers depend on the size of the value we are
24764 /* Oh dear! We have no low registers into which we can pop
24767 ("no low registers available for popping high registers");
24769 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
24770 if (live_regs_mask
& (1 << next_hi_reg
))
24773 while (high_regs_pushed
)
24775 /* Find lo register(s) into which the high register(s) can
24777 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24779 if (mask
& (1 << regno
))
24780 high_regs_pushed
--;
24781 if (high_regs_pushed
== 0)
24785 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
24787 /* Pop the values into the low register(s). */
24788 thumb_pop (asm_out_file
, mask
);
24790 /* Move the value(s) into the high registers. */
24791 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24793 if (mask
& (1 << regno
))
24795 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
24798 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
24799 if (live_regs_mask
& (1 << next_hi_reg
))
24804 live_regs_mask
&= ~0x0f00;
24807 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
24808 live_regs_mask
&= 0xff;
24810 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
24812 /* Pop the return address into the PC. */
24813 if (had_to_push_lr
)
24814 live_regs_mask
|= 1 << PC_REGNUM
;
24816 /* Either no argument registers were pushed or a backtrace
24817 structure was created which includes an adjusted stack
24818 pointer, so just pop everything. */
24819 if (live_regs_mask
)
24820 thumb_pop (asm_out_file
, live_regs_mask
);
24822 /* We have either just popped the return address into the
24823 PC or it is was kept in LR for the entire function.
24824 Note that thumb_pop has already called thumb_exit if the
24825 PC was in the list. */
24826 if (!had_to_push_lr
)
24827 thumb_exit (asm_out_file
, LR_REGNUM
);
24831 /* Pop everything but the return address. */
24832 if (live_regs_mask
)
24833 thumb_pop (asm_out_file
, live_regs_mask
);
24835 if (had_to_push_lr
)
24839 /* We have no free low regs, so save one. */
24840 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
24844 /* Get the return address into a temporary register. */
24845 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
24849 /* Move the return address to lr. */
24850 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
24852 /* Restore the low register. */
24853 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
24858 regno
= LAST_ARG_REGNUM
;
24863 /* Remove the argument registers that were pushed onto the stack. */
24864 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
24865 SP_REGNUM
, SP_REGNUM
,
24866 crtl
->args
.pretend_args_size
);
24868 thumb_exit (asm_out_file
, regno
);
24874 /* Functions to save and restore machine-specific function data. */
24875 static struct machine_function
*
24876 arm_init_machine_status (void)
24878 struct machine_function
*machine
;
24879 machine
= ggc_cleared_alloc
<machine_function
> ();
24881 #if ARM_FT_UNKNOWN != 0
24882 machine
->func_type
= ARM_FT_UNKNOWN
;
24887 /* Return an RTX indicating where the return address to the
24888 calling function can be found. */
24890 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
24895 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
24898 /* Do anything needed before RTL is emitted for each function. */
24900 arm_init_expanders (void)
24902 /* Arrange to initialize and mark the machine per-function status. */
24903 init_machine_status
= arm_init_machine_status
;
24905 /* This is to stop the combine pass optimizing away the alignment
24906 adjustment of va_arg. */
24907 /* ??? It is claimed that this should not be necessary. */
24909 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
24912 /* Check that FUNC is called with a different mode. */
24915 arm_change_mode_p (tree func
)
24917 if (TREE_CODE (func
) != FUNCTION_DECL
)
24920 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (func
);
24923 callee_tree
= target_option_default_node
;
24925 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
24926 int flags
= callee_opts
->x_target_flags
;
24928 return (TARGET_THUMB_P (flags
) != TARGET_THUMB
);
24931 /* Like arm_compute_initial_elimination offset. Simpler because there
24932 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24933 to point at the base of the local variables after static stack
24934 space for a function has been allocated. */
24937 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
24939 arm_stack_offsets
*offsets
;
24941 offsets
= arm_get_frame_offsets ();
24945 case ARG_POINTER_REGNUM
:
24948 case STACK_POINTER_REGNUM
:
24949 return offsets
->outgoing_args
- offsets
->saved_args
;
24951 case FRAME_POINTER_REGNUM
:
24952 return offsets
->soft_frame
- offsets
->saved_args
;
24954 case ARM_HARD_FRAME_POINTER_REGNUM
:
24955 return offsets
->saved_regs
- offsets
->saved_args
;
24957 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24958 return offsets
->locals_base
- offsets
->saved_args
;
24961 gcc_unreachable ();
24965 case FRAME_POINTER_REGNUM
:
24968 case STACK_POINTER_REGNUM
:
24969 return offsets
->outgoing_args
- offsets
->soft_frame
;
24971 case ARM_HARD_FRAME_POINTER_REGNUM
:
24972 return offsets
->saved_regs
- offsets
->soft_frame
;
24974 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24975 return offsets
->locals_base
- offsets
->soft_frame
;
24978 gcc_unreachable ();
24983 gcc_unreachable ();
24987 /* Generate the function's prologue. */
24990 thumb1_expand_prologue (void)
24994 HOST_WIDE_INT amount
;
24995 HOST_WIDE_INT size
;
24996 arm_stack_offsets
*offsets
;
24997 unsigned long func_type
;
24999 unsigned long live_regs_mask
;
25000 unsigned long l_mask
;
25001 unsigned high_regs_pushed
= 0;
25002 bool lr_needs_saving
;
25004 func_type
= arm_current_func_type ();
25006 /* Naked functions don't have prologues. */
25007 if (IS_NAKED (func_type
))
25009 if (flag_stack_usage_info
)
25010 current_function_static_stack_size
= 0;
25014 if (IS_INTERRUPT (func_type
))
25016 error ("interrupt Service Routines cannot be coded in Thumb mode");
25020 if (is_called_in_ARM_mode (current_function_decl
))
25021 emit_insn (gen_prologue_thumb1_interwork ());
25023 offsets
= arm_get_frame_offsets ();
25024 live_regs_mask
= offsets
->saved_regs_mask
;
25025 lr_needs_saving
= live_regs_mask
& (1 << LR_REGNUM
);
25027 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25028 l_mask
= live_regs_mask
& 0x40ff;
25029 /* Then count how many other high registers will need to be pushed. */
25030 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
25032 if (crtl
->args
.pretend_args_size
)
25034 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
25036 if (cfun
->machine
->uses_anonymous_args
)
25038 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
25039 unsigned long mask
;
25041 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
25042 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
25044 insn
= thumb1_emit_multi_reg_push (mask
, 0);
25048 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25049 stack_pointer_rtx
, x
));
25051 RTX_FRAME_RELATED_P (insn
) = 1;
25054 if (TARGET_BACKTRACE
)
25056 HOST_WIDE_INT offset
= 0;
25057 unsigned work_register
;
25058 rtx work_reg
, x
, arm_hfp_rtx
;
25060 /* We have been asked to create a stack backtrace structure.
25061 The code looks like this:
25065 0 sub SP, #16 Reserve space for 4 registers.
25066 2 push {R7} Push low registers.
25067 4 add R7, SP, #20 Get the stack pointer before the push.
25068 6 str R7, [SP, #8] Store the stack pointer
25069 (before reserving the space).
25070 8 mov R7, PC Get hold of the start of this code + 12.
25071 10 str R7, [SP, #16] Store it.
25072 12 mov R7, FP Get hold of the current frame pointer.
25073 14 str R7, [SP, #4] Store it.
25074 16 mov R7, LR Get hold of the current return address.
25075 18 str R7, [SP, #12] Store it.
25076 20 add R7, SP, #16 Point at the start of the
25077 backtrace structure.
25078 22 mov FP, R7 Put this value into the frame pointer. */
25080 work_register
= thumb_find_work_register (live_regs_mask
);
25081 work_reg
= gen_rtx_REG (SImode
, work_register
);
25082 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
25084 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25085 stack_pointer_rtx
, GEN_INT (-16)));
25086 RTX_FRAME_RELATED_P (insn
) = 1;
25090 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
25091 RTX_FRAME_RELATED_P (insn
) = 1;
25092 lr_needs_saving
= false;
25094 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
25097 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
25098 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
25100 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
25101 x
= gen_frame_mem (SImode
, x
);
25102 emit_move_insn (x
, work_reg
);
25104 /* Make sure that the instruction fetching the PC is in the right place
25105 to calculate "start of backtrace creation code + 12". */
25106 /* ??? The stores using the common WORK_REG ought to be enough to
25107 prevent the scheduler from doing anything weird. Failing that
25108 we could always move all of the following into an UNSPEC_VOLATILE. */
25111 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
25112 emit_move_insn (work_reg
, x
);
25114 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
25115 x
= gen_frame_mem (SImode
, x
);
25116 emit_move_insn (x
, work_reg
);
25118 emit_move_insn (work_reg
, arm_hfp_rtx
);
25120 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
25121 x
= gen_frame_mem (SImode
, x
);
25122 emit_move_insn (x
, work_reg
);
25126 emit_move_insn (work_reg
, arm_hfp_rtx
);
25128 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
25129 x
= gen_frame_mem (SImode
, x
);
25130 emit_move_insn (x
, work_reg
);
25132 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
25133 emit_move_insn (work_reg
, x
);
25135 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
25136 x
= gen_frame_mem (SImode
, x
);
25137 emit_move_insn (x
, work_reg
);
25140 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
25141 emit_move_insn (work_reg
, x
);
25143 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
25144 x
= gen_frame_mem (SImode
, x
);
25145 emit_move_insn (x
, work_reg
);
25147 x
= GEN_INT (offset
+ 12);
25148 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
25150 emit_move_insn (arm_hfp_rtx
, work_reg
);
25152 /* Optimization: If we are not pushing any low registers but we are going
25153 to push some high registers then delay our first push. This will just
25154 be a push of LR and we can combine it with the push of the first high
25156 else if ((l_mask
& 0xff) != 0
25157 || (high_regs_pushed
== 0 && lr_needs_saving
))
25159 unsigned long mask
= l_mask
;
25160 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
25161 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
25162 RTX_FRAME_RELATED_P (insn
) = 1;
25163 lr_needs_saving
= false;
25166 if (high_regs_pushed
)
25168 unsigned pushable_regs
;
25169 unsigned next_hi_reg
;
25170 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
25171 : crtl
->args
.info
.nregs
;
25172 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
25174 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
25175 if (live_regs_mask
& (1 << next_hi_reg
))
25178 /* Here we need to mask out registers used for passing arguments
25179 even if they can be pushed. This is to avoid using them to stash the high
25180 registers. Such kind of stash may clobber the use of arguments. */
25181 pushable_regs
= l_mask
& (~arg_regs_mask
);
25182 if (lr_needs_saving
)
25183 pushable_regs
&= ~(1 << LR_REGNUM
);
25185 if (pushable_regs
== 0)
25186 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
25188 while (high_regs_pushed
> 0)
25190 unsigned long real_regs_mask
= 0;
25191 unsigned long push_mask
= 0;
25193 for (regno
= LR_REGNUM
; regno
>= 0; regno
--)
25195 if (pushable_regs
& (1 << regno
))
25197 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25198 gen_rtx_REG (SImode
, next_hi_reg
));
25200 high_regs_pushed
--;
25201 real_regs_mask
|= (1 << next_hi_reg
);
25202 push_mask
|= (1 << regno
);
25204 if (high_regs_pushed
)
25206 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
25208 if (live_regs_mask
& (1 << next_hi_reg
))
25216 /* If we had to find a work register and we have not yet
25217 saved the LR then add it to the list of regs to push. */
25218 if (lr_needs_saving
)
25220 push_mask
|= 1 << LR_REGNUM
;
25221 real_regs_mask
|= 1 << LR_REGNUM
;
25222 lr_needs_saving
= false;
25225 insn
= thumb1_emit_multi_reg_push (push_mask
, real_regs_mask
);
25226 RTX_FRAME_RELATED_P (insn
) = 1;
25230 /* Load the pic register before setting the frame pointer,
25231 so we can use r7 as a temporary work register. */
25232 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
25233 arm_load_pic_register (live_regs_mask
);
25235 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
25236 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
25237 stack_pointer_rtx
);
25239 size
= offsets
->outgoing_args
- offsets
->saved_args
;
25240 if (flag_stack_usage_info
)
25241 current_function_static_stack_size
= size
;
25243 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25244 if ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
25245 || flag_stack_clash_protection
)
25247 sorry ("-fstack-check=specific for Thumb-1");
25249 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25250 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
25255 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
25256 GEN_INT (- amount
)));
25257 RTX_FRAME_RELATED_P (insn
) = 1;
25263 /* The stack decrement is too big for an immediate value in a single
25264 insn. In theory we could issue multiple subtracts, but after
25265 three of them it becomes more space efficient to place the full
25266 value in the constant pool and load into a register. (Also the
25267 ARM debugger really likes to see only one stack decrement per
25268 function). So instead we look for a scratch register into which
25269 we can load the decrement, and then we subtract this from the
25270 stack pointer. Unfortunately on the thumb the only available
25271 scratch registers are the argument registers, and we cannot use
25272 these as they may hold arguments to the function. Instead we
25273 attempt to locate a call preserved register which is used by this
25274 function. If we can find one, then we know that it will have
25275 been pushed at the start of the prologue and so we can corrupt
25277 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
25278 if (live_regs_mask
& (1 << regno
))
25281 gcc_assert(regno
<= LAST_LO_REGNUM
);
25283 reg
= gen_rtx_REG (SImode
, regno
);
25285 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
25287 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25288 stack_pointer_rtx
, reg
));
25290 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
25291 plus_constant (Pmode
, stack_pointer_rtx
,
25293 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
25294 RTX_FRAME_RELATED_P (insn
) = 1;
25298 if (frame_pointer_needed
)
25299 thumb_set_frame_pointer (offsets
);
25301 /* If we are profiling, make sure no instructions are scheduled before
25302 the call to mcount. Similarly if the user has requested no
25303 scheduling in the prolog. Similarly if we want non-call exceptions
25304 using the EABI unwinder, to prevent faulting instructions from being
25305 swapped with a stack adjustment. */
25306 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
25307 || (arm_except_unwind_info (&global_options
) == UI_TARGET
25308 && cfun
->can_throw_non_call_exceptions
))
25309 emit_insn (gen_blockage ());
25311 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
25312 if (live_regs_mask
& 0xff)
25313 cfun
->machine
->lr_save_eliminated
= 0;
25316 /* Clear caller saved registers not used to pass return values and leaked
25317 condition flags before exiting a cmse_nonsecure_entry function. */
25320 cmse_nonsecure_entry_clear_before_return (void)
25322 int regno
, maxregno
= TARGET_HARD_FLOAT
? LAST_VFP_REGNUM
: IP_REGNUM
;
25323 uint32_t padding_bits_to_clear
= 0;
25324 auto_sbitmap
to_clear_bitmap (maxregno
+ 1);
25325 rtx r1_reg
, result_rtl
, clearing_reg
= NULL_RTX
;
25328 bitmap_clear (to_clear_bitmap
);
25329 bitmap_set_range (to_clear_bitmap
, R0_REGNUM
, NUM_ARG_REGS
);
25330 bitmap_set_bit (to_clear_bitmap
, IP_REGNUM
);
25332 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25334 if (TARGET_HARD_FLOAT
)
25336 int float_bits
= D7_VFP_REGNUM
- FIRST_VFP_REGNUM
+ 1;
25338 bitmap_set_range (to_clear_bitmap
, FIRST_VFP_REGNUM
, float_bits
);
25340 /* Make sure we don't clear the two scratch registers used to clear the
25341 relevant FPSCR bits in output_return_instruction. */
25342 emit_use (gen_rtx_REG (SImode
, IP_REGNUM
));
25343 bitmap_clear_bit (to_clear_bitmap
, IP_REGNUM
);
25344 emit_use (gen_rtx_REG (SImode
, 4));
25345 bitmap_clear_bit (to_clear_bitmap
, 4);
25348 /* If the user has defined registers to be caller saved, these are no longer
25349 restored by the function before returning and must thus be cleared for
25350 security purposes. */
25351 for (regno
= NUM_ARG_REGS
; regno
<= maxregno
; regno
++)
25353 /* We do not touch registers that can be used to pass arguments as per
25354 the AAPCS, since these should never be made callee-saved by user
25356 if (IN_RANGE (regno
, FIRST_VFP_REGNUM
, D7_VFP_REGNUM
))
25358 if (IN_RANGE (regno
, IP_REGNUM
, PC_REGNUM
))
25360 if (call_used_regs
[regno
])
25361 bitmap_set_bit (to_clear_bitmap
, regno
);
25364 /* Make sure we do not clear the registers used to return the result in. */
25365 result_type
= TREE_TYPE (DECL_RESULT (current_function_decl
));
25366 if (!VOID_TYPE_P (result_type
))
25368 uint64_t to_clear_return_mask
;
25369 result_rtl
= arm_function_value (result_type
, current_function_decl
, 0);
25371 /* No need to check that we return in registers, because we don't
25372 support returning on stack yet. */
25373 gcc_assert (REG_P (result_rtl
));
25374 to_clear_return_mask
25375 = compute_not_to_clear_mask (result_type
, result_rtl
, 0,
25376 &padding_bits_to_clear
);
25377 if (to_clear_return_mask
)
25379 gcc_assert ((unsigned) maxregno
< sizeof (long long) * __CHAR_BIT__
);
25380 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
25382 if (to_clear_return_mask
& (1ULL << regno
))
25383 bitmap_clear_bit (to_clear_bitmap
, regno
);
25388 if (padding_bits_to_clear
!= 0)
25390 int to_clear_bitmap_size
= SBITMAP_SIZE ((sbitmap
) to_clear_bitmap
);
25391 auto_sbitmap
to_clear_arg_regs_bitmap (to_clear_bitmap_size
);
25393 /* Padding_bits_to_clear is not 0 so we know we are dealing with
25394 returning a composite type, which only uses r0. Let's make sure that
25395 r1-r3 is cleared too. */
25396 bitmap_clear (to_clear_arg_regs_bitmap
);
25397 bitmap_set_range (to_clear_arg_regs_bitmap
, R1_REGNUM
, NUM_ARG_REGS
- 1);
25398 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap
, to_clear_bitmap
));
25401 /* Clear full registers that leak before returning. */
25402 clearing_reg
= gen_rtx_REG (SImode
, TARGET_THUMB1
? R0_REGNUM
: LR_REGNUM
);
25403 r1_reg
= gen_rtx_REG (SImode
, R0_REGNUM
+ 1);
25404 cmse_clear_registers (to_clear_bitmap
, &padding_bits_to_clear
, 1, r1_reg
,
25408 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
25409 POP instruction can be generated. LR should be replaced by PC. All
25410 the checks required are already done by USE_RETURN_INSN (). Hence,
25411 all we really need to check here is if single register is to be
25412 returned, or multiple register return. */
25414 thumb2_expand_return (bool simple_return
)
25417 unsigned long saved_regs_mask
;
25418 arm_stack_offsets
*offsets
;
25420 offsets
= arm_get_frame_offsets ();
25421 saved_regs_mask
= offsets
->saved_regs_mask
;
25423 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25424 if (saved_regs_mask
& (1 << i
))
25427 if (!simple_return
&& saved_regs_mask
)
25429 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25430 functions or adapt code to handle according to ACLE. This path should
25431 not be reachable for cmse_nonsecure_entry functions though we prefer
25432 to assert it for now to ensure that future code changes do not silently
25433 change this behavior. */
25434 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25437 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25438 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
25439 rtx addr
= gen_rtx_MEM (SImode
,
25440 gen_rtx_POST_INC (SImode
,
25441 stack_pointer_rtx
));
25442 set_mem_alias_set (addr
, get_frame_alias_set ());
25443 XVECEXP (par
, 0, 0) = ret_rtx
;
25444 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
25445 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
25446 emit_jump_insn (par
);
25450 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
25451 saved_regs_mask
|= (1 << PC_REGNUM
);
25452 arm_emit_multi_reg_pop (saved_regs_mask
);
25457 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25458 cmse_nonsecure_entry_clear_before_return ();
25459 emit_jump_insn (simple_return_rtx
);
25464 thumb1_expand_epilogue (void)
25466 HOST_WIDE_INT amount
;
25467 arm_stack_offsets
*offsets
;
25470 /* Naked functions don't have prologues. */
25471 if (IS_NAKED (arm_current_func_type ()))
25474 offsets
= arm_get_frame_offsets ();
25475 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25477 if (frame_pointer_needed
)
25479 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
25480 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25482 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
25484 gcc_assert (amount
>= 0);
25487 emit_insn (gen_blockage ());
25490 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
25491 GEN_INT (amount
)));
25494 /* r3 is always free in the epilogue. */
25495 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
25497 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
25498 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
25502 /* Emit a USE (stack_pointer_rtx), so that
25503 the stack adjustment will not be deleted. */
25504 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25506 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
25507 emit_insn (gen_blockage ());
25509 /* Emit a clobber for each insn that will be restored in the epilogue,
25510 so that flow2 will get register lifetimes correct. */
25511 for (regno
= 0; regno
< 13; regno
++)
25512 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
25513 emit_clobber (gen_rtx_REG (SImode
, regno
));
25515 if (! df_regs_ever_live_p (LR_REGNUM
))
25516 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
25518 /* Clear all caller-saved regs that are not used to return. */
25519 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25520 cmse_nonsecure_entry_clear_before_return ();
25523 /* Epilogue code for APCS frame. */
25525 arm_expand_epilogue_apcs_frame (bool really_return
)
25527 unsigned long func_type
;
25528 unsigned long saved_regs_mask
;
25531 int floats_from_frame
= 0;
25532 arm_stack_offsets
*offsets
;
25534 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
25535 func_type
= arm_current_func_type ();
25537 /* Get frame offsets for ARM. */
25538 offsets
= arm_get_frame_offsets ();
25539 saved_regs_mask
= offsets
->saved_regs_mask
;
25541 /* Find the offset of the floating-point save area in the frame. */
25543 = (offsets
->saved_args
25544 + arm_compute_static_chain_stack_bytes ()
25547 /* Compute how many core registers saved and how far away the floats are. */
25548 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25549 if (saved_regs_mask
& (1 << i
))
25552 floats_from_frame
+= 4;
25555 if (TARGET_HARD_FLOAT
)
25558 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
25560 /* The offset is from IP_REGNUM. */
25561 int saved_size
= arm_get_vfp_saved_size ();
25562 if (saved_size
> 0)
25565 floats_from_frame
+= saved_size
;
25566 insn
= emit_insn (gen_addsi3 (ip_rtx
,
25567 hard_frame_pointer_rtx
,
25568 GEN_INT (-floats_from_frame
)));
25569 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
25570 ip_rtx
, hard_frame_pointer_rtx
);
25573 /* Generate VFP register multi-pop. */
25574 start_reg
= FIRST_VFP_REGNUM
;
25576 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
25577 /* Look for a case where a reg does not need restoring. */
25578 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25579 && (!df_regs_ever_live_p (i
+ 1)
25580 || call_used_regs
[i
+ 1]))
25582 if (start_reg
!= i
)
25583 arm_emit_vfp_multi_reg_pop (start_reg
,
25584 (i
- start_reg
) / 2,
25585 gen_rtx_REG (SImode
,
25590 /* Restore the remaining regs that we have discovered (or possibly
25591 even all of them, if the conditional in the for loop never
25593 if (start_reg
!= i
)
25594 arm_emit_vfp_multi_reg_pop (start_reg
,
25595 (i
- start_reg
) / 2,
25596 gen_rtx_REG (SImode
, IP_REGNUM
));
25601 /* The frame pointer is guaranteed to be non-double-word aligned, as
25602 it is set to double-word-aligned old_stack_pointer - 4. */
25604 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
25606 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
25607 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25609 rtx addr
= gen_frame_mem (V2SImode
,
25610 plus_constant (Pmode
, hard_frame_pointer_rtx
,
25612 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25613 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25614 gen_rtx_REG (V2SImode
, i
),
25620 /* saved_regs_mask should contain IP which contains old stack pointer
25621 at the time of activation creation. Since SP and IP are adjacent registers,
25622 we can restore the value directly into SP. */
25623 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
25624 saved_regs_mask
&= ~(1 << IP_REGNUM
);
25625 saved_regs_mask
|= (1 << SP_REGNUM
);
25627 /* There are two registers left in saved_regs_mask - LR and PC. We
25628 only need to restore LR (the return address), but to
25629 save time we can load it directly into PC, unless we need a
25630 special function exit sequence, or we are not really returning. */
25632 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
25633 && !crtl
->calls_eh_return
)
25634 /* Delete LR from the register mask, so that LR on
25635 the stack is loaded into the PC in the register mask. */
25636 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25638 saved_regs_mask
&= ~(1 << PC_REGNUM
);
25640 num_regs
= bit_count (saved_regs_mask
);
25641 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
25644 emit_insn (gen_blockage ());
25645 /* Unwind the stack to just below the saved registers. */
25646 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25647 hard_frame_pointer_rtx
,
25648 GEN_INT (- 4 * num_regs
)));
25650 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
25651 stack_pointer_rtx
, hard_frame_pointer_rtx
);
25654 arm_emit_multi_reg_pop (saved_regs_mask
);
25656 if (IS_INTERRUPT (func_type
))
25658 /* Interrupt handlers will have pushed the
25659 IP onto the stack, so restore it now. */
25661 rtx addr
= gen_rtx_MEM (SImode
,
25662 gen_rtx_POST_INC (SImode
,
25663 stack_pointer_rtx
));
25664 set_mem_alias_set (addr
, get_frame_alias_set ());
25665 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
25666 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25667 gen_rtx_REG (SImode
, IP_REGNUM
),
25671 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
25674 if (crtl
->calls_eh_return
)
25675 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25677 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25679 if (IS_STACKALIGN (func_type
))
25680 /* Restore the original stack pointer. Before prologue, the stack was
25681 realigned and the original stack pointer saved in r0. For details,
25682 see comment in arm_expand_prologue. */
25683 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25685 emit_jump_insn (simple_return_rtx
);
25688 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25689 function is not a sibcall. */
25691 arm_expand_epilogue (bool really_return
)
25693 unsigned long func_type
;
25694 unsigned long saved_regs_mask
;
25698 arm_stack_offsets
*offsets
;
25700 func_type
= arm_current_func_type ();
25702 /* Naked functions don't have epilogue. Hence, generate return pattern, and
25703 let output_return_instruction take care of instruction emission if any. */
25704 if (IS_NAKED (func_type
)
25705 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
25708 emit_jump_insn (simple_return_rtx
);
25712 /* If we are throwing an exception, then we really must be doing a
25713 return, so we can't tail-call. */
25714 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
25716 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
25718 arm_expand_epilogue_apcs_frame (really_return
);
25722 /* Get frame offsets for ARM. */
25723 offsets
= arm_get_frame_offsets ();
25724 saved_regs_mask
= offsets
->saved_regs_mask
;
25725 num_regs
= bit_count (saved_regs_mask
);
25727 if (frame_pointer_needed
)
25730 /* Restore stack pointer if necessary. */
25733 /* In ARM mode, frame pointer points to first saved register.
25734 Restore stack pointer to last saved register. */
25735 amount
= offsets
->frame
- offsets
->saved_regs
;
25737 /* Force out any pending memory operations that reference stacked data
25738 before stack de-allocation occurs. */
25739 emit_insn (gen_blockage ());
25740 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25741 hard_frame_pointer_rtx
,
25742 GEN_INT (amount
)));
25743 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25745 hard_frame_pointer_rtx
);
25747 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25749 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25753 /* In Thumb-2 mode, the frame pointer points to the last saved
25755 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25758 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
25759 hard_frame_pointer_rtx
,
25760 GEN_INT (amount
)));
25761 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25762 hard_frame_pointer_rtx
,
25763 hard_frame_pointer_rtx
);
25766 /* Force out any pending memory operations that reference stacked data
25767 before stack de-allocation occurs. */
25768 emit_insn (gen_blockage ());
25769 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
25770 hard_frame_pointer_rtx
));
25771 arm_add_cfa_adjust_cfa_note (insn
, 0,
25773 hard_frame_pointer_rtx
);
25774 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25776 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25781 /* Pop off outgoing args and local frame to adjust stack pointer to
25782 last saved register. */
25783 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25787 /* Force out any pending memory operations that reference stacked data
25788 before stack de-allocation occurs. */
25789 emit_insn (gen_blockage ());
25790 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25792 GEN_INT (amount
)));
25793 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25794 stack_pointer_rtx
, stack_pointer_rtx
);
25795 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25797 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25801 if (TARGET_HARD_FLOAT
)
25803 /* Generate VFP register multi-pop. */
25804 int end_reg
= LAST_VFP_REGNUM
+ 1;
25806 /* Scan the registers in reverse order. We need to match
25807 any groupings made in the prologue and generate matching
25808 vldm operations. The need to match groups is because,
25809 unlike pop, vldm can only do consecutive regs. */
25810 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
25811 /* Look for a case where a reg does not need restoring. */
25812 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25813 && (!df_regs_ever_live_p (i
+ 1)
25814 || call_used_regs
[i
+ 1]))
25816 /* Restore the regs discovered so far (from reg+2 to
25818 if (end_reg
> i
+ 2)
25819 arm_emit_vfp_multi_reg_pop (i
+ 2,
25820 (end_reg
- (i
+ 2)) / 2,
25821 stack_pointer_rtx
);
25825 /* Restore the remaining regs that we have discovered (or possibly
25826 even all of them, if the conditional in the for loop never
25828 if (end_reg
> i
+ 2)
25829 arm_emit_vfp_multi_reg_pop (i
+ 2,
25830 (end_reg
- (i
+ 2)) / 2,
25831 stack_pointer_rtx
);
25835 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
25836 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25839 rtx addr
= gen_rtx_MEM (V2SImode
,
25840 gen_rtx_POST_INC (SImode
,
25841 stack_pointer_rtx
));
25842 set_mem_alias_set (addr
, get_frame_alias_set ());
25843 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25844 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25845 gen_rtx_REG (V2SImode
, i
),
25847 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25848 stack_pointer_rtx
, stack_pointer_rtx
);
25851 if (saved_regs_mask
)
25854 bool return_in_pc
= false;
25856 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
25857 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
25858 && !IS_CMSE_ENTRY (func_type
)
25859 && !IS_STACKALIGN (func_type
)
25861 && crtl
->args
.pretend_args_size
== 0
25862 && saved_regs_mask
& (1 << LR_REGNUM
)
25863 && !crtl
->calls_eh_return
)
25865 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25866 saved_regs_mask
|= (1 << PC_REGNUM
);
25867 return_in_pc
= true;
25870 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
25872 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25873 if (saved_regs_mask
& (1 << i
))
25875 rtx addr
= gen_rtx_MEM (SImode
,
25876 gen_rtx_POST_INC (SImode
,
25877 stack_pointer_rtx
));
25878 set_mem_alias_set (addr
, get_frame_alias_set ());
25880 if (i
== PC_REGNUM
)
25882 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25883 XVECEXP (insn
, 0, 0) = ret_rtx
;
25884 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
25886 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
25887 insn
= emit_jump_insn (insn
);
25891 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
25893 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25894 gen_rtx_REG (SImode
, i
),
25896 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25898 stack_pointer_rtx
);
25905 && current_tune
->prefer_ldrd_strd
25906 && !optimize_function_for_size_p (cfun
))
25909 thumb2_emit_ldrd_pop (saved_regs_mask
);
25910 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
25911 arm_emit_ldrd_pop (saved_regs_mask
);
25913 arm_emit_multi_reg_pop (saved_regs_mask
);
25916 arm_emit_multi_reg_pop (saved_regs_mask
);
25924 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
25928 rtx dwarf
= NULL_RTX
;
25930 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25932 GEN_INT (amount
)));
25934 RTX_FRAME_RELATED_P (tmp
) = 1;
25936 if (cfun
->machine
->uses_anonymous_args
)
25938 /* Restore pretend args. Refer arm_expand_prologue on how to save
25939 pretend_args in stack. */
25940 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
25941 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
25942 for (j
= 0, i
= 0; j
< num_regs
; i
++)
25943 if (saved_regs_mask
& (1 << i
))
25945 rtx reg
= gen_rtx_REG (SImode
, i
);
25946 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
25949 REG_NOTES (tmp
) = dwarf
;
25951 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25952 stack_pointer_rtx
, stack_pointer_rtx
);
25955 /* Clear all caller-saved regs that are not used to return. */
25956 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25958 /* CMSE_ENTRY always returns. */
25959 gcc_assert (really_return
);
25960 cmse_nonsecure_entry_clear_before_return ();
25963 if (!really_return
)
25966 if (crtl
->calls_eh_return
)
25967 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25969 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25971 if (IS_STACKALIGN (func_type
))
25972 /* Restore the original stack pointer. Before prologue, the stack was
25973 realigned and the original stack pointer saved in r0. For details,
25974 see comment in arm_expand_prologue. */
25975 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25977 emit_jump_insn (simple_return_rtx
);
25980 /* Implementation of insn prologue_thumb1_interwork. This is the first
25981 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25984 thumb1_output_interwork (void)
25987 FILE *f
= asm_out_file
;
25989 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
25990 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
25992 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
25994 /* Generate code sequence to switch us into Thumb mode. */
25995 /* The .code 32 directive has already been emitted by
25996 ASM_DECLARE_FUNCTION_NAME. */
25997 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
25998 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
26000 /* Generate a label, so that the debugger will notice the
26001 change in instruction sets. This label is also used by
26002 the assembler to bypass the ARM code when this function
26003 is called from a Thumb encoded function elsewhere in the
26004 same file. Hence the definition of STUB_NAME here must
26005 agree with the definition in gas/config/tc-arm.c. */
26007 #define STUB_NAME ".real_start_of"
26009 fprintf (f
, "\t.code\t16\n");
26011 if (arm_dllexport_name_p (name
))
26012 name
= arm_strip_name_encoding (name
);
26014 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
26015 fprintf (f
, "\t.thumb_func\n");
26016 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
26021 /* Handle the case of a double word load into a low register from
26022 a computed memory address. The computed address may involve a
26023 register which is overwritten by the load. */
26025 thumb_load_double_from_address (rtx
*operands
)
26033 gcc_assert (REG_P (operands
[0]));
26034 gcc_assert (MEM_P (operands
[1]));
26036 /* Get the memory address. */
26037 addr
= XEXP (operands
[1], 0);
26039 /* Work out how the memory address is computed. */
26040 switch (GET_CODE (addr
))
26043 operands
[2] = adjust_address (operands
[1], SImode
, 4);
26045 if (REGNO (operands
[0]) == REGNO (addr
))
26047 output_asm_insn ("ldr\t%H0, %2", operands
);
26048 output_asm_insn ("ldr\t%0, %1", operands
);
26052 output_asm_insn ("ldr\t%0, %1", operands
);
26053 output_asm_insn ("ldr\t%H0, %2", operands
);
26058 /* Compute <address> + 4 for the high order load. */
26059 operands
[2] = adjust_address (operands
[1], SImode
, 4);
26061 output_asm_insn ("ldr\t%0, %1", operands
);
26062 output_asm_insn ("ldr\t%H0, %2", operands
);
26066 arg1
= XEXP (addr
, 0);
26067 arg2
= XEXP (addr
, 1);
26069 if (CONSTANT_P (arg1
))
26070 base
= arg2
, offset
= arg1
;
26072 base
= arg1
, offset
= arg2
;
26074 gcc_assert (REG_P (base
));
26076 /* Catch the case of <address> = <reg> + <reg> */
26077 if (REG_P (offset
))
26079 int reg_offset
= REGNO (offset
);
26080 int reg_base
= REGNO (base
);
26081 int reg_dest
= REGNO (operands
[0]);
26083 /* Add the base and offset registers together into the
26084 higher destination register. */
26085 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
26086 reg_dest
+ 1, reg_base
, reg_offset
);
26088 /* Load the lower destination register from the address in
26089 the higher destination register. */
26090 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
26091 reg_dest
, reg_dest
+ 1);
26093 /* Load the higher destination register from its own address
26095 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
26096 reg_dest
+ 1, reg_dest
+ 1);
26100 /* Compute <address> + 4 for the high order load. */
26101 operands
[2] = adjust_address (operands
[1], SImode
, 4);
26103 /* If the computed address is held in the low order register
26104 then load the high order register first, otherwise always
26105 load the low order register first. */
26106 if (REGNO (operands
[0]) == REGNO (base
))
26108 output_asm_insn ("ldr\t%H0, %2", operands
);
26109 output_asm_insn ("ldr\t%0, %1", operands
);
26113 output_asm_insn ("ldr\t%0, %1", operands
);
26114 output_asm_insn ("ldr\t%H0, %2", operands
);
26120 /* With no registers to worry about we can just load the value
26122 operands
[2] = adjust_address (operands
[1], SImode
, 4);
26124 output_asm_insn ("ldr\t%H0, %2", operands
);
26125 output_asm_insn ("ldr\t%0, %1", operands
);
26129 gcc_unreachable ();
26136 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
26141 if (REGNO (operands
[4]) > REGNO (operands
[5]))
26142 std::swap (operands
[4], operands
[5]);
26144 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
26145 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
26149 if (REGNO (operands
[4]) > REGNO (operands
[5]))
26150 std::swap (operands
[4], operands
[5]);
26151 if (REGNO (operands
[5]) > REGNO (operands
[6]))
26152 std::swap (operands
[5], operands
[6]);
26153 if (REGNO (operands
[4]) > REGNO (operands
[5]))
26154 std::swap (operands
[4], operands
[5]);
26156 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
26157 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
26161 gcc_unreachable ();
26167 /* Output a call-via instruction for thumb state. */
26169 thumb_call_via_reg (rtx reg
)
26171 int regno
= REGNO (reg
);
26174 gcc_assert (regno
< LR_REGNUM
);
26176 /* If we are in the normal text section we can use a single instance
26177 per compilation unit. If we are doing function sections, then we need
26178 an entry per section, since we can't rely on reachability. */
26179 if (in_section
== text_section
)
26181 thumb_call_reg_needed
= 1;
26183 if (thumb_call_via_label
[regno
] == NULL
)
26184 thumb_call_via_label
[regno
] = gen_label_rtx ();
26185 labelp
= thumb_call_via_label
+ regno
;
26189 if (cfun
->machine
->call_via
[regno
] == NULL
)
26190 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
26191 labelp
= cfun
->machine
->call_via
+ regno
;
26194 output_asm_insn ("bl\t%a0", labelp
);
26198 /* Routines for generating rtl. */
26200 thumb_expand_movmemqi (rtx
*operands
)
26202 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
26203 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
26204 HOST_WIDE_INT len
= INTVAL (operands
[2]);
26205 HOST_WIDE_INT offset
= 0;
26209 emit_insn (gen_movmem12b (out
, in
, out
, in
));
26215 emit_insn (gen_movmem8b (out
, in
, out
, in
));
26221 rtx reg
= gen_reg_rtx (SImode
);
26222 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
26223 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
26230 rtx reg
= gen_reg_rtx (HImode
);
26231 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
26232 plus_constant (Pmode
, in
,
26234 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
26243 rtx reg
= gen_reg_rtx (QImode
);
26244 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
26245 plus_constant (Pmode
, in
,
26247 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
26254 thumb_reload_out_hi (rtx
*operands
)
26256 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
26259 /* Return the length of a function name prefix
26260 that starts with the character 'c'. */
26262 arm_get_strip_length (int c
)
26266 ARM_NAME_ENCODING_LENGTHS
26271 /* Return a pointer to a function's name with any
26272 and all prefix encodings stripped from it. */
26274 arm_strip_name_encoding (const char *name
)
26278 while ((skip
= arm_get_strip_length (* name
)))
26284 /* If there is a '*' anywhere in the name's prefix, then
26285 emit the stripped name verbatim, otherwise prepend an
26286 underscore if leading underscores are being used. */
26288 arm_asm_output_labelref (FILE *stream
, const char *name
)
26293 while ((skip
= arm_get_strip_length (* name
)))
26295 verbatim
|= (*name
== '*');
26300 fputs (name
, stream
);
26302 asm_fprintf (stream
, "%U%s", name
);
26305 /* This function is used to emit an EABI tag and its associated value.
26306 We emit the numerical value of the tag in case the assembler does not
26307 support textual tags. (Eg gas prior to 2.20). If requested we include
26308 the tag name in a comment so that anyone reading the assembler output
26309 will know which tag is being set.
26311 This function is not static because arm-c.c needs it too. */
26314 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
26316 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
26317 if (flag_verbose_asm
|| flag_debug_asm
)
26318 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
26319 asm_fprintf (asm_out_file
, "\n");
26322 /* This function is used to print CPU tuning information as comment
26323 in assembler file. Pointers are not printed for now. */
26326 arm_print_tune_info (void)
26328 asm_fprintf (asm_out_file
, "\t" ASM_COMMENT_START
".tune parameters\n");
26329 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"constant_limit:\t%d\n",
26330 current_tune
->constant_limit
);
26331 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26332 "max_insns_skipped:\t%d\n", current_tune
->max_insns_skipped
);
26333 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26334 "prefetch.num_slots:\t%d\n", current_tune
->prefetch
.num_slots
);
26335 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26336 "prefetch.l1_cache_size:\t%d\n",
26337 current_tune
->prefetch
.l1_cache_size
);
26338 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26339 "prefetch.l1_cache_line_size:\t%d\n",
26340 current_tune
->prefetch
.l1_cache_line_size
);
26341 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26342 "prefer_constant_pool:\t%d\n",
26343 (int) current_tune
->prefer_constant_pool
);
26344 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26345 "branch_cost:\t(s:speed, p:predictable)\n");
26346 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\ts&p\tcost\n");
26347 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t00\t%d\n",
26348 current_tune
->branch_cost (false, false));
26349 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t01\t%d\n",
26350 current_tune
->branch_cost (false, true));
26351 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t10\t%d\n",
26352 current_tune
->branch_cost (true, false));
26353 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t11\t%d\n",
26354 current_tune
->branch_cost (true, true));
26355 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26356 "prefer_ldrd_strd:\t%d\n",
26357 (int) current_tune
->prefer_ldrd_strd
);
26358 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26359 "logical_op_non_short_circuit:\t[%d,%d]\n",
26360 (int) current_tune
->logical_op_non_short_circuit_thumb
,
26361 (int) current_tune
->logical_op_non_short_circuit_arm
);
26362 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26363 "prefer_neon_for_64bits:\t%d\n",
26364 (int) current_tune
->prefer_neon_for_64bits
);
26365 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26366 "disparage_flag_setting_t16_encodings:\t%d\n",
26367 (int) current_tune
->disparage_flag_setting_t16_encodings
);
26368 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26369 "string_ops_prefer_neon:\t%d\n",
26370 (int) current_tune
->string_ops_prefer_neon
);
26371 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26372 "max_insns_inline_memset:\t%d\n",
26373 current_tune
->max_insns_inline_memset
);
26374 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"fusible_ops:\t%u\n",
26375 current_tune
->fusible_ops
);
26376 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"sched_autopref:\t%d\n",
26377 (int) current_tune
->sched_autopref
);
26380 /* Print .arch and .arch_extension directives corresponding to the
26381 current architecture configuration. */
26383 arm_print_asm_arch_directives ()
26385 const arch_option
*arch
26386 = arm_parse_arch_option_name (all_architectures
, "-march",
26387 arm_active_target
.arch_name
);
26388 auto_sbitmap
opt_bits (isa_num_bits
);
26392 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_active_target
.arch_name
);
26393 if (!arch
->common
.extensions
)
26396 for (const struct cpu_arch_extension
*opt
= arch
->common
.extensions
;
26402 arm_initialize_isa (opt_bits
, opt
->isa_bits
);
26404 /* If every feature bit of this option is set in the target
26405 ISA specification, print out the option name. However,
26406 don't print anything if all the bits are part of the
26407 FPU specification. */
26408 if (bitmap_subset_p (opt_bits
, arm_active_target
.isa
)
26409 && !bitmap_subset_p (opt_bits
, isa_all_fpubits
))
26410 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", opt
->name
);
26416 arm_file_start (void)
26422 /* We don't have a specified CPU. Use the architecture to
26425 Note: it might be better to do this unconditionally, then the
26426 assembler would not need to know about all new CPU names as
26428 if (!arm_active_target
.core_name
)
26430 /* armv7ve doesn't support any extensions. */
26431 if (strcmp (arm_active_target
.arch_name
, "armv7ve") == 0)
26433 /* Keep backward compatability for assemblers
26434 which don't support armv7ve. */
26435 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
26436 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
26437 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
26438 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
26439 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
26442 arm_print_asm_arch_directives ();
26444 else if (strncmp (arm_active_target
.core_name
, "generic", 7) == 0)
26445 asm_fprintf (asm_out_file
, "\t.arch %s\n",
26446 arm_active_target
.core_name
+ 8);
26449 const char* truncated_name
26450 = arm_rewrite_selected_cpu (arm_active_target
.core_name
);
26451 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
26454 if (print_tune_info
)
26455 arm_print_tune_info ();
26457 if (! TARGET_SOFT_FLOAT
)
26459 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
26460 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26462 if (TARGET_HARD_FLOAT_ABI
)
26463 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26466 /* Some of these attributes only apply when the corresponding features
26467 are used. However we don't have any easy way of figuring this out.
26468 Conservatively record the setting that would have been used. */
26470 if (flag_rounding_math
)
26471 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26473 if (!flag_unsafe_math_optimizations
)
26475 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26476 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26478 if (flag_signaling_nans
)
26479 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26481 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26482 flag_finite_math_only
? 1 : 3);
26484 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26485 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26486 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26487 flag_short_enums
? 1 : 2);
26489 /* Tag_ABI_optimization_goals. */
26492 else if (optimize
>= 2)
26498 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
26500 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26503 if (arm_fp16_format
)
26504 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26505 (int) arm_fp16_format
);
26507 if (arm_lang_output_object_attributes_hook
)
26508 arm_lang_output_object_attributes_hook();
26511 default_file_start ();
26515 arm_file_end (void)
26519 if (NEED_INDICATE_EXEC_STACK
)
26520 /* Add .note.GNU-stack. */
26521 file_end_indicate_exec_stack ();
26523 if (! thumb_call_reg_needed
)
26526 switch_to_section (text_section
);
26527 asm_fprintf (asm_out_file
, "\t.code 16\n");
26528 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
26530 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
26532 rtx label
= thumb_call_via_label
[regno
];
26536 targetm
.asm_out
.internal_label (asm_out_file
, "L",
26537 CODE_LABEL_NUMBER (label
));
26538 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
26544 /* Symbols in the text segment can be accessed without indirecting via the
26545 constant pool; it may take an extra binary operation, but this is still
26546 faster than indirecting via memory. Don't do this when not optimizing,
26547 since we won't be calculating al of the offsets necessary to do this
26551 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
26553 if (optimize
> 0 && TREE_CONSTANT (decl
))
26554 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
26556 default_encode_section_info (decl
, rtl
, first
);
26558 #endif /* !ARM_PE */
26561 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
26563 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
26564 && !strcmp (prefix
, "L"))
26566 arm_ccfsm_state
= 0;
26567 arm_target_insn
= NULL
;
26569 default_internal_label (stream
, prefix
, labelno
);
26572 /* Output code to add DELTA to the first argument, and then jump
26573 to FUNCTION. Used for C++ multiple inheritance. */
26576 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26577 HOST_WIDE_INT
, tree function
)
26579 static int thunk_label
= 0;
26582 int mi_delta
= delta
;
26583 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
26585 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
26588 mi_delta
= - mi_delta
;
26590 final_start_function (emit_barrier (), file
, 1);
26594 int labelno
= thunk_label
++;
26595 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
26596 /* Thunks are entered in arm mode when available. */
26597 if (TARGET_THUMB1_ONLY
)
26599 /* push r3 so we can use it as a temporary. */
26600 /* TODO: Omit this save if r3 is not used. */
26601 fputs ("\tpush {r3}\n", file
);
26602 fputs ("\tldr\tr3, ", file
);
26606 fputs ("\tldr\tr12, ", file
);
26608 assemble_name (file
, label
);
26609 fputc ('\n', file
);
26612 /* If we are generating PIC, the ldr instruction below loads
26613 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26614 the address of the add + 8, so we have:
26616 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26619 Note that we have "+ 1" because some versions of GNU ld
26620 don't set the low bit of the result for R_ARM_REL32
26621 relocations against thumb function symbols.
26622 On ARMv6M this is +4, not +8. */
26623 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
26624 assemble_name (file
, labelpc
);
26625 fputs (":\n", file
);
26626 if (TARGET_THUMB1_ONLY
)
26628 /* This is 2 insns after the start of the thunk, so we know it
26629 is 4-byte aligned. */
26630 fputs ("\tadd\tr3, pc, r3\n", file
);
26631 fputs ("\tmov r12, r3\n", file
);
26634 fputs ("\tadd\tr12, pc, r12\n", file
);
26636 else if (TARGET_THUMB1_ONLY
)
26637 fputs ("\tmov r12, r3\n", file
);
26639 if (TARGET_THUMB1_ONLY
)
26641 if (mi_delta
> 255)
26643 fputs ("\tldr\tr3, ", file
);
26644 assemble_name (file
, label
);
26645 fputs ("+4\n", file
);
26646 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
26647 mi_op
, this_regno
, this_regno
);
26649 else if (mi_delta
!= 0)
26651 /* Thumb1 unified syntax requires s suffix in instruction name when
26652 one of the operands is immediate. */
26653 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
26654 mi_op
, this_regno
, this_regno
,
26660 /* TODO: Use movw/movt for large constants when available. */
26661 while (mi_delta
!= 0)
26663 if ((mi_delta
& (3 << shift
)) == 0)
26667 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
26668 mi_op
, this_regno
, this_regno
,
26669 mi_delta
& (0xff << shift
));
26670 mi_delta
&= ~(0xff << shift
);
26677 if (TARGET_THUMB1_ONLY
)
26678 fputs ("\tpop\t{r3}\n", file
);
26680 fprintf (file
, "\tbx\tr12\n");
26681 ASM_OUTPUT_ALIGN (file
, 2);
26682 assemble_name (file
, label
);
26683 fputs (":\n", file
);
26686 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26687 rtx tem
= XEXP (DECL_RTL (function
), 0);
26688 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26689 pipeline offset is four rather than eight. Adjust the offset
26691 tem
= plus_constant (GET_MODE (tem
), tem
,
26692 TARGET_THUMB1_ONLY
? -3 : -7);
26693 tem
= gen_rtx_MINUS (GET_MODE (tem
),
26695 gen_rtx_SYMBOL_REF (Pmode
,
26696 ggc_strdup (labelpc
)));
26697 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
26700 /* Output ".word .LTHUNKn". */
26701 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
26703 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
26704 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
26708 fputs ("\tb\t", file
);
26709 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
26710 if (NEED_PLT_RELOC
)
26711 fputs ("(PLT)", file
);
26712 fputc ('\n', file
);
26715 final_end_function ();
26718 /* MI thunk handling for TARGET_32BIT. */
26721 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26722 HOST_WIDE_INT vcall_offset
, tree function
)
26724 /* On ARM, this_regno is R0 or R1 depending on
26725 whether the function returns an aggregate or not.
26727 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
26729 ? R1_REGNUM
: R0_REGNUM
);
26731 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
26732 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
26733 reload_completed
= 1;
26734 emit_note (NOTE_INSN_PROLOGUE_END
);
26736 /* Add DELTA to THIS_RTX. */
26738 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
26739 delta
, this_rtx
, this_rtx
, false);
26741 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26742 if (vcall_offset
!= 0)
26744 /* Load *THIS_RTX. */
26745 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
26746 /* Compute *THIS_RTX + VCALL_OFFSET. */
26747 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
26749 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26750 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
26751 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
26754 /* Generate a tail call to the target function. */
26755 if (!TREE_USED (function
))
26757 assemble_external (function
);
26758 TREE_USED (function
) = 1;
26760 rtx funexp
= XEXP (DECL_RTL (function
), 0);
26761 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
26762 rtx_insn
* insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
26763 SIBLING_CALL_P (insn
) = 1;
26765 insn
= get_insns ();
26766 shorten_branches (insn
);
26767 final_start_function (insn
, file
, 1);
26768 final (insn
, file
, 1);
26769 final_end_function ();
26771 /* Stop pretending this is a post-reload pass. */
26772 reload_completed
= 0;
26775 /* Output code to add DELTA to the first argument, and then jump
26776 to FUNCTION. Used for C++ multiple inheritance. */
26779 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
26780 HOST_WIDE_INT vcall_offset
, tree function
)
26783 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26785 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26789 arm_emit_vector_const (FILE *file
, rtx x
)
26792 const char * pattern
;
26794 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
26796 switch (GET_MODE (x
))
26798 case E_V2SImode
: pattern
= "%08x"; break;
26799 case E_V4HImode
: pattern
= "%04x"; break;
26800 case E_V8QImode
: pattern
= "%02x"; break;
26801 default: gcc_unreachable ();
26804 fprintf (file
, "0x");
26805 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
26809 element
= CONST_VECTOR_ELT (x
, i
);
26810 fprintf (file
, pattern
, INTVAL (element
));
26816 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26817 HFmode constant pool entries are actually loaded with ldr. */
26819 arm_emit_fp16_const (rtx c
)
26823 bits
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (c
), HFmode
);
26824 if (WORDS_BIG_ENDIAN
)
26825 assemble_zeros (2);
26826 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
26827 if (!WORDS_BIG_ENDIAN
)
26828 assemble_zeros (2);
26832 arm_output_load_gr (rtx
*operands
)
26839 if (!MEM_P (operands
[1])
26840 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
26841 || !REG_P (reg
= XEXP (sum
, 0))
26842 || !CONST_INT_P (offset
= XEXP (sum
, 1))
26843 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
26844 return "wldrw%?\t%0, %1";
26846 /* Fix up an out-of-range load of a GR register. */
26847 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
26848 wcgr
= operands
[0];
26850 output_asm_insn ("ldr%?\t%0, %1", operands
);
26852 operands
[0] = wcgr
;
26854 output_asm_insn ("tmcr%?\t%0, %1", operands
);
26855 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
26860 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26862 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26863 named arg and all anonymous args onto the stack.
26864 XXX I know the prologue shouldn't be pushing registers, but it is faster
26868 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
26872 int second_time ATTRIBUTE_UNUSED
)
26874 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
26877 cfun
->machine
->uses_anonymous_args
= 1;
26878 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
26880 nregs
= pcum
->aapcs_ncrn
;
26883 int res
= arm_needs_doubleword_align (mode
, type
);
26884 if (res
< 0 && warn_psabi
)
26885 inform (input_location
, "parameter passing for argument of "
26886 "type %qT changed in GCC 7.1", type
);
26892 nregs
= pcum
->nregs
;
26894 if (nregs
< NUM_ARG_REGS
)
26895 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
26898 /* We can't rely on the caller doing the proper promotion when
26899 using APCS or ATPCS. */
26902 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
26904 return !TARGET_AAPCS_BASED
;
26907 static machine_mode
26908 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
26910 int *punsignedp ATTRIBUTE_UNUSED
,
26911 const_tree fntype ATTRIBUTE_UNUSED
,
26912 int for_return ATTRIBUTE_UNUSED
)
26914 if (GET_MODE_CLASS (mode
) == MODE_INT
26915 && GET_MODE_SIZE (mode
) < 4)
26923 arm_default_short_enums (void)
26925 return ARM_DEFAULT_SHORT_ENUMS
;
26929 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26932 arm_align_anon_bitfield (void)
26934 return TARGET_AAPCS_BASED
;
26938 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26941 arm_cxx_guard_type (void)
26943 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
26947 /* The EABI says test the least significant bit of a guard variable. */
26950 arm_cxx_guard_mask_bit (void)
26952 return TARGET_AAPCS_BASED
;
26956 /* The EABI specifies that all array cookies are 8 bytes long. */
26959 arm_get_cookie_size (tree type
)
26963 if (!TARGET_AAPCS_BASED
)
26964 return default_cxx_get_cookie_size (type
);
26966 size
= build_int_cst (sizetype
, 8);
26971 /* The EABI says that array cookies should also contain the element size. */
26974 arm_cookie_has_size (void)
26976 return TARGET_AAPCS_BASED
;
26980 /* The EABI says constructors and destructors should return a pointer to
26981 the object constructed/destroyed. */
26984 arm_cxx_cdtor_returns_this (void)
26986 return TARGET_AAPCS_BASED
;
26989 /* The EABI says that an inline function may never be the key
26993 arm_cxx_key_method_may_be_inline (void)
26995 return !TARGET_AAPCS_BASED
;
26999 arm_cxx_determine_class_data_visibility (tree decl
)
27001 if (!TARGET_AAPCS_BASED
27002 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
27005 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27006 is exported. However, on systems without dynamic vague linkage,
27007 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27008 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
27009 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
27011 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
27012 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
27016 arm_cxx_class_data_always_comdat (void)
27018 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27019 vague linkage if the class has no key function. */
27020 return !TARGET_AAPCS_BASED
;
27024 /* The EABI says __aeabi_atexit should be used to register static
27028 arm_cxx_use_aeabi_atexit (void)
27030 return TARGET_AAPCS_BASED
;
27035 arm_set_return_address (rtx source
, rtx scratch
)
27037 arm_stack_offsets
*offsets
;
27038 HOST_WIDE_INT delta
;
27040 unsigned long saved_regs
;
27042 offsets
= arm_get_frame_offsets ();
27043 saved_regs
= offsets
->saved_regs_mask
;
27045 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
27046 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
27049 if (frame_pointer_needed
)
27050 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
27053 /* LR will be the first saved register. */
27054 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
27059 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
27060 GEN_INT (delta
& ~4095)));
27065 addr
= stack_pointer_rtx
;
27067 addr
= plus_constant (Pmode
, addr
, delta
);
27070 /* The store needs to be marked to prevent DSE from deleting
27071 it as dead if it is based on fp. */
27072 mem
= gen_frame_mem (Pmode
, addr
);
27073 MEM_VOLATILE_P (mem
) = true;
27074 emit_move_insn (mem
, source
);
27080 thumb_set_return_address (rtx source
, rtx scratch
)
27082 arm_stack_offsets
*offsets
;
27083 HOST_WIDE_INT delta
;
27084 HOST_WIDE_INT limit
;
27087 unsigned long mask
;
27091 offsets
= arm_get_frame_offsets ();
27092 mask
= offsets
->saved_regs_mask
;
27093 if (mask
& (1 << LR_REGNUM
))
27096 /* Find the saved regs. */
27097 if (frame_pointer_needed
)
27099 delta
= offsets
->soft_frame
- offsets
->saved_args
;
27100 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
27106 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
27109 /* Allow for the stack frame. */
27110 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
27112 /* The link register is always the first saved register. */
27115 /* Construct the address. */
27116 addr
= gen_rtx_REG (SImode
, reg
);
27119 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
27120 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
27124 addr
= plus_constant (Pmode
, addr
, delta
);
27126 /* The store needs to be marked to prevent DSE from deleting
27127 it as dead if it is based on fp. */
27128 mem
= gen_frame_mem (Pmode
, addr
);
27129 MEM_VOLATILE_P (mem
) = true;
27130 emit_move_insn (mem
, source
);
27133 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
27136 /* Implements target hook vector_mode_supported_p. */
27138 arm_vector_mode_supported_p (machine_mode mode
)
27140 /* Neon also supports V2SImode, etc. listed in the clause below. */
27141 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
27142 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
27143 || mode
== V2DImode
|| mode
== V8HFmode
))
27146 if ((TARGET_NEON
|| TARGET_IWMMXT
)
27147 && ((mode
== V2SImode
)
27148 || (mode
== V4HImode
)
27149 || (mode
== V8QImode
)))
27152 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
27153 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
27154 || mode
== V2HAmode
))
27160 /* Implements target hook array_mode_supported_p. */
27163 arm_array_mode_supported_p (machine_mode mode
,
27164 unsigned HOST_WIDE_INT nelems
)
27167 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
27168 && (nelems
>= 2 && nelems
<= 4))
27174 /* Use the option -mvectorize-with-neon-double to override the use of quardword
27175 registers when autovectorizing for Neon, at least until multiple vector
27176 widths are supported properly by the middle-end. */
27178 static machine_mode
27179 arm_preferred_simd_mode (scalar_mode mode
)
27185 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
27187 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
27189 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
27191 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
27193 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
27200 if (TARGET_REALLY_IWMMXT
)
27216 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27218 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27219 using r0-r4 for function arguments, r7 for the stack frame and don't have
27220 enough left over to do doubleword arithmetic. For Thumb-2 all the
27221 potentially problematic instructions accept high registers so this is not
27222 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27223 that require many low registers. */
27225 arm_class_likely_spilled_p (reg_class_t rclass
)
27227 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
27228 || rclass
== CC_REG
)
27234 /* Implements target hook small_register_classes_for_mode_p. */
27236 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
27238 return TARGET_THUMB1
;
27241 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27242 ARM insns and therefore guarantee that the shift count is modulo 256.
27243 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27244 guarantee no particular behavior for out-of-range counts. */
27246 static unsigned HOST_WIDE_INT
27247 arm_shift_truncation_mask (machine_mode mode
)
27249 return mode
== SImode
? 255 : 0;
27253 /* Map internal gcc register numbers to DWARF2 register numbers. */
27256 arm_dbx_register_number (unsigned int regno
)
27261 if (IS_VFP_REGNUM (regno
))
27263 /* See comment in arm_dwarf_register_span. */
27264 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
27265 return 64 + regno
- FIRST_VFP_REGNUM
;
27267 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
27270 if (IS_IWMMXT_GR_REGNUM (regno
))
27271 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
27273 if (IS_IWMMXT_REGNUM (regno
))
27274 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
27276 return DWARF_FRAME_REGISTERS
;
27279 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27280 GCC models tham as 64 32-bit registers, so we need to describe this to
27281 the DWARF generation code. Other registers can use the default. */
27283 arm_dwarf_register_span (rtx rtl
)
27291 regno
= REGNO (rtl
);
27292 if (!IS_VFP_REGNUM (regno
))
27295 /* XXX FIXME: The EABI defines two VFP register ranges:
27296 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27298 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27299 corresponding D register. Until GDB supports this, we shall use the
27300 legacy encodings. We also use these encodings for D0-D15 for
27301 compatibility with older debuggers. */
27302 mode
= GET_MODE (rtl
);
27303 if (GET_MODE_SIZE (mode
) < 8)
27306 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
27308 nregs
= GET_MODE_SIZE (mode
) / 4;
27309 for (i
= 0; i
< nregs
; i
+= 2)
27310 if (TARGET_BIG_END
)
27312 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
27313 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
27317 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
27318 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
27323 nregs
= GET_MODE_SIZE (mode
) / 8;
27324 for (i
= 0; i
< nregs
; i
++)
27325 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
27328 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
27331 #if ARM_UNWIND_INFO
27332 /* Emit unwind directives for a store-multiple instruction or stack pointer
27333 push during alignment.
27334 These should only ever be generated by the function prologue code, so
27335 expect them to have a particular form.
27336 The store-multiple instruction sometimes pushes pc as the last register,
27337 although it should not be tracked into unwind information, or for -Os
27338 sometimes pushes some dummy registers before first register that needs
27339 to be tracked in unwind information; such dummy registers are there just
27340 to avoid separate stack adjustment, and will not be restored in the
27344 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
27347 HOST_WIDE_INT offset
;
27348 HOST_WIDE_INT nregs
;
27352 unsigned padfirst
= 0, padlast
= 0;
27355 e
= XVECEXP (p
, 0, 0);
27356 gcc_assert (GET_CODE (e
) == SET
);
27358 /* First insn will adjust the stack pointer. */
27359 gcc_assert (GET_CODE (e
) == SET
27360 && REG_P (SET_DEST (e
))
27361 && REGNO (SET_DEST (e
)) == SP_REGNUM
27362 && GET_CODE (SET_SRC (e
)) == PLUS
);
27364 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
27365 nregs
= XVECLEN (p
, 0) - 1;
27366 gcc_assert (nregs
);
27368 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
27371 /* For -Os dummy registers can be pushed at the beginning to
27372 avoid separate stack pointer adjustment. */
27373 e
= XVECEXP (p
, 0, 1);
27374 e
= XEXP (SET_DEST (e
), 0);
27375 if (GET_CODE (e
) == PLUS
)
27376 padfirst
= INTVAL (XEXP (e
, 1));
27377 gcc_assert (padfirst
== 0 || optimize_size
);
27378 /* The function prologue may also push pc, but not annotate it as it is
27379 never restored. We turn this into a stack pointer adjustment. */
27380 e
= XVECEXP (p
, 0, nregs
);
27381 e
= XEXP (SET_DEST (e
), 0);
27382 if (GET_CODE (e
) == PLUS
)
27383 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
27385 padlast
= offset
- 4;
27386 gcc_assert (padlast
== 0 || padlast
== 4);
27388 fprintf (asm_out_file
, "\t.pad #4\n");
27390 fprintf (asm_out_file
, "\t.save {");
27392 else if (IS_VFP_REGNUM (reg
))
27395 fprintf (asm_out_file
, "\t.vsave {");
27398 /* Unknown register type. */
27399 gcc_unreachable ();
27401 /* If the stack increment doesn't match the size of the saved registers,
27402 something has gone horribly wrong. */
27403 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
27407 /* The remaining insns will describe the stores. */
27408 for (i
= 1; i
<= nregs
; i
++)
27410 /* Expect (set (mem <addr>) (reg)).
27411 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27412 e
= XVECEXP (p
, 0, i
);
27413 gcc_assert (GET_CODE (e
) == SET
27414 && MEM_P (SET_DEST (e
))
27415 && REG_P (SET_SRC (e
)));
27417 reg
= REGNO (SET_SRC (e
));
27418 gcc_assert (reg
>= lastreg
);
27421 fprintf (asm_out_file
, ", ");
27422 /* We can't use %r for vfp because we need to use the
27423 double precision register names. */
27424 if (IS_VFP_REGNUM (reg
))
27425 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
27427 asm_fprintf (asm_out_file
, "%r", reg
);
27431 /* Check that the addresses are consecutive. */
27432 e
= XEXP (SET_DEST (e
), 0);
27433 if (GET_CODE (e
) == PLUS
)
27434 gcc_assert (REG_P (XEXP (e
, 0))
27435 && REGNO (XEXP (e
, 0)) == SP_REGNUM
27436 && CONST_INT_P (XEXP (e
, 1))
27437 && offset
== INTVAL (XEXP (e
, 1)));
27441 && REGNO (e
) == SP_REGNUM
);
27442 offset
+= reg_size
;
27445 fprintf (asm_out_file
, "}\n");
27447 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
27450 /* Emit unwind directives for a SET. */
27453 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
27461 switch (GET_CODE (e0
))
27464 /* Pushing a single register. */
27465 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
27466 || !REG_P (XEXP (XEXP (e0
, 0), 0))
27467 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
27470 asm_fprintf (asm_out_file
, "\t.save ");
27471 if (IS_VFP_REGNUM (REGNO (e1
)))
27472 asm_fprintf(asm_out_file
, "{d%d}\n",
27473 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
27475 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
27479 if (REGNO (e0
) == SP_REGNUM
)
27481 /* A stack increment. */
27482 if (GET_CODE (e1
) != PLUS
27483 || !REG_P (XEXP (e1
, 0))
27484 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
27485 || !CONST_INT_P (XEXP (e1
, 1)))
27488 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
27489 -INTVAL (XEXP (e1
, 1)));
27491 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
27493 HOST_WIDE_INT offset
;
27495 if (GET_CODE (e1
) == PLUS
)
27497 if (!REG_P (XEXP (e1
, 0))
27498 || !CONST_INT_P (XEXP (e1
, 1)))
27500 reg
= REGNO (XEXP (e1
, 0));
27501 offset
= INTVAL (XEXP (e1
, 1));
27502 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
27503 HARD_FRAME_POINTER_REGNUM
, reg
,
27506 else if (REG_P (e1
))
27509 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
27510 HARD_FRAME_POINTER_REGNUM
, reg
);
27515 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
27517 /* Move from sp to reg. */
27518 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
27520 else if (GET_CODE (e1
) == PLUS
27521 && REG_P (XEXP (e1
, 0))
27522 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
27523 && CONST_INT_P (XEXP (e1
, 1)))
27525 /* Set reg to offset from sp. */
27526 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
27527 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
27539 /* Emit unwind directives for the given insn. */
27542 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
27545 bool handled_one
= false;
27547 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27550 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27551 && (TREE_NOTHROW (current_function_decl
)
27552 || crtl
->all_throwers_are_sibcalls
))
27555 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
27558 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
27560 switch (REG_NOTE_KIND (note
))
27562 case REG_FRAME_RELATED_EXPR
:
27563 pat
= XEXP (note
, 0);
27566 case REG_CFA_REGISTER
:
27567 pat
= XEXP (note
, 0);
27570 pat
= PATTERN (insn
);
27571 if (GET_CODE (pat
) == PARALLEL
)
27572 pat
= XVECEXP (pat
, 0, 0);
27575 /* Only emitted for IS_STACKALIGN re-alignment. */
27580 src
= SET_SRC (pat
);
27581 dest
= SET_DEST (pat
);
27583 gcc_assert (src
== stack_pointer_rtx
);
27584 reg
= REGNO (dest
);
27585 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27588 handled_one
= true;
27591 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
27592 to get correct dwarf information for shrink-wrap. We should not
27593 emit unwind information for it because these are used either for
27594 pretend arguments or notes to adjust sp and restore registers from
27596 case REG_CFA_DEF_CFA
:
27597 case REG_CFA_ADJUST_CFA
:
27598 case REG_CFA_RESTORE
:
27601 case REG_CFA_EXPRESSION
:
27602 case REG_CFA_OFFSET
:
27603 /* ??? Only handling here what we actually emit. */
27604 gcc_unreachable ();
27612 pat
= PATTERN (insn
);
27615 switch (GET_CODE (pat
))
27618 arm_unwind_emit_set (asm_out_file
, pat
);
27622 /* Store multiple. */
27623 arm_unwind_emit_sequence (asm_out_file
, pat
);
27632 /* Output a reference from a function exception table to the type_info
27633 object X. The EABI specifies that the symbol should be relocated by
27634 an R_ARM_TARGET2 relocation. */
27637 arm_output_ttype (rtx x
)
27639 fputs ("\t.word\t", asm_out_file
);
27640 output_addr_const (asm_out_file
, x
);
27641 /* Use special relocations for symbol references. */
27642 if (!CONST_INT_P (x
))
27643 fputs ("(TARGET2)", asm_out_file
);
27644 fputc ('\n', asm_out_file
);
27649 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27652 arm_asm_emit_except_personality (rtx personality
)
27654 fputs ("\t.personality\t", asm_out_file
);
27655 output_addr_const (asm_out_file
, personality
);
27656 fputc ('\n', asm_out_file
);
27658 #endif /* ARM_UNWIND_INFO */
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  /* EABI unwinding tables live in the exception-handling data section.  */
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}
27676 /* Output unwind directives for the start/end of a function. */
27679 arm_output_fn_unwind (FILE * f
, bool prologue
)
27681 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27685 fputs ("\t.fnstart\n", f
);
27688 /* If this function will never be unwound, then mark it as such.
27689 The came condition is used in arm_unwind_emit to suppress
27690 the frame annotations. */
27691 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27692 && (TREE_NOTHROW (current_function_decl
)
27693 || crtl
->all_throwers_are_sibcalls
))
27694 fputs("\t.cantunwind\n", f
);
27696 fputs ("\t.fnend\n", f
);
27701 arm_emit_tls_decoration (FILE *fp
, rtx x
)
27703 enum tls_reloc reloc
;
27706 val
= XVECEXP (x
, 0, 0);
27707 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
27709 output_addr_const (fp
, val
);
27714 fputs ("(tlsgd)", fp
);
27717 fputs ("(tlsldm)", fp
);
27720 fputs ("(tlsldo)", fp
);
27723 fputs ("(gottpoff)", fp
);
27726 fputs ("(tpoff)", fp
);
27729 fputs ("(tlsdesc)", fp
);
27732 gcc_unreachable ();
27741 fputs (" + (. - ", fp
);
27742 output_addr_const (fp
, XVECEXP (x
, 0, 2));
27743 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27744 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
27745 output_addr_const (fp
, XVECEXP (x
, 0, 3));
27755 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27758 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
27760 gcc_assert (size
== 4);
27761 fputs ("\t.word\t", file
);
27762 output_addr_const (file
, x
);
27763 fputs ("(tlsldo)", file
);
27766 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27769 arm_output_addr_const_extra (FILE *fp
, rtx x
)
27771 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
27772 return arm_emit_tls_decoration (fp
, x
);
27773 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
27776 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
27778 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
27779 assemble_name_raw (fp
, label
);
27783 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
27785 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
27789 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27793 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
27795 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27799 output_addr_const (fp
, XVECEXP (x
, 0, 1));
27803 else if (GET_CODE (x
) == CONST_VECTOR
)
27804 return arm_emit_vector_const (fp
, x
);
27809 /* Output assembly for a shift instruction.
27810 SET_FLAGS determines how the instruction modifies the condition codes.
27811 0 - Do not set condition codes.
27812 1 - Set condition codes.
27813 2 - Use smallest instruction. */
27815 arm_output_shift(rtx
* operands
, int set_flags
)
27818 static const char flag_chars
[3] = {'?', '.', '!'};
27823 c
= flag_chars
[set_flags
];
27824 shift
= shift_op(operands
[3], &val
);
27828 operands
[2] = GEN_INT(val
);
27829 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
27832 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
27834 output_asm_insn (pattern
, operands
);
27838 /* Output assembly for a WMMX immediate shift instruction. */
27840 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
27842 int shift
= INTVAL (operands
[2]);
27844 machine_mode opmode
= GET_MODE (operands
[0]);
27846 gcc_assert (shift
>= 0);
27848 /* If the shift value in the register versions is > 63 (for D qualifier),
27849 31 (for W qualifier) or 15 (for H qualifier). */
27850 if (((opmode
== V4HImode
) && (shift
> 15))
27851 || ((opmode
== V2SImode
) && (shift
> 31))
27852 || ((opmode
== DImode
) && (shift
> 63)))
27856 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27857 output_asm_insn (templ
, operands
);
27858 if (opmode
== DImode
)
27860 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
27861 output_asm_insn (templ
, operands
);
27866 /* The destination register will contain all zeros. */
27867 sprintf (templ
, "wzero\t%%0");
27868 output_asm_insn (templ
, operands
);
27873 if ((opmode
== DImode
) && (shift
> 32))
27875 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27876 output_asm_insn (templ
, operands
);
27877 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
27878 output_asm_insn (templ
, operands
);
27882 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
27883 output_asm_insn (templ
, operands
);
27888 /* Output assembly for a WMMX tinsr instruction. */
27890 arm_output_iwmmxt_tinsr (rtx
*operands
)
27892 int mask
= INTVAL (operands
[3]);
27895 int units
= mode_nunits
[GET_MODE (operands
[0])];
27896 gcc_assert ((mask
& (mask
- 1)) == 0);
27897 for (i
= 0; i
< units
; ++i
)
27899 if ((mask
& 0x01) == 1)
27905 gcc_assert (i
< units
);
27907 switch (GET_MODE (operands
[0]))
27910 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
27913 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
27916 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
27919 gcc_unreachable ();
27922 output_asm_insn (templ
, operands
);
27927 /* Output a Thumb-1 casesi dispatch sequence. */
27929 thumb1_output_casesi (rtx
*operands
)
27931 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
27933 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27935 switch (GET_MODE(diff_vec
))
27938 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27939 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27941 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27942 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27944 return "bl\t%___gnu_thumb1_case_si";
27946 gcc_unreachable ();
27950 /* Output a Thumb-2 casesi instruction. */
27952 thumb2_output_casesi (rtx
*operands
)
27954 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
27956 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27958 output_asm_insn ("cmp\t%0, %1", operands
);
27959 output_asm_insn ("bhi\t%l3", operands
);
27960 switch (GET_MODE(diff_vec
))
27963 return "tbb\t[%|pc, %0]";
27965 return "tbh\t[%|pc, %0, lsl #1]";
27969 output_asm_insn ("adr\t%4, %l2", operands
);
27970 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
27971 output_asm_insn ("add\t%4, %4, %5", operands
);
27976 output_asm_insn ("adr\t%4, %l2", operands
);
27977 return "ldr\t%|pc, [%4, %0, lsl #2]";
27980 gcc_unreachable ();
27984 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27985 per-core tuning structs. */
27987 arm_issue_rate (void)
27989 return current_tune
->issue_rate
;
27992 /* Return how many instructions should scheduler lookahead to choose the
27995 arm_first_cycle_multipass_dfa_lookahead (void)
27997 int issue_rate
= arm_issue_rate ();
27999 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
28002 /* Enable modeling of L2 auto-prefetcher. */
28004 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
28006 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
28010 arm_mangle_type (const_tree type
)
28012 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28013 has to be managled as if it is in the "std" namespace. */
28014 if (TARGET_AAPCS_BASED
28015 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
28016 return "St9__va_list";
28018 /* Half-precision float. */
28019 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
28022 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
28024 if (TYPE_NAME (type
) != NULL
)
28025 return arm_mangle_builtin_type (type
);
28027 /* Use the default mangling. */
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */
static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};
28043 /* Adjust register allocation order when compiling for Thumb. */
28046 arm_order_regs_for_local_alloc (void)
28048 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
28049 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
28051 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
28052 sizeof (thumb_core_reg_alloc_order
));
28055 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28058 arm_frame_pointer_required (void)
28060 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
28063 /* If the function receives nonlocal gotos, it needs to save the frame
28064 pointer in the nonlocal_goto_save_area object. */
28065 if (cfun
->has_nonlocal_label
)
28068 /* The frame pointer is required for non-leaf APCS frames. */
28069 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !crtl
->is_leaf
)
28072 /* If we are probing the stack in the prologue, we will have a faulting
28073 instruction prior to the stack adjustment and this requires a frame
28074 pointer if we want to catch the exception using the EABI unwinder. */
28075 if (!IS_INTERRUPT (arm_current_func_type ())
28076 && (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
28077 || flag_stack_clash_protection
)
28078 && arm_except_unwind_info (&global_options
) == UI_TARGET
28079 && cfun
->can_throw_non_call_exceptions
)
28081 HOST_WIDE_INT size
= get_frame_size ();
28083 /* That's irrelevant if there is no stack adjustment. */
28087 /* That's relevant only if there is a stack probe. */
28088 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
28090 /* We don't have the final size of the frame so adjust. */
28091 size
+= 32 * UNITS_PER_WORD
;
28092 if (size
> PROBE_INTERVAL
&& size
> get_stack_check_protect ())
28102 /* Only thumb1 can't support conditional execution, so return true if
28103 the target is not thumb1. */
28105 arm_have_conditional_execution (void)
28107 return !TARGET_THUMB1
;
28110 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28111 static HOST_WIDE_INT
28112 arm_vector_alignment (const_tree type
)
28114 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
28116 if (TARGET_AAPCS_BASED
)
28117 align
= MIN (align
, 64);
28122 static unsigned int
28123 arm_autovectorize_vector_sizes (void)
28125 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
28129 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
28131 /* Vectors which aren't in packed structures will not be less aligned than
28132 the natural alignment of their element type, so this is safe. */
28133 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
28136 return default_builtin_vector_alignment_reachable (type
, is_packed
);
28140 arm_builtin_support_vector_misalignment (machine_mode mode
,
28141 const_tree type
, int misalignment
,
28144 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
28146 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
28151 /* If the misalignment is unknown, we should be able to handle the access
28152 so long as it is not to a member of a packed data structure. */
28153 if (misalignment
== -1)
28156 /* Return true if the misalignment is a multiple of the natural alignment
28157 of the vector's element type. This is probably always going to be
28158 true in practice, since we've already established that this isn't a
28160 return ((misalignment
% align
) == 0);
28163 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
28168 arm_conditional_register_usage (void)
28172 if (TARGET_THUMB1
&& optimize_size
)
28174 /* When optimizing for size on Thumb-1, it's better not
28175 to use the HI regs, because of the overhead of
28177 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
28178 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
28181 /* The link register can be clobbered by any branch insn,
28182 but we have no way to track that at present, so mark
28183 it as unavailable. */
28185 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
28187 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
)
28189 /* VFPv3 registers are disabled when earlier VFP
28190 versions are selected due to the definition of
28191 LAST_VFP_REGNUM. */
28192 for (regno
= FIRST_VFP_REGNUM
;
28193 regno
<= LAST_VFP_REGNUM
; ++ regno
)
28195 fixed_regs
[regno
] = 0;
28196 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
28197 || regno
>= FIRST_VFP_REGNUM
+ 32;
28201 if (TARGET_REALLY_IWMMXT
)
28203 regno
= FIRST_IWMMXT_GR_REGNUM
;
28204 /* The 2002/10/09 revision of the XScale ABI has wCG0
28205 and wCG1 as call-preserved registers. The 2002/11/21
28206 revision changed this so that all wCG registers are
28207 scratch registers. */
28208 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
28209 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
28210 fixed_regs
[regno
] = 0;
28211 /* The XScale ABI has wR0 - wR9 as scratch registers,
28212 the rest as call-preserved registers. */
28213 for (regno
= FIRST_IWMMXT_REGNUM
;
28214 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
28216 fixed_regs
[regno
] = 0;
28217 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
28221 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
28223 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
28224 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
28226 else if (TARGET_APCS_STACK
)
28228 fixed_regs
[10] = 1;
28229 call_used_regs
[10] = 1;
28231 /* -mcaller-super-interworking reserves r11 for calls to
28232 _interwork_r11_call_via_rN(). Making the register global
28233 is an easy way of ensuring that it remains valid for all
28235 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
28236 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
28238 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
28239 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
28240 if (TARGET_CALLER_INTERWORKING
)
28241 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
28243 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28247 arm_preferred_rename_class (reg_class_t rclass
)
28249 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28250 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
28251 and code size can be reduced. */
28252 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
28258 /* Compute the attribute "length" of insn "*push_multi".
28259 So this function MUST be kept in sync with that insn pattern. */
28261 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
28263 int i
, regno
, hi_reg
;
28264 int num_saves
= XVECLEN (parallel_op
, 0);
28274 regno
= REGNO (first_op
);
28275 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
28276 list is 8-bit. Normally this means all registers in the list must be
28277 LO_REGS, that is (R0 -R7). If any HI_REGS used, then we must use 32-bit
28278 encodings. There is one exception for PUSH that LR in HI_REGS can be used
28279 with 16-bit encoding. */
28280 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
28281 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
28283 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
28284 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
28292 /* Compute the attribute "length" of insn. Currently, this function is used
28293 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28294 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28295 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
28296 true if OPERANDS contains insn which explicit updates base register. */
28299 arm_attr_length_pop_multi (rtx
*operands
, bool return_pc
, bool write_back_p
)
28308 rtx parallel_op
= operands
[0];
28309 /* Initialize to elements number of PARALLEL. */
28310 unsigned indx
= XVECLEN (parallel_op
, 0) - 1;
28311 /* Initialize the value to base register. */
28312 unsigned regno
= REGNO (operands
[1]);
28313 /* Skip return and write back pattern.
28314 We only need register pop pattern for later analysis. */
28315 unsigned first_indx
= 0;
28316 first_indx
+= return_pc
? 1 : 0;
28317 first_indx
+= write_back_p
? 1 : 0;
28319 /* A pop operation can be done through LDM or POP. If the base register is SP
28320 and if it's with write back, then a LDM will be alias of POP. */
28321 bool pop_p
= (regno
== SP_REGNUM
&& write_back_p
);
28322 bool ldm_p
= !pop_p
;
28324 /* Check base register for LDM. */
28325 if (ldm_p
&& REGNO_REG_CLASS (regno
) == HI_REGS
)
28328 /* Check each register in the list. */
28329 for (; indx
>= first_indx
; indx
--)
28331 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, indx
), 0));
28332 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28333 comment in arm_attr_length_push_multi. */
28334 if (REGNO_REG_CLASS (regno
) == HI_REGS
28335 && (regno
!= PC_REGNUM
|| ldm_p
))
28342 /* Compute the number of instructions emitted by output_move_double. */
28344 arm_count_output_move_double_insns (rtx
*operands
)
28348 /* output_move_double may modify the operands array, so call it
28349 here on a copy of the array. */
28350 ops
[0] = operands
[0];
28351 ops
[1] = operands
[1];
28352 output_move_double (ops
, false, &count
);
28357 vfp3_const_double_for_fract_bits (rtx operand
)
28359 REAL_VALUE_TYPE r0
;
28361 if (!CONST_DOUBLE_P (operand
))
28364 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
28365 if (exact_real_inverse (DFmode
, &r0
)
28366 && !REAL_VALUE_NEGATIVE (r0
))
28368 if (exact_real_truncate (DFmode
, &r0
))
28370 HOST_WIDE_INT value
= real_to_integer (&r0
);
28371 value
= value
& 0xffffffff;
28372 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
28374 int ret
= exact_log2 (value
);
28375 gcc_assert (IN_RANGE (ret
, 0, 31));
28383 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28384 log2 is in [1, 32], return that log2. Otherwise return -1.
28385 This is used in the patterns for vcvt.s32.f32 floating-point to
28386 fixed-point conversions. */
28389 vfp3_const_double_for_bits (rtx x
)
28391 const REAL_VALUE_TYPE
*r
;
28393 if (!CONST_DOUBLE_P (x
))
28396 r
= CONST_DOUBLE_REAL_VALUE (x
);
28398 if (REAL_VALUE_NEGATIVE (*r
)
28399 || REAL_VALUE_ISNAN (*r
)
28400 || REAL_VALUE_ISINF (*r
)
28401 || !real_isinteger (r
, SFmode
))
28404 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
28406 /* The exact_log2 above will have returned -1 if this is
28407 not an exact log2. */
28408 if (!IN_RANGE (hwint
, 1, 32))
28415 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28418 arm_pre_atomic_barrier (enum memmodel model
)
28420 if (need_atomic_barrier_p (model
, true))
28421 emit_insn (gen_memory_barrier ());
28425 arm_post_atomic_barrier (enum memmodel model
)
28427 if (need_atomic_barrier_p (model
, false))
28428 emit_insn (gen_memory_barrier ());
28431 /* Emit the load-exclusive and store-exclusive instructions.
28432 Use acquire and release versions if necessary. */
28435 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
28437 rtx (*gen
) (rtx
, rtx
);
28443 case E_QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
28444 case E_HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
28445 case E_SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
28446 case E_DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
28448 gcc_unreachable ();
28455 case E_QImode
: gen
= gen_arm_load_exclusiveqi
; break;
28456 case E_HImode
: gen
= gen_arm_load_exclusivehi
; break;
28457 case E_SImode
: gen
= gen_arm_load_exclusivesi
; break;
28458 case E_DImode
: gen
= gen_arm_load_exclusivedi
; break;
28460 gcc_unreachable ();
28464 emit_insn (gen (rval
, mem
));
28468 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
28471 rtx (*gen
) (rtx
, rtx
, rtx
);
28477 case E_QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
28478 case E_HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
28479 case E_SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
28480 case E_DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
28482 gcc_unreachable ();
28489 case E_QImode
: gen
= gen_arm_store_exclusiveqi
; break;
28490 case E_HImode
: gen
= gen_arm_store_exclusivehi
; break;
28491 case E_SImode
: gen
= gen_arm_store_exclusivesi
; break;
28492 case E_DImode
: gen
= gen_arm_store_exclusivedi
; break;
28494 gcc_unreachable ();
28498 emit_insn (gen (bval
, rval
, mem
));
28501 /* Mark the previous jump instruction as unlikely. */
28504 emit_unlikely_jump (rtx insn
)
28506 rtx_insn
*jump
= emit_jump_insn (insn
);
28507 add_reg_br_prob_note (jump
, profile_probability::very_unlikely ());
28510 /* Expand a compare and swap pattern. */
28513 arm_expand_compare_and_swap (rtx operands
[])
28515 rtx bval
, bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
28517 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
28519 bval
= operands
[0];
28520 rval
= operands
[1];
28522 oldval
= operands
[3];
28523 newval
= operands
[4];
28524 is_weak
= operands
[5];
28525 mod_s
= operands
[6];
28526 mod_f
= operands
[7];
28527 mode
= GET_MODE (mem
);
28529 /* Normally the succ memory model must be stronger than fail, but in the
28530 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28531 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28533 if (TARGET_HAVE_LDACQ
28534 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
28535 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
28536 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
28542 /* For narrow modes, we're going to perform the comparison in SImode,
28543 so do the zero-extension now. */
28544 rval
= gen_reg_rtx (SImode
);
28545 oldval
= convert_modes (SImode
, mode
, oldval
, true);
28549 /* Force the value into a register if needed. We waited until after
28550 the zero-extension above to do this properly. */
28551 if (!arm_add_operand (oldval
, SImode
))
28552 oldval
= force_reg (SImode
, oldval
);
28556 if (!cmpdi_operand (oldval
, mode
))
28557 oldval
= force_reg (mode
, oldval
);
28561 gcc_unreachable ();
28568 case E_QImode
: gen
= gen_atomic_compare_and_swapt1qi_1
; break;
28569 case E_HImode
: gen
= gen_atomic_compare_and_swapt1hi_1
; break;
28570 case E_SImode
: gen
= gen_atomic_compare_and_swapt1si_1
; break;
28571 case E_DImode
: gen
= gen_atomic_compare_and_swapt1di_1
; break;
28573 gcc_unreachable ();
28580 case E_QImode
: gen
= gen_atomic_compare_and_swap32qi_1
; break;
28581 case E_HImode
: gen
= gen_atomic_compare_and_swap32hi_1
; break;
28582 case E_SImode
: gen
= gen_atomic_compare_and_swap32si_1
; break;
28583 case E_DImode
: gen
= gen_atomic_compare_and_swap32di_1
; break;
28585 gcc_unreachable ();
28589 bdst
= TARGET_THUMB1
? bval
: gen_rtx_REG (CC_Zmode
, CC_REGNUM
);
28590 emit_insn (gen (bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
28592 if (mode
== QImode
|| mode
== HImode
)
28593 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
28595 /* In all cases, we arrange for success to be signaled by Z set.
28596 This arrangement allows for the boolean result to be used directly
28597 in a subsequent branch, post optimization. For Thumb-1 targets, the
28598 boolean negation of the result is also stored in bval because Thumb-1
28599 backend lacks dependency tracking for CC flag due to flag-setting not
28600 being represented at RTL level. */
28602 emit_insn (gen_cstoresi_eq0_thumb1 (bval
, bdst
));
28605 x
= gen_rtx_EQ (SImode
, bdst
, const0_rtx
);
28606 emit_insn (gen_rtx_SET (bval
, x
));
28610 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28611 another memory store between the load-exclusive and store-exclusive can
28612 reset the monitor from Exclusive to Open state. This means we must wait
28613 until after reload to split the pattern, lest we get a register spill in
28614 the middle of the atomic sequence. Success of the compare and swap is
28615 indicated by the Z flag set for 32bit targets and by neg_bval being zero
28616 for Thumb-1 targets (ie. negation of the boolean value returned by
28617 atomic_compare_and_swapmode standard pattern in operand 0). */
28620 arm_split_compare_and_swap (rtx operands
[])
28622 rtx rval
, mem
, oldval
, newval
, neg_bval
;
28624 enum memmodel mod_s
, mod_f
;
28626 rtx_code_label
*label1
, *label2
;
28629 rval
= operands
[1];
28631 oldval
= operands
[3];
28632 newval
= operands
[4];
28633 is_weak
= (operands
[5] != const0_rtx
);
28634 mod_s
= memmodel_from_int (INTVAL (operands
[6]));
28635 mod_f
= memmodel_from_int (INTVAL (operands
[7]));
28636 neg_bval
= TARGET_THUMB1
? operands
[0] : operands
[8];
28637 mode
= GET_MODE (mem
);
28639 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
28641 bool use_acquire
= TARGET_HAVE_LDACQ
28642 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28643 || is_mm_release (mod_s
));
28645 bool use_release
= TARGET_HAVE_LDACQ
28646 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28647 || is_mm_acquire (mod_s
));
28649 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28650 a full barrier is emitted after the store-release. */
28652 use_acquire
= false;
28654 /* Checks whether a barrier is needed and emits one accordingly. */
28655 if (!(use_acquire
|| use_release
))
28656 arm_pre_atomic_barrier (mod_s
);
28661 label1
= gen_label_rtx ();
28662 emit_label (label1
);
28664 label2
= gen_label_rtx ();
28666 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
28668 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28669 as required to communicate with arm_expand_compare_and_swap. */
28672 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, neg_bval
);
28673 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28674 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
28675 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
28676 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
28680 emit_move_insn (neg_bval
, const1_rtx
);
28681 cond
= gen_rtx_NE (VOIDmode
, rval
, oldval
);
28682 if (thumb1_cmpneg_operand (oldval
, SImode
))
28683 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval
, rval
, oldval
,
28686 emit_unlikely_jump (gen_cbranchsi4_insn (cond
, rval
, oldval
, label2
));
28689 arm_emit_store_exclusive (mode
, neg_bval
, mem
, newval
, use_release
);
28691 /* Weak or strong, we want EQ to be true for success, so that we
28692 match the flags that we got from the compare above. */
28695 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
28696 x
= gen_rtx_COMPARE (CCmode
, neg_bval
, const0_rtx
);
28697 emit_insn (gen_rtx_SET (cond
, x
));
28702 /* Z is set to boolean value of !neg_bval, as required to communicate
28703 with arm_expand_compare_and_swap. */
28704 x
= gen_rtx_NE (VOIDmode
, neg_bval
, const0_rtx
);
28705 emit_unlikely_jump (gen_cbranchsi4 (x
, neg_bval
, const0_rtx
, label1
));
28708 if (!is_mm_relaxed (mod_f
))
28709 emit_label (label2
);
28711 /* Checks whether a barrier is needed and emits one accordingly. */
28713 || !(use_acquire
|| use_release
))
28714 arm_post_atomic_barrier (mod_s
);
28716 if (is_mm_relaxed (mod_f
))
28717 emit_label (label2
);
28720 /* Split an atomic operation pattern. Operation is given by CODE and is one
28721 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28722 operation). Operation is performed on the content at MEM and on VALUE
28723 following the memory model MODEL_RTX. The content at MEM before and after
28724 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28725 success of the operation is returned in COND. Using a scratch register or
28726 an operand register for these determines what result is returned for that
28730 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
28731 rtx value
, rtx model_rtx
, rtx cond
)
28733 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
28734 machine_mode mode
= GET_MODE (mem
);
28735 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
28736 rtx_code_label
*label
;
28737 bool all_low_regs
, bind_old_new
;
28740 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
28742 bool use_acquire
= TARGET_HAVE_LDACQ
28743 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28744 || is_mm_release (model
));
28746 bool use_release
= TARGET_HAVE_LDACQ
28747 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28748 || is_mm_acquire (model
));
28750 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28751 a full barrier is emitted after the store-release. */
28753 use_acquire
= false;
28755 /* Checks whether a barrier is needed and emits one accordingly. */
28756 if (!(use_acquire
|| use_release
))
28757 arm_pre_atomic_barrier (model
);
28759 label
= gen_label_rtx ();
28760 emit_label (label
);
28763 new_out
= gen_lowpart (wmode
, new_out
);
28765 old_out
= gen_lowpart (wmode
, old_out
);
28768 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
28770 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
28772 /* Does the operation require destination and first operand to use the same
28773 register? This is decided by register constraints of relevant insn
28774 patterns in thumb1.md. */
28775 gcc_assert (!new_out
|| REG_P (new_out
));
28776 all_low_regs
= REG_P (value
) && REGNO_REG_CLASS (REGNO (value
)) == LO_REGS
28777 && new_out
&& REGNO_REG_CLASS (REGNO (new_out
)) == LO_REGS
28778 && REGNO_REG_CLASS (REGNO (old_out
)) == LO_REGS
;
28783 && (code
!= PLUS
|| (!all_low_regs
&& !satisfies_constraint_L (value
))));
28785 /* We want to return the old value while putting the result of the operation
28786 in the same register as the old value so copy the old value over to the
28787 destination register and use that register for the operation. */
28788 if (old_out
&& bind_old_new
)
28790 emit_move_insn (new_out
, old_out
);
28801 x
= gen_rtx_AND (wmode
, old_out
, value
);
28802 emit_insn (gen_rtx_SET (new_out
, x
));
28803 x
= gen_rtx_NOT (wmode
, new_out
);
28804 emit_insn (gen_rtx_SET (new_out
, x
));
28808 if (CONST_INT_P (value
))
28810 value
= GEN_INT (-INTVAL (value
));
28816 if (mode
== DImode
)
28818 /* DImode plus/minus need to clobber flags. */
28819 /* The adddi3 and subdi3 patterns are incorrectly written so that
28820 they require matching operands, even when we could easily support
28821 three operands. Thankfully, this can be fixed up post-splitting,
28822 as the individual add+adc patterns do accept three operands and
28823 post-reload cprop can make these moves go away. */
28824 emit_move_insn (new_out
, old_out
);
28826 x
= gen_adddi3 (new_out
, new_out
, value
);
28828 x
= gen_subdi3 (new_out
, new_out
, value
);
28835 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
28836 emit_insn (gen_rtx_SET (new_out
, x
));
28840 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
28843 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28844 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
28846 /* Checks whether a barrier is needed and emits one accordingly. */
28848 || !(use_acquire
|| use_release
))
28849 arm_post_atomic_barrier (model
);
28852 #define MAX_VECT_LEN 16
28854 struct expand_vec_perm_d
28856 rtx target
, op0
, op1
;
28857 auto_vec_perm_indices perm
;
28858 machine_mode vmode
;
28863 /* Generate a variable permutation. */
28866 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28868 machine_mode vmode
= GET_MODE (target
);
28869 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28871 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
28872 gcc_checking_assert (GET_MODE (op0
) == vmode
);
28873 gcc_checking_assert (GET_MODE (op1
) == vmode
);
28874 gcc_checking_assert (GET_MODE (sel
) == vmode
);
28875 gcc_checking_assert (TARGET_NEON
);
28879 if (vmode
== V8QImode
)
28880 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
28882 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
28888 if (vmode
== V8QImode
)
28890 pair
= gen_reg_rtx (V16QImode
);
28891 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
28892 pair
= gen_lowpart (TImode
, pair
);
28893 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
28897 pair
= gen_reg_rtx (OImode
);
28898 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
28899 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
28905 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28907 machine_mode vmode
= GET_MODE (target
);
28908 unsigned int nelt
= GET_MODE_NUNITS (vmode
);
28909 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28912 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28913 numbering of elements for big-endian, we must reverse the order. */
28914 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
28916 /* The VTBL instruction does not use a modulo index, so we must take care
28917 of that ourselves. */
28918 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28919 mask
= gen_const_vec_duplicate (vmode
, mask
);
28920 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
28922 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
28925 /* Map lane ordering between architectural lane order, and GCC lane order,
28926 taking into account ABI. See comment above output_move_neon for details. */
28929 neon_endian_lane_map (machine_mode mode
, int lane
)
28931 if (BYTES_BIG_ENDIAN
)
28933 int nelems
= GET_MODE_NUNITS (mode
);
28934 /* Reverse lane order. */
28935 lane
= (nelems
- 1 - lane
);
28936 /* Reverse D register order, to match ABI. */
28937 if (GET_MODE_SIZE (mode
) == 16)
28938 lane
= lane
^ (nelems
/ 2);
28943 /* Some permutations index into pairs of vectors, this is a helper function
28944 to map indexes into those pairs of vectors. */
28947 neon_pair_endian_lane_map (machine_mode mode
, int lane
)
28949 int nelem
= GET_MODE_NUNITS (mode
);
28950 if (BYTES_BIG_ENDIAN
)
28952 neon_endian_lane_map (mode
, lane
& (nelem
- 1)) + (lane
& nelem
);
28956 /* Generate or test for an insn that supports a constant permutation. */
28958 /* Recognize patterns for the VUZP insns. */
28961 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
28963 unsigned int i
, odd
, mask
, nelt
= d
->perm
.length ();
28964 rtx out0
, out1
, in0
, in1
;
28965 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28969 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28972 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28973 big endian pattern on 64 bit vectors, so we correct for that. */
28974 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
28975 && GET_MODE_SIZE (d
->vmode
) == 8 ? nelt
: 0;
28977 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
28979 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28981 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
28985 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28987 for (i
= 0; i
< nelt
; i
++)
28990 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
28991 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
29001 case E_V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
29002 case E_V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
29003 case E_V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
29004 case E_V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
29005 case E_V8HFmode
: gen
= gen_neon_vuzpv8hf_internal
; break;
29006 case E_V4HFmode
: gen
= gen_neon_vuzpv4hf_internal
; break;
29007 case E_V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
29008 case E_V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
29009 case E_V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
29010 case E_V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
29012 gcc_unreachable ();
29017 if (swap_nelt
!= 0)
29018 std::swap (in0
, in1
);
29021 out1
= gen_reg_rtx (d
->vmode
);
29023 std::swap (out0
, out1
);
29025 emit_insn (gen (out0
, in0
, in1
, out1
));
29029 /* Recognize patterns for the VZIP insns. */
29032 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
29034 unsigned int i
, high
, mask
, nelt
= d
->perm
.length ();
29035 rtx out0
, out1
, in0
, in1
;
29036 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
29040 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
29043 is_swapped
= BYTES_BIG_ENDIAN
;
29045 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0) ^ is_swapped
];
29048 if (first_elem
== neon_endian_lane_map (d
->vmode
, high
))
29050 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
29054 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
29056 for (i
= 0; i
< nelt
/ 2; i
++)
29059 neon_pair_endian_lane_map (d
->vmode
, i
+ high
) & mask
;
29060 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ is_swapped
)]
29064 neon_pair_endian_lane_map (d
->vmode
, i
+ nelt
+ high
) & mask
;
29065 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ !is_swapped
)]
29076 case E_V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
29077 case E_V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
29078 case E_V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
29079 case E_V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
29080 case E_V8HFmode
: gen
= gen_neon_vzipv8hf_internal
; break;
29081 case E_V4HFmode
: gen
= gen_neon_vzipv4hf_internal
; break;
29082 case E_V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
29083 case E_V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
29084 case E_V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
29085 case E_V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
29087 gcc_unreachable ();
29093 std::swap (in0
, in1
);
29096 out1
= gen_reg_rtx (d
->vmode
);
29098 std::swap (out0
, out1
);
29100 emit_insn (gen (out0
, in0
, in1
, out1
));
29104 /* Recognize patterns for the VREV insns. */
29107 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
29109 unsigned int i
, j
, diff
, nelt
= d
->perm
.length ();
29110 rtx (*gen
)(rtx
, rtx
);
29112 if (!d
->one_vector_p
)
29121 case E_V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
29122 case E_V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
29130 case E_V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
29131 case E_V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
29132 case E_V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
29133 case E_V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
29134 case E_V8HFmode
: gen
= gen_neon_vrev64v8hf
; break;
29135 case E_V4HFmode
: gen
= gen_neon_vrev64v4hf
; break;
29143 case E_V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
29144 case E_V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
29145 case E_V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
29146 case E_V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
29147 case E_V4SImode
: gen
= gen_neon_vrev64v4si
; break;
29148 case E_V2SImode
: gen
= gen_neon_vrev64v2si
; break;
29149 case E_V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
29150 case E_V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
29159 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
29160 for (j
= 0; j
<= diff
; j
+= 1)
29162 /* This is guaranteed to be true as the value of diff
29163 is 7, 3, 1 and we should have enough elements in the
29164 queue to generate this. Getting a vector mask with a
29165 value of diff other than these values implies that
29166 something is wrong by the time we get here. */
29167 gcc_assert (i
+ j
< nelt
);
29168 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
29176 emit_insn (gen (d
->target
, d
->op0
));
29180 /* Recognize patterns for the VTRN insns. */
29183 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
29185 unsigned int i
, odd
, mask
, nelt
= d
->perm
.length ();
29186 rtx out0
, out1
, in0
, in1
;
29187 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
29189 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
29192 /* Note that these are little-endian tests. Adjust for big-endian later. */
29193 if (d
->perm
[0] == 0)
29195 else if (d
->perm
[0] == 1)
29199 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
29201 for (i
= 0; i
< nelt
; i
+= 2)
29203 if (d
->perm
[i
] != i
+ odd
)
29205 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
29215 case E_V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
29216 case E_V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
29217 case E_V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
29218 case E_V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
29219 case E_V8HFmode
: gen
= gen_neon_vtrnv8hf_internal
; break;
29220 case E_V4HFmode
: gen
= gen_neon_vtrnv4hf_internal
; break;
29221 case E_V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
29222 case E_V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
29223 case E_V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
29224 case E_V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
29226 gcc_unreachable ();
29231 if (BYTES_BIG_ENDIAN
)
29233 std::swap (in0
, in1
);
29238 out1
= gen_reg_rtx (d
->vmode
);
29240 std::swap (out0
, out1
);
29242 emit_insn (gen (out0
, in0
, in1
, out1
));
29246 /* Recognize patterns for the VEXT insns. */
29249 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
29251 unsigned int i
, nelt
= d
->perm
.length ();
29252 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
29255 unsigned int location
;
29257 unsigned int next
= d
->perm
[0] + 1;
29259 /* TODO: Handle GCC's numbering of elements for big-endian. */
29260 if (BYTES_BIG_ENDIAN
)
29263 /* Check if the extracted indexes are increasing by one. */
29264 for (i
= 1; i
< nelt
; next
++, i
++)
29266 /* If we hit the most significant element of the 2nd vector in
29267 the previous iteration, no need to test further. */
29268 if (next
== 2 * nelt
)
29271 /* If we are operating on only one vector: it could be a
29272 rotation. If there are only two elements of size < 64, let
29273 arm_evpc_neon_vrev catch it. */
29274 if (d
->one_vector_p
&& (next
== nelt
))
29276 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
29282 if (d
->perm
[i
] != next
)
29286 location
= d
->perm
[0];
29290 case E_V16QImode
: gen
= gen_neon_vextv16qi
; break;
29291 case E_V8QImode
: gen
= gen_neon_vextv8qi
; break;
29292 case E_V4HImode
: gen
= gen_neon_vextv4hi
; break;
29293 case E_V8HImode
: gen
= gen_neon_vextv8hi
; break;
29294 case E_V2SImode
: gen
= gen_neon_vextv2si
; break;
29295 case E_V4SImode
: gen
= gen_neon_vextv4si
; break;
29296 case E_V4HFmode
: gen
= gen_neon_vextv4hf
; break;
29297 case E_V8HFmode
: gen
= gen_neon_vextv8hf
; break;
29298 case E_V2SFmode
: gen
= gen_neon_vextv2sf
; break;
29299 case E_V4SFmode
: gen
= gen_neon_vextv4sf
; break;
29300 case E_V2DImode
: gen
= gen_neon_vextv2di
; break;
29309 offset
= GEN_INT (location
);
29310 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
29314 /* The NEON VTBL instruction is a fully variable permuation that's even
29315 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29316 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29317 can do slightly better by expanding this as a constant where we don't
29318 have to apply a mask. */
29321 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
29323 rtx rperm
[MAX_VECT_LEN
], sel
;
29324 machine_mode vmode
= d
->vmode
;
29325 unsigned int i
, nelt
= d
->perm
.length ();
29327 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29328 numbering of elements for big-endian, we must reverse the order. */
29329 if (BYTES_BIG_ENDIAN
)
29335 /* Generic code will try constant permutation twice. Once with the
29336 original mode and again with the elements lowered to QImode.
29337 So wait and don't do the selector expansion ourselves. */
29338 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
29341 for (i
= 0; i
< nelt
; ++i
)
29342 rperm
[i
] = GEN_INT (d
->perm
[i
]);
29343 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
29344 sel
= force_reg (vmode
, sel
);
29346 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
29351 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
29353 /* Check if the input mask matches vext before reordering the
29356 if (arm_evpc_neon_vext (d
))
29359 /* The pattern matching functions above are written to look for a small
29360 number to begin the sequence (0, 1, N/2). If we begin with an index
29361 from the second operand, we can swap the operands. */
29362 unsigned int nelt
= d
->perm
.length ();
29363 if (d
->perm
[0] >= nelt
)
29365 for (unsigned int i
= 0; i
< nelt
; ++i
)
29366 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
29368 std::swap (d
->op0
, d
->op1
);
29373 if (arm_evpc_neon_vuzp (d
))
29375 if (arm_evpc_neon_vzip (d
))
29377 if (arm_evpc_neon_vrev (d
))
29379 if (arm_evpc_neon_vtrn (d
))
29381 return arm_evpc_neon_vtbl (d
);
29386 /* Expand a vec_perm_const pattern. */
29389 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
29391 struct expand_vec_perm_d d
;
29392 int i
, nelt
, which
;
29398 d
.vmode
= GET_MODE (target
);
29399 gcc_assert (VECTOR_MODE_P (d
.vmode
));
29400 d
.testing_p
= false;
29402 nelt
= GET_MODE_NUNITS (d
.vmode
);
29403 d
.perm
.reserve (nelt
);
29404 for (i
= which
= 0; i
< nelt
; ++i
)
29406 rtx e
= XVECEXP (sel
, 0, i
);
29407 int ei
= INTVAL (e
) & (2 * nelt
- 1);
29408 which
|= (ei
< nelt
? 1 : 2);
29409 d
.perm
.quick_push (ei
);
29418 d
.one_vector_p
= false;
29419 if (!rtx_equal_p (op0
, op1
))
29422 /* The elements of PERM do not suggest that only the first operand
29423 is used, but both operands are identical. Allow easier matching
29424 of the permutation by folding the permutation into the single
29428 for (i
= 0; i
< nelt
; ++i
)
29429 d
.perm
[i
] &= nelt
- 1;
29431 d
.one_vector_p
= true;
29436 d
.one_vector_p
= true;
29440 return arm_expand_vec_perm_const_1 (&d
);
29443 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29446 arm_vectorize_vec_perm_const_ok (machine_mode vmode
, vec_perm_indices sel
)
29448 struct expand_vec_perm_d d
;
29449 unsigned int i
, nelt
, which
;
29453 d
.testing_p
= true;
29454 d
.perm
.safe_splice (sel
);
29456 /* Categorize the set of elements in the selector. */
29457 nelt
= GET_MODE_NUNITS (d
.vmode
);
29458 for (i
= which
= 0; i
< nelt
; ++i
)
29460 unsigned int e
= d
.perm
[i
];
29461 gcc_assert (e
< 2 * nelt
);
29462 which
|= (e
< nelt
? 1 : 2);
29465 /* For all elements from second vector, fold the elements to first. */
29467 for (i
= 0; i
< nelt
; ++i
)
29470 /* Check whether the mask can be applied to the vector type. */
29471 d
.one_vector_p
= (which
!= 3);
29473 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
29474 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
29475 if (!d
.one_vector_p
)
29476 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
29479 ret
= arm_expand_vec_perm_const_1 (&d
);
29486 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
29488 /* If we are soft float and we do not have ldrd
29489 then all auto increment forms are ok. */
29490 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
29495 /* Post increment and Pre Decrement are supported for all
29496 instruction forms except for vector forms. */
29499 if (VECTOR_MODE_P (mode
))
29501 if (code
!= ARM_PRE_DEC
)
29511 /* Without LDRD and mode size greater than
29512 word size, there is no point in auto-incrementing
29513 because ldm and stm will not have these forms. */
29514 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
29517 /* Vector and floating point modes do not support
29518 these auto increment forms. */
29519 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
29532 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29533 on ARM, since we know that shifts by negative amounts are no-ops.
29534 Additionally, the default expansion code is not available or suitable
29535 for post-reload insn splits (this can occur when the register allocator
29536 chooses not to do a shift in NEON).
29538 This function is used in both initial expand and post-reload splits, and
29539 handles all kinds of 64-bit shifts.
29541 Input requirements:
29542 - It is safe for the input and output to be the same register, but
29543 early-clobber rules apply for the shift amount and scratch registers.
29544 - Shift by register requires both scratch registers. In all other cases
29545 the scratch registers may be NULL.
29546 - Ashiftrt by a register also clobbers the CC register. */
29548 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
29549 rtx amount
, rtx scratch1
, rtx scratch2
)
29551 rtx out_high
= gen_highpart (SImode
, out
);
29552 rtx out_low
= gen_lowpart (SImode
, out
);
29553 rtx in_high
= gen_highpart (SImode
, in
);
29554 rtx in_low
= gen_lowpart (SImode
, in
);
29557 in = the register pair containing the input value.
29558 out = the destination register pair.
29559 up = the high- or low-part of each pair.
29560 down = the opposite part to "up".
29561 In a shift, we can consider bits to shift from "up"-stream to
29562 "down"-stream, so in a left-shift "up" is the low-part and "down"
29563 is the high-part of each register pair. */
29565 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
29566 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
29567 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
29568 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
29570 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
29572 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
29573 && GET_MODE (out
) == DImode
);
29575 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
29576 && GET_MODE (in
) == DImode
);
29578 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
29579 && GET_MODE (amount
) == SImode
)
29580 || CONST_INT_P (amount
)));
29581 gcc_assert (scratch1
== NULL
29582 || (GET_CODE (scratch1
) == SCRATCH
)
29583 || (GET_MODE (scratch1
) == SImode
29584 && REG_P (scratch1
)));
29585 gcc_assert (scratch2
== NULL
29586 || (GET_CODE (scratch2
) == SCRATCH
)
29587 || (GET_MODE (scratch2
) == SImode
29588 && REG_P (scratch2
)));
29589 gcc_assert (!REG_P (out
) || !REG_P (amount
)
29590 || !HARD_REGISTER_P (out
)
29591 || (REGNO (out
) != REGNO (amount
)
29592 && REGNO (out
) + 1 != REGNO (amount
)));
29594 /* Macros to make following code more readable. */
29595 #define SUB_32(DEST,SRC) \
29596 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29597 #define RSB_32(DEST,SRC) \
29598 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29599 #define SUB_S_32(DEST,SRC) \
29600 gen_addsi3_compare0 ((DEST), (SRC), \
29602 #define SET(DEST,SRC) \
29603 gen_rtx_SET ((DEST), (SRC))
29604 #define SHIFT(CODE,SRC,AMOUNT) \
29605 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29606 #define LSHIFT(CODE,SRC,AMOUNT) \
29607 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29608 SImode, (SRC), (AMOUNT))
29609 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29610 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29611 SImode, (SRC), (AMOUNT))
29613 gen_rtx_IOR (SImode, (A), (B))
29614 #define BRANCH(COND,LABEL) \
29615 gen_arm_cond_branch ((LABEL), \
29616 gen_rtx_ ## COND (CCmode, cc_reg, \
29620 /* Shifts by register and shifts by constant are handled separately. */
29621 if (CONST_INT_P (amount
))
29623 /* We have a shift-by-constant. */
29625 /* First, handle out-of-range shift amounts.
29626 In both cases we try to match the result an ARM instruction in a
29627 shift-by-register would give. This helps reduce execution
29628 differences between optimization levels, but it won't stop other
29629 parts of the compiler doing different things. This is "undefined
29630 behavior, in any case. */
29631 if (INTVAL (amount
) <= 0)
29632 emit_insn (gen_movdi (out
, in
));
29633 else if (INTVAL (amount
) >= 64)
29635 if (code
== ASHIFTRT
)
29637 rtx const31_rtx
= GEN_INT (31);
29638 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
29639 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
29642 emit_insn (gen_movdi (out
, const0_rtx
));
29645 /* Now handle valid shifts. */
29646 else if (INTVAL (amount
) < 32)
29648 /* Shifts by a constant less than 32. */
29649 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
29651 /* Clearing the out register in DImode first avoids lots
29652 of spilling and results in less stack usage.
29653 Later this redundant insn is completely removed.
29654 Do that only if "in" and "out" are different registers. */
29655 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
29656 emit_insn (SET (out
, const0_rtx
));
29657 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29658 emit_insn (SET (out_down
,
29659 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
29661 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29665 /* Shifts by a constant greater than 31. */
29666 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
29668 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
29669 emit_insn (SET (out
, const0_rtx
));
29670 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
29671 if (code
== ASHIFTRT
)
29672 emit_insn (gen_ashrsi3 (out_up
, in_up
,
29675 emit_insn (SET (out_up
, const0_rtx
));
29680 /* We have a shift-by-register. */
29681 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
29683 /* This alternative requires the scratch registers. */
29684 gcc_assert (scratch1
&& REG_P (scratch1
));
29685 gcc_assert (scratch2
&& REG_P (scratch2
));
29687 /* We will need the values "amount-32" and "32-amount" later.
29688 Swapping them around now allows the later code to be more general. */
29692 emit_insn (SUB_32 (scratch1
, amount
));
29693 emit_insn (RSB_32 (scratch2
, amount
));
29696 emit_insn (RSB_32 (scratch1
, amount
));
29697 /* Also set CC = amount > 32. */
29698 emit_insn (SUB_S_32 (scratch2
, amount
));
29701 emit_insn (RSB_32 (scratch1
, amount
));
29702 emit_insn (SUB_32 (scratch2
, amount
));
29705 gcc_unreachable ();
29708 /* Emit code like this:
29711 out_down = in_down << amount;
29712 out_down = (in_up << (amount - 32)) | out_down;
29713 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29714 out_up = in_up << amount;
29717 out_down = in_down >> amount;
29718 out_down = (in_up << (32 - amount)) | out_down;
29720 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29721 out_up = in_up << amount;
29724 out_down = in_down >> amount;
29725 out_down = (in_up << (32 - amount)) | out_down;
29727 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29728 out_up = in_up << amount;
29730 The ARM and Thumb2 variants are the same but implemented slightly
29731 differently. If this were only called during expand we could just
29732 use the Thumb2 case and let combine do the right thing, but this
29733 can also be called from post-reload splitters. */
29735 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29737 if (!TARGET_THUMB2
)
29739 /* Emit code for ARM mode. */
29740 emit_insn (SET (out_down
,
29741 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
29742 if (code
== ASHIFTRT
)
29744 rtx_code_label
*done_label
= gen_label_rtx ();
29745 emit_jump_insn (BRANCH (LT
, done_label
));
29746 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
29748 emit_label (done_label
);
29751 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
29756 /* Emit code for Thumb2 mode.
29757 Thumb2 can't do shift and or in one insn. */
29758 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
29759 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
29761 if (code
== ASHIFTRT
)
29763 rtx_code_label
*done_label
= gen_label_rtx ();
29764 emit_jump_insn (BRANCH (LT
, done_label
));
29765 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
29766 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
29767 emit_label (done_label
);
29771 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
29772 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
29776 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29790 /* Returns true if the pattern is a valid symbolic address, which is either a
29791 symbol_ref or (symbol_ref + addend).
29793 According to the ARM ELF ABI, the initial addend of REL-type relocations
29794 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29795 literal field of the instruction as a 16-bit signed value in the range
29796 -32768 <= A < 32768. */
29799 arm_valid_symbolic_address_p (rtx addr
)
29801 rtx xop0
, xop1
= NULL_RTX
;
29804 if (GET_CODE (tmp
) == SYMBOL_REF
|| GET_CODE (tmp
) == LABEL_REF
)
29807 /* (const (plus: symbol_ref const_int)) */
29808 if (GET_CODE (addr
) == CONST
)
29809 tmp
= XEXP (addr
, 0);
29811 if (GET_CODE (tmp
) == PLUS
)
29813 xop0
= XEXP (tmp
, 0);
29814 xop1
= XEXP (tmp
, 1);
29816 if (GET_CODE (xop0
) == SYMBOL_REF
&& CONST_INT_P (xop1
))
29817 return IN_RANGE (INTVAL (xop1
), -0x8000, 0x7fff);
29823 /* Returns true if a valid comparison operation and makes
29824 the operands in a form that is valid. */
29826 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
29828 enum rtx_code code
= GET_CODE (*comparison
);
29830 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
29831 ? GET_MODE (*op2
) : GET_MODE (*op1
);
29833 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
29835 if (code
== UNEQ
|| code
== LTGT
)
29838 code_int
= (int)code
;
29839 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
29840 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
29845 if (!arm_add_operand (*op1
, mode
))
29846 *op1
= force_reg (mode
, *op1
);
29847 if (!arm_add_operand (*op2
, mode
))
29848 *op2
= force_reg (mode
, *op2
);
29852 if (!cmpdi_operand (*op1
, mode
))
29853 *op1
= force_reg (mode
, *op1
);
29854 if (!cmpdi_operand (*op2
, mode
))
29855 *op2
= force_reg (mode
, *op2
);
29859 if (!TARGET_VFP_FP16INST
)
29861 /* FP16 comparisons are done in SF mode. */
29863 *op1
= convert_to_mode (mode
, *op1
, 1);
29864 *op2
= convert_to_mode (mode
, *op2
, 1);
29865 /* Fall through. */
29868 if (!vfp_compare_operand (*op1
, mode
))
29869 *op1
= force_reg (mode
, *op1
);
29870 if (!vfp_compare_operand (*op2
, mode
))
29871 *op2
= force_reg (mode
, *op2
);
29881 /* Maximum number of instructions to set block of memory. */
29883 arm_block_set_max_insns (void)
29885 if (optimize_function_for_size_p (cfun
))
29888 return current_tune
->max_insns_inline_memset
;
29891 /* Return TRUE if it's profitable to set block of memory for
29892 non-vectorized case. VAL is the value to set the memory
29893 with. LENGTH is the number of bytes to set. ALIGN is the
29894 alignment of the destination memory in bytes. UNALIGNED_P
29895 is TRUE if we can only set the memory with instructions
29896 meeting alignment requirements. USE_STRD_P is TRUE if we
29897 can use strd to set the memory. */
29899 arm_block_set_non_vect_profit_p (rtx val
,
29900 unsigned HOST_WIDE_INT length
,
29901 unsigned HOST_WIDE_INT align
,
29902 bool unaligned_p
, bool use_strd_p
)
29905 /* For leftovers in bytes of 0-7, we can set the memory block using
29906 strb/strh/str with minimum instruction number. */
29907 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29911 num
= arm_const_inline_cost (SET
, val
);
29912 num
+= length
/ align
+ length
% align
;
29914 else if (use_strd_p
)
29916 num
= arm_const_double_inline_cost (val
);
29917 num
+= (length
>> 3) + leftover
[length
& 7];
29921 num
= arm_const_inline_cost (SET
, val
);
29922 num
+= (length
>> 2) + leftover
[length
& 3];
29925 /* We may be able to combine last pair STRH/STRB into a single STR
29926 by shifting one byte back. */
29927 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
29930 return (num
<= arm_block_set_max_insns ());
29933 /* Return TRUE if it's profitable to set block of memory for
29934 vectorized case. LENGTH is the number of bytes to set.
29935 ALIGN is the alignment of destination memory in bytes.
29936 MODE is the vector mode used to set the memory. */
29938 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
29939 unsigned HOST_WIDE_INT align
,
29943 bool unaligned_p
= ((align
& 3) != 0);
29944 unsigned int nelt
= GET_MODE_NUNITS (mode
);
29946 /* Instruction loading constant value. */
29948 /* Instructions storing the memory. */
29949 num
+= (length
+ nelt
- 1) / nelt
;
29950 /* Instructions adjusting the address expression. Only need to
29951 adjust address expression if it's 4 bytes aligned and bytes
29952 leftover can only be stored by mis-aligned store instruction. */
29953 if (!unaligned_p
&& (length
& 3) != 0)
29956 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29957 if (!unaligned_p
&& mode
== V16QImode
)
29960 return (num
<= arm_block_set_max_insns ());
29963 /* Set a block of memory using vectorization instructions for the
29964 unaligned case. We fill the first LENGTH bytes of the memory
29965 area starting from DSTBASE with byte constant VALUE. ALIGN is
29966 the alignment requirement of memory. Return TRUE if succeeded. */
29968 arm_block_set_unaligned_vect (rtx dstbase
,
29969 unsigned HOST_WIDE_INT length
,
29970 unsigned HOST_WIDE_INT value
,
29971 unsigned HOST_WIDE_INT align
)
29973 unsigned int i
, nelt_v16
, nelt_v8
, nelt_mode
;
29976 rtx (*gen_func
) (rtx
, rtx
);
29978 unsigned HOST_WIDE_INT v
= value
;
29979 unsigned int offset
= 0;
29980 gcc_assert ((align
& 0x3) != 0);
29981 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29982 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29983 if (length
>= nelt_v16
)
29986 gen_func
= gen_movmisalignv16qi
;
29991 gen_func
= gen_movmisalignv8qi
;
29993 nelt_mode
= GET_MODE_NUNITS (mode
);
29994 gcc_assert (length
>= nelt_mode
);
29995 /* Skip if it isn't profitable. */
29996 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29999 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
30000 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30002 v
= sext_hwi (v
, BITS_PER_WORD
);
30004 reg
= gen_reg_rtx (mode
);
30005 val_vec
= gen_const_vec_duplicate (mode
, GEN_INT (v
));
30006 /* Emit instruction loading the constant value. */
30007 emit_move_insn (reg
, val_vec
);
30009 /* Handle nelt_mode bytes in a vector. */
30010 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
30012 emit_insn ((*gen_func
) (mem
, reg
));
30013 if (i
+ 2 * nelt_mode
<= length
)
30015 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
30016 offset
+= nelt_mode
;
30017 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30021 /* If there are not less than nelt_v8 bytes leftover, we must be in
30023 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
30025 /* Handle (8, 16) bytes leftover. */
30026 if (i
+ nelt_v8
< length
)
30028 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
30029 offset
+= length
- i
;
30030 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30032 /* We are shifting bytes back, set the alignment accordingly. */
30033 if ((length
& 1) != 0 && align
>= 2)
30034 set_mem_align (mem
, BITS_PER_UNIT
);
30036 emit_insn (gen_movmisalignv16qi (mem
, reg
));
30038 /* Handle (0, 8] bytes leftover. */
30039 else if (i
< length
&& i
+ nelt_v8
>= length
)
30041 if (mode
== V16QImode
)
30042 reg
= gen_lowpart (V8QImode
, reg
);
30044 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
30045 + (nelt_mode
- nelt_v8
))));
30046 offset
+= (length
- i
) + (nelt_mode
- nelt_v8
);
30047 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, offset
);
30049 /* We are shifting bytes back, set the alignment accordingly. */
30050 if ((length
& 1) != 0 && align
>= 2)
30051 set_mem_align (mem
, BITS_PER_UNIT
);
30053 emit_insn (gen_movmisalignv8qi (mem
, reg
));
30059 /* Set a block of memory using vectorization instructions for the
30060 aligned case. We fill the first LENGTH bytes of the memory area
30061 starting from DSTBASE with byte constant VALUE. ALIGN is the
30062 alignment requirement of memory. Return TRUE if succeeded. */
30064 arm_block_set_aligned_vect (rtx dstbase
,
30065 unsigned HOST_WIDE_INT length
,
30066 unsigned HOST_WIDE_INT value
,
30067 unsigned HOST_WIDE_INT align
)
30069 unsigned int i
, nelt_v8
, nelt_v16
, nelt_mode
;
30070 rtx dst
, addr
, mem
;
30073 unsigned HOST_WIDE_INT v
= value
;
30074 unsigned int offset
= 0;
30076 gcc_assert ((align
& 0x3) == 0);
30077 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
30078 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
30079 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
30084 nelt_mode
= GET_MODE_NUNITS (mode
);
30085 gcc_assert (length
>= nelt_mode
);
30086 /* Skip if it isn't profitable. */
30087 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
30090 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
30092 v
= sext_hwi (v
, BITS_PER_WORD
);
30094 reg
= gen_reg_rtx (mode
);
30095 val_vec
= gen_const_vec_duplicate (mode
, GEN_INT (v
));
30096 /* Emit instruction loading the constant value. */
30097 emit_move_insn (reg
, val_vec
);
30100 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30101 if (mode
== V16QImode
)
30103 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30104 emit_insn (gen_movmisalignv16qi (mem
, reg
));
30106 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30107 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
30109 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
30110 offset
+= length
- nelt_mode
;
30111 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30112 /* We are shifting bytes back, set the alignment accordingly. */
30113 if ((length
& 0x3) == 0)
30114 set_mem_align (mem
, BITS_PER_UNIT
* 4);
30115 else if ((length
& 0x1) == 0)
30116 set_mem_align (mem
, BITS_PER_UNIT
* 2);
30118 set_mem_align (mem
, BITS_PER_UNIT
);
30120 emit_insn (gen_movmisalignv16qi (mem
, reg
));
30123 /* Fall through for bytes leftover. */
30125 nelt_mode
= GET_MODE_NUNITS (mode
);
30126 reg
= gen_lowpart (V8QImode
, reg
);
30129 /* Handle 8 bytes in a vector. */
30130 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
30132 addr
= plus_constant (Pmode
, dst
, i
);
30133 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
+ i
);
30134 emit_move_insn (mem
, reg
);
30137 /* Handle single word leftover by shifting 4 bytes back. We can
30138 use aligned access for this case. */
30139 if (i
+ UNITS_PER_WORD
== length
)
30141 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
30142 offset
+= i
- UNITS_PER_WORD
;
30143 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
);
30144 /* We are shifting 4 bytes back, set the alignment accordingly. */
30145 if (align
> UNITS_PER_WORD
)
30146 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
30148 emit_move_insn (mem
, reg
);
30150 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30151 We have to use unaligned access for this case. */
30152 else if (i
< length
)
30154 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
30155 offset
+= length
- nelt_mode
;
30156 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30157 /* We are shifting bytes back, set the alignment accordingly. */
30158 if ((length
& 1) == 0)
30159 set_mem_align (mem
, BITS_PER_UNIT
* 2);
30161 set_mem_align (mem
, BITS_PER_UNIT
);
30163 emit_insn (gen_movmisalignv8qi (mem
, reg
));
30169 /* Set a block of memory using plain strh/strb instructions, only
30170 using instructions allowed by ALIGN on processor. We fill the
30171 first LENGTH bytes of the memory area starting from DSTBASE
30172 with byte constant VALUE. ALIGN is the alignment requirement
30175 arm_block_set_unaligned_non_vect (rtx dstbase
,
30176 unsigned HOST_WIDE_INT length
,
30177 unsigned HOST_WIDE_INT value
,
30178 unsigned HOST_WIDE_INT align
)
30181 rtx dst
, addr
, mem
;
30182 rtx val_exp
, val_reg
, reg
;
30184 HOST_WIDE_INT v
= value
;
30186 gcc_assert (align
== 1 || align
== 2);
30189 v
|= (value
<< BITS_PER_UNIT
);
30191 v
= sext_hwi (v
, BITS_PER_WORD
);
30192 val_exp
= GEN_INT (v
);
30193 /* Skip if it isn't profitable. */
30194 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
30195 align
, true, false))
30198 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
30199 mode
= (align
== 2 ? HImode
: QImode
);
30200 val_reg
= force_reg (SImode
, val_exp
);
30201 reg
= gen_lowpart (mode
, val_reg
);
30203 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
30205 addr
= plus_constant (Pmode
, dst
, i
);
30206 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
30207 emit_move_insn (mem
, reg
);
30210 /* Handle single byte leftover. */
30211 if (i
+ 1 == length
)
30213 reg
= gen_lowpart (QImode
, val_reg
);
30214 addr
= plus_constant (Pmode
, dst
, i
);
30215 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
30216 emit_move_insn (mem
, reg
);
30220 gcc_assert (i
== length
);
30224 /* Set a block of memory using plain strd/str/strh/strb instructions,
30225 to permit unaligned copies on processors which support unaligned
30226 semantics for those instructions. We fill the first LENGTH bytes
30227 of the memory area starting from DSTBASE with byte constant VALUE.
30228 ALIGN is the alignment requirement of memory. */
30230 arm_block_set_aligned_non_vect (rtx dstbase
,
30231 unsigned HOST_WIDE_INT length
,
30232 unsigned HOST_WIDE_INT value
,
30233 unsigned HOST_WIDE_INT align
)
30236 rtx dst
, addr
, mem
;
30237 rtx val_exp
, val_reg
, reg
;
30238 unsigned HOST_WIDE_INT v
;
30241 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
30242 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
30244 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
30245 if (length
< UNITS_PER_WORD
)
30246 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
30249 v
|= (v
<< BITS_PER_WORD
);
30251 v
= sext_hwi (v
, BITS_PER_WORD
);
30253 val_exp
= GEN_INT (v
);
30254 /* Skip if it isn't profitable. */
30255 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
30256 align
, false, use_strd_p
))
30261 /* Try without strd. */
30262 v
= (v
>> BITS_PER_WORD
);
30263 v
= sext_hwi (v
, BITS_PER_WORD
);
30264 val_exp
= GEN_INT (v
);
30265 use_strd_p
= false;
30266 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
30267 align
, false, use_strd_p
))
30272 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
30273 /* Handle double words using strd if possible. */
30276 val_reg
= force_reg (DImode
, val_exp
);
30278 for (; (i
+ 8 <= length
); i
+= 8)
30280 addr
= plus_constant (Pmode
, dst
, i
);
30281 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
30282 emit_move_insn (mem
, reg
);
30286 val_reg
= force_reg (SImode
, val_exp
);
30288 /* Handle words. */
30289 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
30290 for (; (i
+ 4 <= length
); i
+= 4)
30292 addr
= plus_constant (Pmode
, dst
, i
);
30293 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
30294 if ((align
& 3) == 0)
30295 emit_move_insn (mem
, reg
);
30297 emit_insn (gen_unaligned_storesi (mem
, reg
));
30300 /* Merge last pair of STRH and STRB into a STR if possible. */
30301 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
30303 addr
= plus_constant (Pmode
, dst
, i
- 1);
30304 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
30305 /* We are shifting one byte back, set the alignment accordingly. */
30306 if ((align
& 1) == 0)
30307 set_mem_align (mem
, BITS_PER_UNIT
);
30309 /* Most likely this is an unaligned access, and we can't tell at
30310 compilation time. */
30311 emit_insn (gen_unaligned_storesi (mem
, reg
));
30315 /* Handle half word leftover. */
30316 if (i
+ 2 <= length
)
30318 reg
= gen_lowpart (HImode
, val_reg
);
30319 addr
= plus_constant (Pmode
, dst
, i
);
30320 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
30321 if ((align
& 1) == 0)
30322 emit_move_insn (mem
, reg
);
30324 emit_insn (gen_unaligned_storehi (mem
, reg
));
30329 /* Handle single byte leftover. */
30330 if (i
+ 1 == length
)
30332 reg
= gen_lowpart (QImode
, val_reg
);
30333 addr
= plus_constant (Pmode
, dst
, i
);
30334 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
30335 emit_move_insn (mem
, reg
);
30341 /* Set a block of memory using vectorization instructions for both
30342 aligned and unaligned cases. We fill the first LENGTH bytes of
30343 the memory area starting from DSTBASE with byte constant VALUE.
30344 ALIGN is the alignment requirement of memory. */
30346 arm_block_set_vect (rtx dstbase
,
30347 unsigned HOST_WIDE_INT length
,
30348 unsigned HOST_WIDE_INT value
,
30349 unsigned HOST_WIDE_INT align
)
30351 /* Check whether we need to use unaligned store instruction. */
30352 if (((align
& 3) != 0 || (length
& 3) != 0)
30353 /* Check whether unaligned store instruction is available. */
30354 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
30357 if ((align
& 3) == 0)
30358 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
30360 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
30363 /* Expand string store operation. Firstly we try to do that by using
30364 vectorization instructions, then try with ARM unaligned access and
30365 double-word store if profitable. OPERANDS[0] is the destination,
30366 OPERANDS[1] is the number of bytes, operands[2] is the value to
30367 initialize the memory, OPERANDS[3] is the known alignment of the
30370 arm_gen_setmem (rtx
*operands
)
30372 rtx dstbase
= operands
[0];
30373 unsigned HOST_WIDE_INT length
;
30374 unsigned HOST_WIDE_INT value
;
30375 unsigned HOST_WIDE_INT align
;
30377 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
30380 length
= UINTVAL (operands
[1]);
30384 value
= (UINTVAL (operands
[2]) & 0xFF);
30385 align
= UINTVAL (operands
[3]);
30386 if (TARGET_NEON
&& length
>= 8
30387 && current_tune
->string_ops_prefer_neon
30388 && arm_block_set_vect (dstbase
, length
, value
, align
))
30391 if (!unaligned_access
&& (align
& 3) != 0)
30392 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
30394 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
30399 arm_macro_fusion_p (void)
30401 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
30404 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30405 for MOVW / MOVT macro fusion. */
30408 arm_sets_movw_movt_fusible_p (rtx prev_set
, rtx curr_set
)
30410 /* We are trying to fuse
30411 movw imm / movt imm
30412 instructions as a group that gets scheduled together. */
30414 rtx set_dest
= SET_DEST (curr_set
);
30416 if (GET_MODE (set_dest
) != SImode
)
30419 /* We are trying to match:
30420 prev (movw) == (set (reg r0) (const_int imm16))
30421 curr (movt) == (set (zero_extract (reg r0)
30424 (const_int imm16_1))
30426 prev (movw) == (set (reg r1)
30427 (high (symbol_ref ("SYM"))))
30428 curr (movt) == (set (reg r0)
30430 (symbol_ref ("SYM")))) */
30432 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
30434 if (CONST_INT_P (SET_SRC (curr_set
))
30435 && CONST_INT_P (SET_SRC (prev_set
))
30436 && REG_P (XEXP (set_dest
, 0))
30437 && REG_P (SET_DEST (prev_set
))
30438 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
30442 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
30443 && REG_P (SET_DEST (curr_set
))
30444 && REG_P (SET_DEST (prev_set
))
30445 && GET_CODE (SET_SRC (prev_set
)) == HIGH
30446 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
30453 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
30455 rtx prev_set
= single_set (prev
);
30456 rtx curr_set
= single_set (curr
);
30462 if (any_condjump_p (curr
))
30465 if (!arm_macro_fusion_p ())
30468 if (current_tune
->fusible_ops
& tune_params::FUSE_AES_AESMC
30469 && aarch_crypto_can_dual_issue (prev
, curr
))
30472 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
30473 && arm_sets_movw_movt_fusible_p (prev_set
, curr_set
))
30479 /* Return true iff the instruction fusion described by OP is enabled. */
30481 arm_fusion_enabled_p (tune_params::fuse_ops op
)
30483 return current_tune
->fusible_ops
& op
;
30486 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30487 scheduled for speculative execution. Reject the long-running division
30488 and square-root instructions. */
30491 arm_sched_can_speculate_insn (rtx_insn
*insn
)
30493 switch (get_attr_type (insn
))
30501 case TYPE_NEON_FP_SQRT_S
:
30502 case TYPE_NEON_FP_SQRT_D
:
30503 case TYPE_NEON_FP_SQRT_S_Q
:
30504 case TYPE_NEON_FP_SQRT_D_Q
:
30505 case TYPE_NEON_FP_DIV_S
:
30506 case TYPE_NEON_FP_DIV_D
:
30507 case TYPE_NEON_FP_DIV_S_Q
:
30508 case TYPE_NEON_FP_DIV_D_Q
:
30515 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30517 static unsigned HOST_WIDE_INT
30518 arm_asan_shadow_offset (void)
30520 return HOST_WIDE_INT_1U
<< 29;
30524 /* This is a temporary fix for PR60655. Ideally we need
30525 to handle most of these cases in the generic part but
30526 currently we reject minus (..) (sym_ref). We try to
30527 ameliorate the case with minus (sym_ref1) (sym_ref2)
30528 where they are in the same section. */
30531 arm_const_not_ok_for_debug_p (rtx p
)
30533 tree decl_op0
= NULL
;
30534 tree decl_op1
= NULL
;
30536 if (GET_CODE (p
) == UNSPEC
)
30538 if (GET_CODE (p
) == MINUS
)
30540 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
30542 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
30544 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
30545 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
30547 if ((VAR_P (decl_op1
)
30548 || TREE_CODE (decl_op1
) == CONST_DECL
)
30549 && (VAR_P (decl_op0
)
30550 || TREE_CODE (decl_op0
) == CONST_DECL
))
30551 return (get_variable_section (decl_op1
, false)
30552 != get_variable_section (decl_op0
, false));
30554 if (TREE_CODE (decl_op1
) == LABEL_DECL
30555 && TREE_CODE (decl_op0
) == LABEL_DECL
)
30556 return (DECL_CONTEXT (decl_op1
)
30557 != DECL_CONTEXT (decl_op0
));
30567 /* return TRUE if x is a reference to a value in a constant pool */
30569 arm_is_constant_pool_ref (rtx x
)
30572 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
30573 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
30576 /* Remember the last target of arm_set_current_function. */
30577 static GTY(()) tree arm_previous_fndecl
;
30579 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30582 save_restore_target_globals (tree new_tree
)
30584 /* If we have a previous state, use it. */
30585 if (TREE_TARGET_GLOBALS (new_tree
))
30586 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
30587 else if (new_tree
== target_option_default_node
)
30588 restore_target_globals (&default_target_globals
);
30591 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30592 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
30595 arm_option_params_internal ();
30598 /* Invalidate arm_previous_fndecl. */
30601 arm_reset_previous_fndecl (void)
30603 arm_previous_fndecl
= NULL_TREE
;
30606 /* Establish appropriate back-end context for processing the function
30607 FNDECL. The argument might be NULL to indicate processing at top
30608 level, outside of any function scope. */
30611 arm_set_current_function (tree fndecl
)
30613 if (!fndecl
|| fndecl
== arm_previous_fndecl
)
30616 tree old_tree
= (arm_previous_fndecl
30617 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl
)
30620 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30622 /* If current function has no attributes but previous one did,
30623 use the default node. */
30624 if (! new_tree
&& old_tree
)
30625 new_tree
= target_option_default_node
;
30627 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30628 the default have been handled by save_restore_target_globals from
30629 arm_pragma_target_parse. */
30630 if (old_tree
== new_tree
)
30633 arm_previous_fndecl
= fndecl
;
30635 /* First set the target options. */
30636 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
30638 save_restore_target_globals (new_tree
);
30641 /* Implement TARGET_OPTION_PRINT. */
30644 arm_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
30646 int flags
= ptr
->x_target_flags
;
30647 const char *fpu_name
;
30649 fpu_name
= (ptr
->x_arm_fpu_index
== TARGET_FPU_auto
30650 ? "auto" : all_fpus
[ptr
->x_arm_fpu_index
].name
);
30652 fprintf (file
, "%*sselected isa %s\n", indent
, "",
30653 TARGET_THUMB2_P (flags
) ? "thumb2" :
30654 TARGET_THUMB_P (flags
) ? "thumb1" :
30657 if (ptr
->x_arm_arch_string
)
30658 fprintf (file
, "%*sselected architecture %s\n", indent
, "",
30659 ptr
->x_arm_arch_string
);
30661 if (ptr
->x_arm_cpu_string
)
30662 fprintf (file
, "%*sselected CPU %s\n", indent
, "",
30663 ptr
->x_arm_cpu_string
);
30665 if (ptr
->x_arm_tune_string
)
30666 fprintf (file
, "%*sselected tune %s\n", indent
, "",
30667 ptr
->x_arm_tune_string
);
30669 fprintf (file
, "%*sselected fpu %s\n", indent
, "", fpu_name
);
30672 /* Hook to determine if one function can safely inline another. */
30675 arm_can_inline_p (tree caller
, tree callee
)
30677 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
30678 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
30679 bool can_inline
= true;
30681 struct cl_target_option
*caller_opts
30682 = TREE_TARGET_OPTION (caller_tree
? caller_tree
30683 : target_option_default_node
);
30685 struct cl_target_option
*callee_opts
30686 = TREE_TARGET_OPTION (callee_tree
? callee_tree
30687 : target_option_default_node
);
30689 if (callee_opts
== caller_opts
)
30692 /* Callee's ISA features should be a subset of the caller's. */
30693 struct arm_build_target caller_target
;
30694 struct arm_build_target callee_target
;
30695 caller_target
.isa
= sbitmap_alloc (isa_num_bits
);
30696 callee_target
.isa
= sbitmap_alloc (isa_num_bits
);
30698 arm_configure_build_target (&caller_target
, caller_opts
, &global_options_set
,
30700 arm_configure_build_target (&callee_target
, callee_opts
, &global_options_set
,
30702 if (!bitmap_subset_p (callee_target
.isa
, caller_target
.isa
))
30703 can_inline
= false;
30705 sbitmap_free (caller_target
.isa
);
30706 sbitmap_free (callee_target
.isa
);
30708 /* OK to inline between different modes.
30709 Function with mode specific instructions, e.g using asm,
30710 must be explicitly protected with noinline. */
30714 /* Hook to fix function's alignment affected by target attribute. */
30717 arm_relayout_function (tree fndecl
)
30719 if (DECL_USER_ALIGN (fndecl
))
30722 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30725 callee_tree
= target_option_default_node
;
30727 struct cl_target_option
*opts
= TREE_TARGET_OPTION (callee_tree
);
30730 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts
->x_target_flags
)));
30733 /* Inner function to process the attribute((target(...))), take an argument and
30734 set the current options from the argument. If we have a list, recursively
30735 go over the list. */
30738 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
30740 if (TREE_CODE (args
) == TREE_LIST
)
30744 for (; args
; args
= TREE_CHAIN (args
))
30745 if (TREE_VALUE (args
)
30746 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
30751 else if (TREE_CODE (args
) != STRING_CST
)
30753 error ("attribute %<target%> argument not a string");
30757 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
30760 while ((q
= strtok (argstr
, ",")) != NULL
)
30762 while (ISSPACE (*q
)) ++q
;
30765 if (!strncmp (q
, "thumb", 5))
30766 opts
->x_target_flags
|= MASK_THUMB
;
30768 else if (!strncmp (q
, "arm", 3))
30769 opts
->x_target_flags
&= ~MASK_THUMB
;
30771 else if (!strncmp (q
, "fpu=", 4))
30774 if (! opt_enum_arg_to_value (OPT_mfpu_
, q
+4,
30775 &fpu_index
, CL_TARGET
))
30777 error ("invalid fpu for target attribute or pragma %qs", q
);
30780 if (fpu_index
== TARGET_FPU_auto
)
30782 /* This doesn't really make sense until we support
30783 general dynamic selection of the architecture and all
30785 sorry ("auto fpu selection not currently permitted here");
30788 opts
->x_arm_fpu_index
= (enum fpu_type
) fpu_index
;
30790 else if (!strncmp (q
, "arch=", 5))
30793 const arch_option
*arm_selected_arch
30794 = arm_parse_arch_option_name (all_architectures
, "arch", arch
);
30796 if (!arm_selected_arch
)
30798 error ("invalid architecture for target attribute or pragma %qs",
30803 opts
->x_arm_arch_string
= xstrndup (arch
, strlen (arch
));
30805 else if (q
[0] == '+')
30807 opts
->x_arm_arch_string
30808 = xasprintf ("%s%s", opts
->x_arm_arch_string
, q
);
30812 error ("unknown target attribute or pragma %qs", q
);
30820 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30823 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
30824 struct gcc_options
*opts_set
)
30826 struct cl_target_option cl_opts
;
30828 if (!arm_valid_target_attribute_rec (args
, opts
))
30831 cl_target_option_save (&cl_opts
, opts
);
30832 arm_configure_build_target (&arm_active_target
, &cl_opts
, opts_set
, false);
30833 arm_option_check_internal (opts
);
30834 /* Do any overrides, such as global options arch=xxx.
30835 We do this since arm_active_target was overridden. */
30836 arm_option_reconfigure_globals ();
30837 arm_options_perform_arch_sanity_checks ();
30838 arm_option_override_internal (opts
, opts_set
);
30840 return build_target_option_node (opts
);
30844 add_attribute (const char * mode
, tree
*attributes
)
30846 size_t len
= strlen (mode
);
30847 tree value
= build_string (len
, mode
);
30849 TREE_TYPE (value
) = build_array_type (char_type_node
,
30850 build_index_type (size_int (len
)));
30852 *attributes
= tree_cons (get_identifier ("target"),
30853 build_tree_list (NULL_TREE
, value
),
30857 /* For testing. Insert thumb or arm modes alternatively on functions. */
30860 arm_insert_attributes (tree fndecl
, tree
* attributes
)
30864 if (! TARGET_FLIP_THUMB
)
30867 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
30868 || DECL_BUILT_IN (fndecl
) || DECL_ARTIFICIAL (fndecl
))
30871 /* Nested definitions must inherit mode. */
30872 if (current_function_decl
)
30874 mode
= TARGET_THUMB
? "thumb" : "arm";
30875 add_attribute (mode
, attributes
);
30879 /* If there is already a setting don't change it. */
30880 if (lookup_attribute ("target", *attributes
) != NULL
)
30883 mode
= thumb_flipper
? "thumb" : "arm";
30884 add_attribute (mode
, attributes
);
30886 thumb_flipper
= !thumb_flipper
;
30889 /* Hook to validate attribute((target("string"))). */
30892 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
30893 tree args
, int ARG_UNUSED (flags
))
30896 struct gcc_options func_options
;
30897 tree cur_tree
, new_optimize
;
30898 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
30900 /* Get the optimization options of the current function. */
30901 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
30903 /* If the function changed the optimization levels as well as setting target
30904 options, start with the optimizations specified. */
30905 if (!func_optimize
)
30906 func_optimize
= optimization_default_node
;
30908 /* Init func_options. */
30909 memset (&func_options
, 0, sizeof (func_options
));
30910 init_options_struct (&func_options
, NULL
);
30911 lang_hooks
.init_options_struct (&func_options
);
30913 /* Initialize func_options to the defaults. */
30914 cl_optimization_restore (&func_options
,
30915 TREE_OPTIMIZATION (func_optimize
));
30917 cl_target_option_restore (&func_options
,
30918 TREE_TARGET_OPTION (target_option_default_node
));
30920 /* Set func_options flags with new target mode. */
30921 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
30922 &global_options_set
);
30924 if (cur_tree
== NULL_TREE
)
30927 new_optimize
= build_optimization_node (&func_options
);
30929 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
30931 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
30933 finalize_options_struct (&func_options
);
30938 /* Match an ISA feature bitmap to a named FPU. We always use the
30939 first entry that exactly matches the feature set, so that we
30940 effectively canonicalize the FPU name for the assembler. */
30942 arm_identify_fpu_from_isa (sbitmap isa
)
30944 auto_sbitmap
fpubits (isa_num_bits
);
30945 auto_sbitmap
cand_fpubits (isa_num_bits
);
30947 bitmap_and (fpubits
, isa
, isa_all_fpubits
);
30949 /* If there are no ISA feature bits relating to the FPU, we must be
30950 doing soft-float. */
30951 if (bitmap_empty_p (fpubits
))
30954 for (unsigned int i
= 0; i
< TARGET_FPU_auto
; i
++)
30956 arm_initialize_isa (cand_fpubits
, all_fpus
[i
].isa_bits
);
30957 if (bitmap_equal_p (fpubits
, cand_fpubits
))
30958 return all_fpus
[i
].name
;
30960 /* We must find an entry, or things have gone wrong. */
30961 gcc_unreachable ();
/* The last .arch and .fpu assembly strings that we printed.  */
static std::string arm_last_printed_arch_string;
static std::string arm_last_printed_fpu_string;
30968 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
30969 by the function fndecl. */
30971 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
30973 tree target_parts
= DECL_FUNCTION_SPECIFIC_TARGET (decl
);
30975 struct cl_target_option
*targ_options
;
30977 targ_options
= TREE_TARGET_OPTION (target_parts
);
30979 targ_options
= TREE_TARGET_OPTION (target_option_current_node
);
30980 gcc_assert (targ_options
);
30982 /* Only update the assembler .arch string if it is distinct from the last
30983 such string we printed. */
30984 std::string arch_to_print
= targ_options
->x_arm_arch_string
;
30985 if (arch_to_print
!= arm_last_printed_arch_string
)
30987 std::string arch_name
30988 = arch_to_print
.substr (0, arch_to_print
.find ("+"));
30989 asm_fprintf (asm_out_file
, "\t.arch %s\n", arch_name
.c_str ());
30990 const arch_option
*arch
30991 = arm_parse_arch_option_name (all_architectures
, "-march",
30992 targ_options
->x_arm_arch_string
);
30993 auto_sbitmap
opt_bits (isa_num_bits
);
30996 if (arch
->common
.extensions
)
30998 for (const struct cpu_arch_extension
*opt
= arch
->common
.extensions
;
31004 arm_initialize_isa (opt_bits
, opt
->isa_bits
);
31005 if (bitmap_subset_p (opt_bits
, arm_active_target
.isa
)
31006 && !bitmap_subset_p (opt_bits
, isa_all_fpubits
))
31007 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n",
31013 arm_last_printed_arch_string
= arch_to_print
;
31016 fprintf (stream
, "\t.syntax unified\n");
31020 if (is_called_in_ARM_mode (decl
)
31021 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
31022 && cfun
->is_thunk
))
31023 fprintf (stream
, "\t.code 32\n");
31024 else if (TARGET_THUMB1
)
31025 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
31027 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
31030 fprintf (stream
, "\t.arm\n");
31032 std::string fpu_to_print
31033 = TARGET_SOFT_FLOAT
31034 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target
.isa
);
31036 if (fpu_to_print
!= arm_last_printed_arch_string
)
31038 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_to_print
.c_str ());
31039 arm_last_printed_fpu_string
= fpu_to_print
;
31042 if (TARGET_POKE_FUNCTION_NAME
)
31043 arm_poke_function_name (stream
, (const char *) name
);
31046 /* If MEM is in the form of [base+offset], extract the two parts
31047 of address and set to BASE and OFFSET, otherwise return false
31048 after clearing BASE and OFFSET. */
31051 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
31055 gcc_assert (MEM_P (mem
));
31057 addr
= XEXP (mem
, 0);
31059 /* Strip off const from addresses like (const (addr)). */
31060 if (GET_CODE (addr
) == CONST
)
31061 addr
= XEXP (addr
, 0);
31063 if (GET_CODE (addr
) == REG
)
31066 *offset
= const0_rtx
;
31070 if (GET_CODE (addr
) == PLUS
31071 && GET_CODE (XEXP (addr
, 0)) == REG
31072 && CONST_INT_P (XEXP (addr
, 1)))
31074 *base
= XEXP (addr
, 0);
31075 *offset
= XEXP (addr
, 1);
31080 *offset
= NULL_RTX
;
31085 /* If INSN is a load or store of address in the form of [base+offset],
31086 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
31087 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
31088 otherwise return FALSE. */
31091 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
31095 gcc_assert (INSN_P (insn
));
31096 x
= PATTERN (insn
);
31097 if (GET_CODE (x
) != SET
)
31101 dest
= SET_DEST (x
);
31102 if (GET_CODE (src
) == REG
&& GET_CODE (dest
) == MEM
)
31105 extract_base_offset_in_addr (dest
, base
, offset
);
31107 else if (GET_CODE (src
) == MEM
&& GET_CODE (dest
) == REG
)
31110 extract_base_offset_in_addr (src
, base
, offset
);
31115 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
31118 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31120 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
31121 and PRI are only calculated for these instructions. For other instruction,
31122 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
31123 instruction fusion can be supported by returning different priorities.
31125 It's important that irrelevant instructions get the largest FUSION_PRI. */
31128 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
31129 int *fusion_pri
, int *pri
)
31135 gcc_assert (INSN_P (insn
));
31138 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
31145 /* Load goes first. */
31147 *fusion_pri
= tmp
- 1;
31149 *fusion_pri
= tmp
- 2;
31153 /* INSN with smaller base register goes first. */
31154 tmp
-= ((REGNO (base
) & 0xff) << 20);
31156 /* INSN with smaller offset goes first. */
31157 off_val
= (int)(INTVAL (offset
));
31159 tmp
-= (off_val
& 0xfffff);
31161 tmp
+= ((- off_val
) & 0xfffff);
31168 /* Construct and return a PARALLEL RTX vector with elements numbering the
31169 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31170 the vector - from the perspective of the architecture. This does not
31171 line up with GCC's perspective on lane numbers, so we end up with
31172 different masks depending on our target endian-ness. The diagram
31173 below may help. We must draw the distinction when building masks
31174 which select one half of the vector. An instruction selecting
31175 architectural low-lanes for a big-endian target, must be described using
31176 a mask selecting GCC high-lanes.
31178 Big-Endian Little-Endian
31180 GCC 0 1 2 3 3 2 1 0
31181 | x | x | x | x | | x | x | x | x |
31182 Architecture 3 2 1 0 3 2 1 0
31184 Low Mask: { 2, 3 } { 0, 1 }
31185 High Mask: { 0, 1 } { 2, 3 }
31189 arm_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
31191 int nunits
= GET_MODE_NUNITS (mode
);
31192 rtvec v
= rtvec_alloc (nunits
/ 2);
31193 int high_base
= nunits
/ 2;
31199 if (BYTES_BIG_ENDIAN
)
31200 base
= high
? low_base
: high_base
;
31202 base
= high
? high_base
: low_base
;
31204 for (i
= 0; i
< nunits
/ 2; i
++)
31205 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
31207 t1
= gen_rtx_PARALLEL (mode
, v
);
31211 /* Check OP for validity as a PARALLEL RTX vector with elements
31212 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
31213 from the perspective of the architecture. See the diagram above
31214 arm_simd_vect_par_cnst_half_p for more details. */
31217 arm_simd_check_vect_par_cnst_half_p (rtx op
, machine_mode mode
,
31220 rtx ideal
= arm_simd_vect_par_cnst_half (mode
, high
);
31221 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
31222 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
31225 if (!VECTOR_MODE_P (mode
))
31228 if (count_op
!= count_ideal
)
31231 for (i
= 0; i
< count_ideal
; i
++)
31233 rtx elt_op
= XVECEXP (op
, 0, i
);
31234 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
31236 if (!CONST_INT_P (elt_op
)
31237 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
31243 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31246 arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
31249 /* For now, we punt and not handle this for TARGET_THUMB1. */
31250 if (vcall_offset
&& TARGET_THUMB1
)
31253 /* Otherwise ok. */
31257 /* Generate RTL for a conditional branch with rtx comparison CODE in
31258 mode CC_MODE. The destination of the unlikely conditional branch
31262 arm_gen_unlikely_cbranch (enum rtx_code code
, machine_mode cc_mode
,
31266 x
= gen_rtx_fmt_ee (code
, VOIDmode
,
31267 gen_rtx_REG (cc_mode
, CC_REGNUM
),
31270 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
31271 gen_rtx_LABEL_REF (VOIDmode
, label_ref
),
31273 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
31276 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31278 For pure-code sections there is no letter code for this attribute, so
31279 output all the section flags numerically when this is needed. */
31282 arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
)
31285 if (flags
& SECTION_ARM_PURECODE
)
31289 if (!(flags
& SECTION_DEBUG
))
31291 if (flags
& SECTION_EXCLUDE
)
31292 *num
|= 0x80000000;
31293 if (flags
& SECTION_WRITE
)
31295 if (flags
& SECTION_CODE
)
31297 if (flags
& SECTION_MERGE
)
31299 if (flags
& SECTION_STRINGS
)
31301 if (flags
& SECTION_TLS
)
31303 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
31312 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31314 If pure-code is passed as an option, make sure all functions are in
31315 sections that have the SHF_ARM_PURECODE attribute. */
31318 arm_function_section (tree decl
, enum node_frequency freq
,
31319 bool startup
, bool exit
)
31321 const char * section_name
;
31324 if (!decl
|| TREE_CODE (decl
) != FUNCTION_DECL
)
31325 return default_function_section (decl
, freq
, startup
, exit
);
31327 if (!target_pure_code
)
31328 return default_function_section (decl
, freq
, startup
, exit
);
31331 section_name
= DECL_SECTION_NAME (decl
);
31333 /* If a function is not in a named section then it falls under the 'default'
31334 text section, also known as '.text'. We can preserve previous behavior as
31335 the default text section already has the SHF_ARM_PURECODE section
31339 section
*default_sec
= default_function_section (decl
, freq
, startup
,
31342 /* If default_sec is not null, then it must be a special section like for
31343 example .text.startup. We set the pure-code attribute and return the
31344 same section to preserve existing behavior. */
31346 default_sec
->common
.flags
|= SECTION_ARM_PURECODE
;
31347 return default_sec
;
31350 /* Otherwise look whether a section has already been created with
31352 sec
= get_named_section (decl
, section_name
, 0);
31354 /* If that is not the case passing NULL as the section's name to
31355 'get_named_section' will create a section with the declaration's
31357 sec
= get_named_section (decl
, NULL
, 0);
31359 /* Set the SHF_ARM_PURECODE attribute. */
31360 sec
->common
.flags
|= SECTION_ARM_PURECODE
;
31365 /* Implements the TARGET_SECTION_FLAGS hook.
31367 If DECL is a function declaration and pure-code is passed as an option
31368 then add the SFH_ARM_PURECODE attribute to the section flags. NAME is the
31369 section's name and RELOC indicates whether the declarations initializer may
31370 contain runtime relocations. */
31372 static unsigned int
31373 arm_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
31375 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
31377 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
&& target_pure_code
)
31378 flags
|= SECTION_ARM_PURECODE
;
31383 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31386 arm_expand_divmod_libfunc (rtx libfunc
, machine_mode mode
,
31388 rtx
*quot_p
, rtx
*rem_p
)
31390 if (mode
== SImode
)
31391 gcc_assert (!TARGET_IDIV
);
31393 scalar_int_mode libval_mode
31394 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode
));
31396 rtx libval
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
31398 op0
, GET_MODE (op0
),
31399 op1
, GET_MODE (op1
));
31401 rtx quotient
= simplify_gen_subreg (mode
, libval
, libval_mode
, 0);
31402 rtx remainder
= simplify_gen_subreg (mode
, libval
, libval_mode
,
31403 GET_MODE_SIZE (mode
));
31405 gcc_assert (quotient
);
31406 gcc_assert (remainder
);
31408 *quot_p
= quotient
;
31409 *rem_p
= remainder
;
31412 /* This function checks for the availability of the coprocessor builtin passed
31413 in BUILTIN for the current target. Returns true if it is available and
31414 false otherwise. If a BUILTIN is passed for which this function has not
31415 been implemented it will cause an exception. */
31418 arm_coproc_builtin_available (enum unspecv builtin
)
31420 /* None of these builtins are available in Thumb mode if the target only
31421 supports Thumb-1. */
31439 case VUNSPEC_LDC2L
:
31441 case VUNSPEC_STC2L
:
31444 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31451 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31453 if (arm_arch6
|| arm_arch5te
)
31456 case VUNSPEC_MCRR2
:
31457 case VUNSPEC_MRRC2
:
31462 gcc_unreachable ();
31467 /* This function returns true if OP is a valid memory operand for the ldc and
31468 stc coprocessor instructions and false otherwise. */
31471 arm_coproc_ldc_stc_legitimate_address (rtx op
)
31473 HOST_WIDE_INT range
;
31474 /* Has to be a memory operand. */
31480 /* We accept registers. */
31484 switch GET_CODE (op
)
31488 /* Or registers with an offset. */
31489 if (!REG_P (XEXP (op
, 0)))
31494 /* The offset must be an immediate though. */
31495 if (!CONST_INT_P (op
))
31498 range
= INTVAL (op
);
31500 /* Within the range of [-1020,1020]. */
31501 if (!IN_RANGE (range
, -1020, 1020))
31504 /* And a multiple of 4. */
31505 return (range
% 4) == 0;
31511 return REG_P (XEXP (op
, 0));
31513 gcc_unreachable ();
31518 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31520 In VFPv1, VFP registers could only be accessed in the mode they were
31521 set, so subregs would be invalid there. However, we don't support
31522 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31524 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31525 VFP registers in little-endian order. We can't describe that accurately to
31526 GCC, so avoid taking subregs of such values.
31528 The only exception is going from a 128-bit to a 64-bit type. In that
31529 case the data layout happens to be consistent for big-endian, so we
31530 explicitly allow that case. */
31533 arm_can_change_mode_class (machine_mode from
, machine_mode to
,
31534 reg_class_t rclass
)
31537 && !(GET_MODE_SIZE (from
) == 16 && GET_MODE_SIZE (to
) == 8)
31538 && (GET_MODE_SIZE (from
) > UNITS_PER_WORD
31539 || GET_MODE_SIZE (to
) > UNITS_PER_WORD
)
31540 && reg_classes_intersect_p (VFP_REGS
, rclass
))
31545 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31546 strcpy from constants will be faster. */
31548 static HOST_WIDE_INT
31549 arm_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
31551 unsigned int factor
= (TARGET_THUMB
|| ! arm_tune_xscale
? 1 : 2);
31552 if (TREE_CODE (exp
) == STRING_CST
&& !optimize_size
)
31553 return MAX (align
, BITS_PER_WORD
* factor
);
31558 namespace selftest
{
31560 /* Scan the static data tables generated by parsecpu.awk looking for
31561 potential issues with the data. We primarily check for
31562 inconsistencies in the option extensions at present (extensions
31563 that duplicate others but aren't marked as aliases). Furthermore,
31564 for correct canonicalization later options must never be a subset
31565 of an earlier option. Any extension should also only specify other
31566 feature bits and never an architecture bit. The architecture is inferred
31567 from the declaration of the extension. */
31569 arm_test_cpu_arch_data (void)
31571 const arch_option
*arch
;
31572 const cpu_option
*cpu
;
31573 auto_sbitmap
target_isa (isa_num_bits
);
31574 auto_sbitmap
isa1 (isa_num_bits
);
31575 auto_sbitmap
isa2 (isa_num_bits
);
31577 for (arch
= all_architectures
; arch
->common
.name
!= NULL
; ++arch
)
31579 const cpu_arch_extension
*ext1
, *ext2
;
31581 if (arch
->common
.extensions
== NULL
)
31584 arm_initialize_isa (target_isa
, arch
->common
.isa_bits
);
31586 for (ext1
= arch
->common
.extensions
; ext1
->name
!= NULL
; ++ext1
)
31591 arm_initialize_isa (isa1
, ext1
->isa_bits
);
31592 for (ext2
= ext1
+ 1; ext2
->name
!= NULL
; ++ext2
)
31594 if (ext2
->alias
|| ext1
->remove
!= ext2
->remove
)
31597 arm_initialize_isa (isa2
, ext2
->isa_bits
);
31598 /* If the option is a subset of the parent option, it doesn't
31599 add anything and so isn't useful. */
31600 ASSERT_TRUE (!bitmap_subset_p (isa2
, isa1
));
31602 /* If the extension specifies any architectural bits then
31603 disallow it. Extensions should only specify feature bits. */
31604 ASSERT_TRUE (!bitmap_intersect_p (isa2
, target_isa
));
31609 for (cpu
= all_cores
; cpu
->common
.name
!= NULL
; ++cpu
)
31611 const cpu_arch_extension
*ext1
, *ext2
;
31613 if (cpu
->common
.extensions
== NULL
)
31616 arm_initialize_isa (target_isa
, arch
->common
.isa_bits
);
31618 for (ext1
= cpu
->common
.extensions
; ext1
->name
!= NULL
; ++ext1
)
31623 arm_initialize_isa (isa1
, ext1
->isa_bits
);
31624 for (ext2
= ext1
+ 1; ext2
->name
!= NULL
; ++ext2
)
31626 if (ext2
->alias
|| ext1
->remove
!= ext2
->remove
)
31629 arm_initialize_isa (isa2
, ext2
->isa_bits
);
31630 /* If the option is a subset of the parent option, it doesn't
31631 add anything and so isn't useful. */
31632 ASSERT_TRUE (!bitmap_subset_p (isa2
, isa1
));
31634 /* If the extension specifies any architectural bits then
31635 disallow it. Extensions should only specify feature bits. */
31636 ASSERT_TRUE (!bitmap_intersect_p (isa2
, target_isa
));
31642 /* Scan the static data tables generated by parsecpu.awk looking for
31643 potential issues with the data. Here we check for consistency between the
31644 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31645 a feature bit that is not defined by any FPU flag. */
31647 arm_test_fpu_data (void)
31649 auto_sbitmap
isa_all_fpubits (isa_num_bits
);
31650 auto_sbitmap
fpubits (isa_num_bits
);
31651 auto_sbitmap
tmpset (isa_num_bits
);
31653 static const enum isa_feature fpu_bitlist
[]
31654 = { ISA_ALL_FPU_INTERNAL
, isa_nobit
};
31655 arm_initialize_isa (isa_all_fpubits
, fpu_bitlist
);
31657 for (unsigned int i
= 0; i
< TARGET_FPU_auto
; i
++)
31659 arm_initialize_isa (fpubits
, all_fpus
[i
].isa_bits
);
31660 bitmap_and_compl (tmpset
, isa_all_fpubits
, fpubits
);
31661 bitmap_clear (isa_all_fpubits
);
31662 bitmap_copy (isa_all_fpubits
, tmpset
);
31665 if (!bitmap_empty_p (isa_all_fpubits
))
31667 fprintf (stderr
, "Error: found feature bits in the ALL_FPU_INTERAL"
31668 " group that are not defined by any FPU.\n"
31669 " Check your arm-cpus.in.\n");
31670 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits
));
31675 arm_run_selftests (void)
31677 arm_test_cpu_arch_data ();
31678 arm_test_fpu_data ();
31680 } /* Namespace selftest. */
31682 #undef TARGET_RUN_TARGET_SELFTESTS
31683 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31684 #endif /* CHECKING_P */
/* The single global target vector; every hook defined above is wired in
   via the TARGET_* macro overrides collected by TARGET_INITIALIZER.  */
struct gcc_target targetm = TARGET_INITIALIZER;
31688 #include "gt-arm.h"