/* Output routines for GCC for ARM.
   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
25 #include "coretypes.h"
34 #include "stringpool.h"
40 #include "diagnostic-core.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
47 #include "insn-attr.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
61 #include "target-globals.h"
63 #include "tm-constrs.h"
65 #include "optabs-libfuncs.h"
69 /* This file should be included last. */
70 #include "target-def.h"
/* Forward definitions of types.  Minipool nodes/fixups implement the
   constant-pool placement machinery used later in this file.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* Hook a language front end can set to emit language-specific object
   attributes; called from the file-end machinery.  */
void (*arm_lang_output_object_attributes_hook)(void);
83 /* Forward function declarations. */
84 static bool arm_const_not_ok_for_debug_p (rtx
);
85 static int arm_needs_doubleword_align (machine_mode
, const_tree
);
86 static int arm_compute_static_chain_stack_bytes (void);
87 static arm_stack_offsets
*arm_get_frame_offsets (void);
88 static void arm_compute_frame_layout (void);
89 static void arm_add_gc_roots (void);
90 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
91 unsigned HOST_WIDE_INT
, rtx
, rtx
, int, int);
92 static unsigned bit_count (unsigned long);
93 static unsigned bitmap_popcount (const sbitmap
);
94 static int arm_address_register_rtx_p (rtx
, int);
95 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
96 static bool is_called_in_ARM_mode (tree
);
97 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
98 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
99 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
100 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
101 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
102 inline static int thumb1_index_register_rtx_p (rtx
, int);
103 static int thumb_far_jump_used_p (void);
104 static bool thumb_force_lr_save (void);
105 static unsigned arm_size_return_regs (void);
106 static bool arm_assemble_integer (rtx
, unsigned int, int);
107 static void arm_print_operand (FILE *, rtx
, int);
108 static void arm_print_operand_address (FILE *, machine_mode
, rtx
);
109 static bool arm_print_operand_punct_valid_p (unsigned char code
);
110 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
111 static arm_cc
get_arm_condition_code (rtx
);
112 static const char *output_multi_immediate (rtx
*, const char *, const char *,
114 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
115 static struct machine_function
*arm_init_machine_status (void);
116 static void thumb_exit (FILE *, int);
117 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
118 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
119 static Mnode
*add_minipool_forward_ref (Mfix
*);
120 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
121 static Mnode
*add_minipool_backward_ref (Mfix
*);
122 static void assign_minipool_offsets (Mfix
*);
123 static void arm_print_value (FILE *, rtx
);
124 static void dump_minipool (rtx_insn
*);
125 static int arm_barrier_cost (rtx_insn
*);
126 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
127 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
128 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
130 static void arm_reorg (void);
131 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
132 static unsigned long arm_compute_save_reg0_reg12_mask (void);
133 static unsigned long arm_compute_save_core_reg_mask (void);
134 static unsigned long arm_isr_value (tree
);
135 static unsigned long arm_compute_func_type (void);
136 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
137 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
138 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
139 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
140 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
142 static tree
arm_handle_cmse_nonsecure_entry (tree
*, tree
, tree
, int, bool *);
143 static tree
arm_handle_cmse_nonsecure_call (tree
*, tree
, tree
, int, bool *);
144 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
145 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
146 static int arm_comp_type_attributes (const_tree
, const_tree
);
147 static void arm_set_default_type_attributes (tree
);
148 static int arm_adjust_cost (rtx_insn
*, int, rtx_insn
*, int, unsigned int);
149 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
150 static int optimal_immediate_sequence (enum rtx_code code
,
151 unsigned HOST_WIDE_INT val
,
152 struct four_ints
*return_sequence
);
153 static int optimal_immediate_sequence_1 (enum rtx_code code
,
154 unsigned HOST_WIDE_INT val
,
155 struct four_ints
*return_sequence
,
157 static int arm_get_strip_length (int);
158 static bool arm_function_ok_for_sibcall (tree
, tree
);
159 static machine_mode
arm_promote_function_mode (const_tree
,
162 static bool arm_return_in_memory (const_tree
, const_tree
);
163 static rtx
arm_function_value (const_tree
, const_tree
, bool);
164 static rtx
arm_libcall_value_1 (machine_mode
);
165 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
166 static bool arm_function_value_regno_p (const unsigned int);
167 static void arm_internal_label (FILE *, const char *, unsigned long);
168 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
170 static bool arm_have_conditional_execution (void);
171 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
172 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
173 static bool arm_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
174 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
175 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
176 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
177 static void emit_constant_insn (rtx cond
, rtx pattern
);
178 static rtx_insn
*emit_set_insn (rtx
, rtx
);
179 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
180 static int arm_arg_partial_bytes (cumulative_args_t
, machine_mode
,
182 static rtx
arm_function_arg (cumulative_args_t
, machine_mode
,
184 static void arm_function_arg_advance (cumulative_args_t
, machine_mode
,
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
187 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
189 static rtx
aapcs_libcall_value (machine_mode
);
190 static int aapcs_select_return_coproc (const_tree
, const_tree
);
192 #ifdef OBJECT_FORMAT_ELF
193 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
194 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
197 static void arm_encode_section_info (tree
, rtx
, int);
200 static void arm_file_end (void);
201 static void arm_file_start (void);
202 static void arm_insert_attributes (tree
, tree
*);
204 static void arm_setup_incoming_varargs (cumulative_args_t
, machine_mode
,
206 static bool arm_pass_by_reference (cumulative_args_t
,
207 machine_mode
, const_tree
, bool);
208 static bool arm_promote_prototypes (const_tree
);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree
);
212 static bool arm_must_pass_in_stack (machine_mode
, const_tree
);
213 static bool arm_return_in_memory (const_tree
, const_tree
);
215 static void arm_unwind_emit (FILE *, rtx_insn
*);
216 static bool arm_output_ttype (rtx
);
217 static void arm_asm_emit_except_personality (rtx
);
219 static void arm_asm_init_sections (void);
220 static rtx
arm_dwarf_register_span (rtx
);
222 static tree
arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree
arm_get_cookie_size (tree
);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree
);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree
arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree
, rtx
);
234 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
235 static void arm_option_override (void);
236 static void arm_option_restore (struct gcc_options
*,
237 struct cl_target_option
*);
238 static void arm_override_options_after_change (void);
239 static void arm_option_print (FILE *, int, struct cl_target_option
*);
240 static void arm_set_current_function (tree
);
241 static bool arm_can_inline_p (tree
, tree
);
242 static void arm_relayout_function (tree
);
243 static bool arm_valid_target_attribute_p (tree
, tree
, tree
, int);
244 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
245 static bool arm_sched_can_speculate_insn (rtx_insn
*);
246 static bool arm_macro_fusion_p (void);
247 static bool arm_cannot_copy_insn_p (rtx_insn
*);
248 static int arm_issue_rate (void);
249 static int arm_first_cycle_multipass_dfa_lookahead (void);
250 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
251 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
252 static bool arm_output_addr_const_extra (FILE *, rtx
);
253 static bool arm_allocate_stack_slots_for_args (void);
254 static bool arm_warn_func_return (tree
);
255 static tree
arm_promoted_type (const_tree t
);
256 static bool arm_scalar_mode_supported_p (machine_mode
);
257 static bool arm_frame_pointer_required (void);
258 static bool arm_can_eliminate (const int, const int);
259 static void arm_asm_trampoline_template (FILE *);
260 static void arm_trampoline_init (rtx
, tree
, rtx
);
261 static rtx
arm_trampoline_adjust_address (rtx
);
262 static rtx_insn
*arm_pic_static_addr (rtx orig
, rtx reg
);
263 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
264 static bool xscale_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
265 static bool fa726te_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
266 static bool arm_array_mode_supported_p (machine_mode
,
267 unsigned HOST_WIDE_INT
);
268 static machine_mode
arm_preferred_simd_mode (machine_mode
);
269 static bool arm_class_likely_spilled_p (reg_class_t
);
270 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
271 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
272 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
276 static void arm_conditional_register_usage (void);
277 static enum flt_eval_method
arm_excess_precision (enum excess_precision_type
);
278 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
279 static unsigned int arm_autovectorize_vector_sizes (void);
280 static int arm_default_branch_cost (bool, bool);
281 static int arm_cortex_a5_branch_cost (bool, bool);
282 static int arm_cortex_m_branch_cost (bool, bool);
283 static int arm_cortex_m7_branch_cost (bool, bool);
285 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
286 const unsigned char *sel
);
288 static bool aarch_macro_fusion_pair_p (rtx_insn
*, rtx_insn
*);
290 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
292 int misalign ATTRIBUTE_UNUSED
);
293 static unsigned arm_add_stmt_cost (void *data
, int count
,
294 enum vect_cost_for_stmt kind
,
295 struct _stmt_vec_info
*stmt_info
,
297 enum vect_cost_model_location where
);
299 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
300 bool op0_preserve_value
);
301 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
303 static void arm_sched_fusion_priority (rtx_insn
*, int, int *, int*);
304 static bool arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
306 static section
*arm_function_section (tree
, enum node_frequency
, bool, bool);
307 static bool arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
);
308 static unsigned int arm_elf_section_type_flags (tree decl
, const char *name
,
310 static void arm_expand_divmod_libfunc (rtx
, machine_mode
, rtx
, rtx
, rtx
*, rtx
*);
311 static machine_mode
arm_floatn_mode (int, bool);
313 /* Table of machine attributes. */
314 static const struct attribute_spec arm_attribute_table
[] =
316 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
317 affects_type_identity } */
318 /* Function calls made to this symbol must be done indirectly, because
319 it may lie outside of the 26 bit addressing range of a normal function
321 { "long_call", 0, 0, false, true, true, NULL
, false },
322 /* Whereas these functions are always known to reside within the 26 bit
324 { "short_call", 0, 0, false, true, true, NULL
, false },
325 /* Specify the procedure call conventions for a function. */
326 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
328 /* Interrupt Service Routines have special prologue and epilogue requirements. */
329 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
331 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
333 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
336 /* ARM/PE has three new attributes:
338 dllexport - for exporting a function/variable that will live in a dll
339 dllimport - for importing a function/variable from a dll
341 Microsoft allows multiple declspecs in one __declspec, separating
342 them with spaces. We do NOT support this. Instead, use __declspec
345 { "dllimport", 0, 0, true, false, false, NULL
, false },
346 { "dllexport", 0, 0, true, false, false, NULL
, false },
347 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
349 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
350 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
351 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
352 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
355 /* ARMv8-M Security Extensions support. */
356 { "cmse_nonsecure_entry", 0, 0, true, false, false,
357 arm_handle_cmse_nonsecure_entry
, false },
358 { "cmse_nonsecure_call", 0, 0, true, false, false,
359 arm_handle_cmse_nonsecure_call
, true },
360 { NULL
, 0, 0, false, false, false, NULL
, false }
363 /* Initialize the GCC target structure. */
364 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
365 #undef TARGET_MERGE_DECL_ATTRIBUTES
366 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#undef  TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef  TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef  TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef  TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

/* NULL here forces all integer output through TARGET_ASM_INTEGER so
   the backend can intercept it (e.g. for Thumb function pointers).  */
#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef  TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef  TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef  TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef  TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef  TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef  TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef  TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef  TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn

#undef  TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef  TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef  TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef  TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef  TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef  TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef  TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
455 #undef TARGET_ENCODE_SECTION_INFO
457 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
459 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef  TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef  TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef  TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef  TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef  TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef  TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef  TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef  TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef  TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef  TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef  TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef  TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef  TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef  TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef  TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef  TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef  TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef  TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef  TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef  TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef  TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef  TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef  TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef  TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef  TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef  TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef  TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef  TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef  TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef  TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef  TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef  TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef  TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef  TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef  TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
596 #undef TARGET_ASM_UNWIND_EMIT
597 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
599 /* EABI unwinding tables use a different format for the typeinfo tables. */
600 #undef TARGET_ASM_TTYPE
601 #define TARGET_ASM_TTYPE arm_output_ttype
603 #undef TARGET_ARM_EABI_UNWINDER
604 #define TARGET_ARM_EABI_UNWINDER true
606 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
607 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
609 #endif /* ARM_UNWIND_INFO */
#undef  TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef  TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
621 #undef TARGET_HAVE_TLS
622 #define TARGET_HAVE_TLS true
#undef  TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef  TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef  TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef  TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef  TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088

#undef  TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef  TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef  TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef  TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef  TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef  TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef  TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef  TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
668 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
669 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#undef  TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef  TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef  TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef  TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef  TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout

#undef  TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef  TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef  TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef  TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef  TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef  TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef  TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef  TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef  TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef  TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef  TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef  TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef  TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef  TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef  TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef  MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)

#undef  TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef  TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef  TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef  TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef  TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef  TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef  TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef  TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef  TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision

/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef  TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
773 struct gcc_target targetm
= TARGET_INITIALIZER
;
775 /* Obstack for minipool constant handling. */
776 static struct obstack minipool_obstack
;
777 static char * minipool_startobj
;
779 /* The maximum number of insns skipped which
780 will be conditionalised if possible. */
781 static int max_insns_skipped
= 5;
783 extern FILE * asm_out_file
;
785 /* True if we are currently building a constant table. */
786 int making_const_table
;
788 /* The processor for which instructions should be scheduled. */
789 enum processor_type arm_tune
= TARGET_CPU_arm_none
;
791 /* The current tuning set. */
792 const struct tune_params
*current_tune
;
794 /* Which floating point hardware to schedule for. */
797 /* Used for Thumb call_via trampolines. */
798 rtx thumb_call_via_label
[14];
799 static int thumb_call_reg_needed
;
801 /* The bits in this mask specify which instruction scheduling options should
803 unsigned int tune_flags
= 0;
805 /* The highest ARM architecture version supported by the
807 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
809 /* Active target architecture and tuning. */
811 struct arm_build_target arm_active_target
;
813 /* The following are used in the arm.md file as equivalents to bits
814 in the above two flag variables. */
816 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
819 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
822 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
825 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
828 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
831 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
834 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
837 /* Nonzero if this chip supports the ARM 6K extensions. */
840 /* Nonzero if this chip supports the ARM 6KZ extensions. */
843 /* Nonzero if instructions present in ARMv6-M can be used. */
846 /* Nonzero if this chip supports the ARM 7 extensions. */
/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;
855 /* Nonzero if instructions present in ARMv7E-M can be used. */
858 /* Nonzero if instructions present in ARMv8 can be used. */
861 /* Nonzero if this chip supports the ARMv8.1 extensions. */
864 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
867 /* Nonzero if this chip supports the FP16 instructions extension of ARM
869 int arm_fp16_inst
= 0;
871 /* Nonzero if this chip can benefit from load scheduling. */
872 int arm_ld_sched
= 0;
874 /* Nonzero if this chip is a StrongARM. */
875 int arm_tune_strongarm
= 0;
877 /* Nonzero if this chip supports Intel Wireless MMX technology. */
878 int arm_arch_iwmmxt
= 0;
880 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
881 int arm_arch_iwmmxt2
= 0;
883 /* Nonzero if this chip is an XScale. */
884 int arm_arch_xscale
= 0;
886 /* Nonzero if tuning for XScale */
887 int arm_tune_xscale
= 0;
889 /* Nonzero if we want to tune for stores that access the write-buffer.
890 This typically means an ARM6 or ARM7 with MMU or MPU. */
891 int arm_tune_wbuf
= 0;
893 /* Nonzero if tuning for Cortex-A9. */
894 int arm_tune_cortex_a9
= 0;
896 /* Nonzero if we should define __THUMB_INTERWORK__ in the
898 XXX This is a bit of a hack, it's intended to help work around
899 problems in GLD which doesn't understand that armv5t code is
900 interworking clean. */
901 int arm_cpp_interwork
= 0;
903 /* Nonzero if chip supports Thumb 1. */
906 /* Nonzero if chip supports Thumb 2. */
909 /* Nonzero if chip supports integer division instruction. */
910 int arm_arch_arm_hwdiv
;
911 int arm_arch_thumb_hwdiv
;
913 /* Nonzero if chip disallows volatile memory access in IT block. */
914 int arm_arch_no_volatile_ce
;
916 /* Nonzero if we should use Neon to handle 64-bits operations rather
917 than core registers. */
918 int prefer_neon_for_64bits
= 0;
920 /* Nonzero if we shouldn't use literal pools. */
921 bool arm_disable_literal_pool
= false;
923 /* The register number to be used for the PIC offset register. */
924 unsigned arm_pic_register
= INVALID_REGNUM
;
926 enum arm_pcs arm_pcs_default
;
928 /* For an explanation of these variables, see final_prescan_insn below. */
930 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
931 enum arm_cond_code arm_current_cc
;
934 int arm_target_label
;
935 /* The number of conditionally executed insns, including the current insn. */
936 int arm_condexec_count
= 0;
937 /* A bitmask specifying the patterns for the IT block.
938 Zero means do not output an IT block before this insn. */
939 int arm_condexec_mask
= 0;
940 /* The number of bits used in arm_condexec_mask. */
941 int arm_condexec_masklen
= 0;
943 /* Nonzero if chip supports the ARMv8 CRC instructions. */
944 int arm_arch_crc
= 0;
946 /* Nonzero if chip supports the ARMv8-M security extensions. */
947 int arm_arch_cmse
= 0;
949 /* Nonzero if the core has a very small, high-latency, multiply unit. */
950 int arm_m_profile_small_mul
= 0;
952 /* The condition codes of the ARM, and the inverse function. */
953 static const char * const arm_condition_codes
[] =
955 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
956 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
959 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
960 int arm_regs_in_sequence
[] =
962 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
/* Assembler mnemonic used for logical-shift-left.  */
965 #define ARM_LSL_NAME "lsl"
/* True iff the two NUL-terminated strings compare equal.  */
966 #define streq(string1, string2) (strcmp (string1, string2) == 0)
/* Mask of low registers (the 0xff limits it to r0-r7) available as Thumb-2
   work registers; the hard frame pointer, stack pointer, program counter and
   PIC register bits are explicitly cleared.  */
968 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
969 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
970 | (1 << PIC_OFFSET_TABLE_REGNUM)))
972 /* Initialization code. */
976 const char *const name
;
977 enum processor_type core
;
978 unsigned int tune_flags
;
980 enum base_architecture base_arch
;
981 enum isa_feature isa_bits
[isa_num_bits
];
982 const struct tune_params
*const tune
;
986 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
987 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
994 /* arm generic vectorizer costs. */
996 struct cpu_vec_costs arm_default_vec_cost
= {
997 1, /* scalar_stmt_cost. */
998 1, /* scalar load_cost. */
999 1, /* scalar_store_cost. */
1000 1, /* vec_stmt_cost. */
1001 1, /* vec_to_scalar_cost. */
1002 1, /* scalar_to_vec_cost. */
1003 1, /* vec_align_load_cost. */
1004 1, /* vec_unalign_load_cost. */
1005 1, /* vec_unalign_store_cost. */
1006 1, /* vec_store_cost. */
1007 3, /* cond_taken_branch_cost. */
1008 1, /* cond_not_taken_branch_cost. */
1011 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1012 #include "aarch-cost-tables.h"
1016 const struct cpu_cost_table cortexa9_extra_costs
=
1023 COSTS_N_INSNS (1), /* shift_reg. */
1024 COSTS_N_INSNS (1), /* arith_shift. */
1025 COSTS_N_INSNS (2), /* arith_shift_reg. */
1027 COSTS_N_INSNS (1), /* log_shift_reg. */
1028 COSTS_N_INSNS (1), /* extend. */
1029 COSTS_N_INSNS (2), /* extend_arith. */
1030 COSTS_N_INSNS (1), /* bfi. */
1031 COSTS_N_INSNS (1), /* bfx. */
1035 true /* non_exec_costs_exec. */
1040 COSTS_N_INSNS (3), /* simple. */
1041 COSTS_N_INSNS (3), /* flag_setting. */
1042 COSTS_N_INSNS (2), /* extend. */
1043 COSTS_N_INSNS (3), /* add. */
1044 COSTS_N_INSNS (2), /* extend_add. */
1045 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1049 0, /* simple (N/A). */
1050 0, /* flag_setting (N/A). */
1051 COSTS_N_INSNS (4), /* extend. */
1053 COSTS_N_INSNS (4), /* extend_add. */
1059 COSTS_N_INSNS (2), /* load. */
1060 COSTS_N_INSNS (2), /* load_sign_extend. */
1061 COSTS_N_INSNS (2), /* ldrd. */
1062 COSTS_N_INSNS (2), /* ldm_1st. */
1063 1, /* ldm_regs_per_insn_1st. */
1064 2, /* ldm_regs_per_insn_subsequent. */
1065 COSTS_N_INSNS (5), /* loadf. */
1066 COSTS_N_INSNS (5), /* loadd. */
1067 COSTS_N_INSNS (1), /* load_unaligned. */
1068 COSTS_N_INSNS (2), /* store. */
1069 COSTS_N_INSNS (2), /* strd. */
1070 COSTS_N_INSNS (2), /* stm_1st. */
1071 1, /* stm_regs_per_insn_1st. */
1072 2, /* stm_regs_per_insn_subsequent. */
1073 COSTS_N_INSNS (1), /* storef. */
1074 COSTS_N_INSNS (1), /* stored. */
1075 COSTS_N_INSNS (1), /* store_unaligned. */
1076 COSTS_N_INSNS (1), /* loadv. */
1077 COSTS_N_INSNS (1) /* storev. */
1082 COSTS_N_INSNS (14), /* div. */
1083 COSTS_N_INSNS (4), /* mult. */
1084 COSTS_N_INSNS (7), /* mult_addsub. */
1085 COSTS_N_INSNS (30), /* fma. */
1086 COSTS_N_INSNS (3), /* addsub. */
1087 COSTS_N_INSNS (1), /* fpconst. */
1088 COSTS_N_INSNS (1), /* neg. */
1089 COSTS_N_INSNS (3), /* compare. */
1090 COSTS_N_INSNS (3), /* widen. */
1091 COSTS_N_INSNS (3), /* narrow. */
1092 COSTS_N_INSNS (3), /* toint. */
1093 COSTS_N_INSNS (3), /* fromint. */
1094 COSTS_N_INSNS (3) /* roundint. */
1098 COSTS_N_INSNS (24), /* div. */
1099 COSTS_N_INSNS (5), /* mult. */
1100 COSTS_N_INSNS (8), /* mult_addsub. */
1101 COSTS_N_INSNS (30), /* fma. */
1102 COSTS_N_INSNS (3), /* addsub. */
1103 COSTS_N_INSNS (1), /* fpconst. */
1104 COSTS_N_INSNS (1), /* neg. */
1105 COSTS_N_INSNS (3), /* compare. */
1106 COSTS_N_INSNS (3), /* widen. */
1107 COSTS_N_INSNS (3), /* narrow. */
1108 COSTS_N_INSNS (3), /* toint. */
1109 COSTS_N_INSNS (3), /* fromint. */
1110 COSTS_N_INSNS (3) /* roundint. */
1115 COSTS_N_INSNS (1) /* alu. */
1119 const struct cpu_cost_table cortexa8_extra_costs
=
1125 COSTS_N_INSNS (1), /* shift. */
1127 COSTS_N_INSNS (1), /* arith_shift. */
1128 0, /* arith_shift_reg. */
1129 COSTS_N_INSNS (1), /* log_shift. */
1130 0, /* log_shift_reg. */
1132 0, /* extend_arith. */
1138 true /* non_exec_costs_exec. */
1143 COSTS_N_INSNS (1), /* simple. */
1144 COSTS_N_INSNS (1), /* flag_setting. */
1145 COSTS_N_INSNS (1), /* extend. */
1146 COSTS_N_INSNS (1), /* add. */
1147 COSTS_N_INSNS (1), /* extend_add. */
1148 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1152 0, /* simple (N/A). */
1153 0, /* flag_setting (N/A). */
1154 COSTS_N_INSNS (2), /* extend. */
1156 COSTS_N_INSNS (2), /* extend_add. */
1162 COSTS_N_INSNS (1), /* load. */
1163 COSTS_N_INSNS (1), /* load_sign_extend. */
1164 COSTS_N_INSNS (1), /* ldrd. */
1165 COSTS_N_INSNS (1), /* ldm_1st. */
1166 1, /* ldm_regs_per_insn_1st. */
1167 2, /* ldm_regs_per_insn_subsequent. */
1168 COSTS_N_INSNS (1), /* loadf. */
1169 COSTS_N_INSNS (1), /* loadd. */
1170 COSTS_N_INSNS (1), /* load_unaligned. */
1171 COSTS_N_INSNS (1), /* store. */
1172 COSTS_N_INSNS (1), /* strd. */
1173 COSTS_N_INSNS (1), /* stm_1st. */
1174 1, /* stm_regs_per_insn_1st. */
1175 2, /* stm_regs_per_insn_subsequent. */
1176 COSTS_N_INSNS (1), /* storef. */
1177 COSTS_N_INSNS (1), /* stored. */
1178 COSTS_N_INSNS (1), /* store_unaligned. */
1179 COSTS_N_INSNS (1), /* loadv. */
1180 COSTS_N_INSNS (1) /* storev. */
1185 COSTS_N_INSNS (36), /* div. */
1186 COSTS_N_INSNS (11), /* mult. */
1187 COSTS_N_INSNS (20), /* mult_addsub. */
1188 COSTS_N_INSNS (30), /* fma. */
1189 COSTS_N_INSNS (9), /* addsub. */
1190 COSTS_N_INSNS (3), /* fpconst. */
1191 COSTS_N_INSNS (3), /* neg. */
1192 COSTS_N_INSNS (6), /* compare. */
1193 COSTS_N_INSNS (4), /* widen. */
1194 COSTS_N_INSNS (4), /* narrow. */
1195 COSTS_N_INSNS (8), /* toint. */
1196 COSTS_N_INSNS (8), /* fromint. */
1197 COSTS_N_INSNS (8) /* roundint. */
1201 COSTS_N_INSNS (64), /* div. */
1202 COSTS_N_INSNS (16), /* mult. */
1203 COSTS_N_INSNS (25), /* mult_addsub. */
1204 COSTS_N_INSNS (30), /* fma. */
1205 COSTS_N_INSNS (9), /* addsub. */
1206 COSTS_N_INSNS (3), /* fpconst. */
1207 COSTS_N_INSNS (3), /* neg. */
1208 COSTS_N_INSNS (6), /* compare. */
1209 COSTS_N_INSNS (6), /* widen. */
1210 COSTS_N_INSNS (6), /* narrow. */
1211 COSTS_N_INSNS (8), /* toint. */
1212 COSTS_N_INSNS (8), /* fromint. */
1213 COSTS_N_INSNS (8) /* roundint. */
1218 COSTS_N_INSNS (1) /* alu. */
1222 const struct cpu_cost_table cortexa5_extra_costs
=
1228 COSTS_N_INSNS (1), /* shift. */
1229 COSTS_N_INSNS (1), /* shift_reg. */
1230 COSTS_N_INSNS (1), /* arith_shift. */
1231 COSTS_N_INSNS (1), /* arith_shift_reg. */
1232 COSTS_N_INSNS (1), /* log_shift. */
1233 COSTS_N_INSNS (1), /* log_shift_reg. */
1234 COSTS_N_INSNS (1), /* extend. */
1235 COSTS_N_INSNS (1), /* extend_arith. */
1236 COSTS_N_INSNS (1), /* bfi. */
1237 COSTS_N_INSNS (1), /* bfx. */
1238 COSTS_N_INSNS (1), /* clz. */
1239 COSTS_N_INSNS (1), /* rev. */
1241 true /* non_exec_costs_exec. */
1248 COSTS_N_INSNS (1), /* flag_setting. */
1249 COSTS_N_INSNS (1), /* extend. */
1250 COSTS_N_INSNS (1), /* add. */
1251 COSTS_N_INSNS (1), /* extend_add. */
1252 COSTS_N_INSNS (7) /* idiv. */
1256 0, /* simple (N/A). */
1257 0, /* flag_setting (N/A). */
1258 COSTS_N_INSNS (1), /* extend. */
1260 COSTS_N_INSNS (2), /* extend_add. */
1266 COSTS_N_INSNS (1), /* load. */
1267 COSTS_N_INSNS (1), /* load_sign_extend. */
1268 COSTS_N_INSNS (6), /* ldrd. */
1269 COSTS_N_INSNS (1), /* ldm_1st. */
1270 1, /* ldm_regs_per_insn_1st. */
1271 2, /* ldm_regs_per_insn_subsequent. */
1272 COSTS_N_INSNS (2), /* loadf. */
1273 COSTS_N_INSNS (4), /* loadd. */
1274 COSTS_N_INSNS (1), /* load_unaligned. */
1275 COSTS_N_INSNS (1), /* store. */
1276 COSTS_N_INSNS (3), /* strd. */
1277 COSTS_N_INSNS (1), /* stm_1st. */
1278 1, /* stm_regs_per_insn_1st. */
1279 2, /* stm_regs_per_insn_subsequent. */
1280 COSTS_N_INSNS (2), /* storef. */
1281 COSTS_N_INSNS (2), /* stored. */
1282 COSTS_N_INSNS (1), /* store_unaligned. */
1283 COSTS_N_INSNS (1), /* loadv. */
1284 COSTS_N_INSNS (1) /* storev. */
1289 COSTS_N_INSNS (15), /* div. */
1290 COSTS_N_INSNS (3), /* mult. */
1291 COSTS_N_INSNS (7), /* mult_addsub. */
1292 COSTS_N_INSNS (7), /* fma. */
1293 COSTS_N_INSNS (3), /* addsub. */
1294 COSTS_N_INSNS (3), /* fpconst. */
1295 COSTS_N_INSNS (3), /* neg. */
1296 COSTS_N_INSNS (3), /* compare. */
1297 COSTS_N_INSNS (3), /* widen. */
1298 COSTS_N_INSNS (3), /* narrow. */
1299 COSTS_N_INSNS (3), /* toint. */
1300 COSTS_N_INSNS (3), /* fromint. */
1301 COSTS_N_INSNS (3) /* roundint. */
1305 COSTS_N_INSNS (30), /* div. */
1306 COSTS_N_INSNS (6), /* mult. */
1307 COSTS_N_INSNS (10), /* mult_addsub. */
1308 COSTS_N_INSNS (7), /* fma. */
1309 COSTS_N_INSNS (3), /* addsub. */
1310 COSTS_N_INSNS (3), /* fpconst. */
1311 COSTS_N_INSNS (3), /* neg. */
1312 COSTS_N_INSNS (3), /* compare. */
1313 COSTS_N_INSNS (3), /* widen. */
1314 COSTS_N_INSNS (3), /* narrow. */
1315 COSTS_N_INSNS (3), /* toint. */
1316 COSTS_N_INSNS (3), /* fromint. */
1317 COSTS_N_INSNS (3) /* roundint. */
1322 COSTS_N_INSNS (1) /* alu. */
1327 const struct cpu_cost_table cortexa7_extra_costs
=
1333 COSTS_N_INSNS (1), /* shift. */
1334 COSTS_N_INSNS (1), /* shift_reg. */
1335 COSTS_N_INSNS (1), /* arith_shift. */
1336 COSTS_N_INSNS (1), /* arith_shift_reg. */
1337 COSTS_N_INSNS (1), /* log_shift. */
1338 COSTS_N_INSNS (1), /* log_shift_reg. */
1339 COSTS_N_INSNS (1), /* extend. */
1340 COSTS_N_INSNS (1), /* extend_arith. */
1341 COSTS_N_INSNS (1), /* bfi. */
1342 COSTS_N_INSNS (1), /* bfx. */
1343 COSTS_N_INSNS (1), /* clz. */
1344 COSTS_N_INSNS (1), /* rev. */
1346 true /* non_exec_costs_exec. */
1353 COSTS_N_INSNS (1), /* flag_setting. */
1354 COSTS_N_INSNS (1), /* extend. */
1355 COSTS_N_INSNS (1), /* add. */
1356 COSTS_N_INSNS (1), /* extend_add. */
1357 COSTS_N_INSNS (7) /* idiv. */
1361 0, /* simple (N/A). */
1362 0, /* flag_setting (N/A). */
1363 COSTS_N_INSNS (1), /* extend. */
1365 COSTS_N_INSNS (2), /* extend_add. */
1371 COSTS_N_INSNS (1), /* load. */
1372 COSTS_N_INSNS (1), /* load_sign_extend. */
1373 COSTS_N_INSNS (3), /* ldrd. */
1374 COSTS_N_INSNS (1), /* ldm_1st. */
1375 1, /* ldm_regs_per_insn_1st. */
1376 2, /* ldm_regs_per_insn_subsequent. */
1377 COSTS_N_INSNS (2), /* loadf. */
1378 COSTS_N_INSNS (2), /* loadd. */
1379 COSTS_N_INSNS (1), /* load_unaligned. */
1380 COSTS_N_INSNS (1), /* store. */
1381 COSTS_N_INSNS (3), /* strd. */
1382 COSTS_N_INSNS (1), /* stm_1st. */
1383 1, /* stm_regs_per_insn_1st. */
1384 2, /* stm_regs_per_insn_subsequent. */
1385 COSTS_N_INSNS (2), /* storef. */
1386 COSTS_N_INSNS (2), /* stored. */
1387 COSTS_N_INSNS (1), /* store_unaligned. */
1388 COSTS_N_INSNS (1), /* loadv. */
1389 COSTS_N_INSNS (1) /* storev. */
1394 COSTS_N_INSNS (15), /* div. */
1395 COSTS_N_INSNS (3), /* mult. */
1396 COSTS_N_INSNS (7), /* mult_addsub. */
1397 COSTS_N_INSNS (7), /* fma. */
1398 COSTS_N_INSNS (3), /* addsub. */
1399 COSTS_N_INSNS (3), /* fpconst. */
1400 COSTS_N_INSNS (3), /* neg. */
1401 COSTS_N_INSNS (3), /* compare. */
1402 COSTS_N_INSNS (3), /* widen. */
1403 COSTS_N_INSNS (3), /* narrow. */
1404 COSTS_N_INSNS (3), /* toint. */
1405 COSTS_N_INSNS (3), /* fromint. */
1406 COSTS_N_INSNS (3) /* roundint. */
1410 COSTS_N_INSNS (30), /* div. */
1411 COSTS_N_INSNS (6), /* mult. */
1412 COSTS_N_INSNS (10), /* mult_addsub. */
1413 COSTS_N_INSNS (7), /* fma. */
1414 COSTS_N_INSNS (3), /* addsub. */
1415 COSTS_N_INSNS (3), /* fpconst. */
1416 COSTS_N_INSNS (3), /* neg. */
1417 COSTS_N_INSNS (3), /* compare. */
1418 COSTS_N_INSNS (3), /* widen. */
1419 COSTS_N_INSNS (3), /* narrow. */
1420 COSTS_N_INSNS (3), /* toint. */
1421 COSTS_N_INSNS (3), /* fromint. */
1422 COSTS_N_INSNS (3) /* roundint. */
1427 COSTS_N_INSNS (1) /* alu. */
1431 const struct cpu_cost_table cortexa12_extra_costs
=
1438 COSTS_N_INSNS (1), /* shift_reg. */
1439 COSTS_N_INSNS (1), /* arith_shift. */
1440 COSTS_N_INSNS (1), /* arith_shift_reg. */
1441 COSTS_N_INSNS (1), /* log_shift. */
1442 COSTS_N_INSNS (1), /* log_shift_reg. */
1444 COSTS_N_INSNS (1), /* extend_arith. */
1446 COSTS_N_INSNS (1), /* bfx. */
1447 COSTS_N_INSNS (1), /* clz. */
1448 COSTS_N_INSNS (1), /* rev. */
1450 true /* non_exec_costs_exec. */
1455 COSTS_N_INSNS (2), /* simple. */
1456 COSTS_N_INSNS (3), /* flag_setting. */
1457 COSTS_N_INSNS (2), /* extend. */
1458 COSTS_N_INSNS (3), /* add. */
1459 COSTS_N_INSNS (2), /* extend_add. */
1460 COSTS_N_INSNS (18) /* idiv. */
1464 0, /* simple (N/A). */
1465 0, /* flag_setting (N/A). */
1466 COSTS_N_INSNS (3), /* extend. */
1468 COSTS_N_INSNS (3), /* extend_add. */
1474 COSTS_N_INSNS (3), /* load. */
1475 COSTS_N_INSNS (3), /* load_sign_extend. */
1476 COSTS_N_INSNS (3), /* ldrd. */
1477 COSTS_N_INSNS (3), /* ldm_1st. */
1478 1, /* ldm_regs_per_insn_1st. */
1479 2, /* ldm_regs_per_insn_subsequent. */
1480 COSTS_N_INSNS (3), /* loadf. */
1481 COSTS_N_INSNS (3), /* loadd. */
1482 0, /* load_unaligned. */
1486 1, /* stm_regs_per_insn_1st. */
1487 2, /* stm_regs_per_insn_subsequent. */
1488 COSTS_N_INSNS (2), /* storef. */
1489 COSTS_N_INSNS (2), /* stored. */
1490 0, /* store_unaligned. */
1491 COSTS_N_INSNS (1), /* loadv. */
1492 COSTS_N_INSNS (1) /* storev. */
1497 COSTS_N_INSNS (17), /* div. */
1498 COSTS_N_INSNS (4), /* mult. */
1499 COSTS_N_INSNS (8), /* mult_addsub. */
1500 COSTS_N_INSNS (8), /* fma. */
1501 COSTS_N_INSNS (4), /* addsub. */
1502 COSTS_N_INSNS (2), /* fpconst. */
1503 COSTS_N_INSNS (2), /* neg. */
1504 COSTS_N_INSNS (2), /* compare. */
1505 COSTS_N_INSNS (4), /* widen. */
1506 COSTS_N_INSNS (4), /* narrow. */
1507 COSTS_N_INSNS (4), /* toint. */
1508 COSTS_N_INSNS (4), /* fromint. */
1509 COSTS_N_INSNS (4) /* roundint. */
1513 COSTS_N_INSNS (31), /* div. */
1514 COSTS_N_INSNS (4), /* mult. */
1515 COSTS_N_INSNS (8), /* mult_addsub. */
1516 COSTS_N_INSNS (8), /* fma. */
1517 COSTS_N_INSNS (4), /* addsub. */
1518 COSTS_N_INSNS (2), /* fpconst. */
1519 COSTS_N_INSNS (2), /* neg. */
1520 COSTS_N_INSNS (2), /* compare. */
1521 COSTS_N_INSNS (4), /* widen. */
1522 COSTS_N_INSNS (4), /* narrow. */
1523 COSTS_N_INSNS (4), /* toint. */
1524 COSTS_N_INSNS (4), /* fromint. */
1525 COSTS_N_INSNS (4) /* roundint. */
1530 COSTS_N_INSNS (1) /* alu. */
1534 const struct cpu_cost_table cortexa15_extra_costs
=
1542 COSTS_N_INSNS (1), /* arith_shift. */
1543 COSTS_N_INSNS (1), /* arith_shift_reg. */
1544 COSTS_N_INSNS (1), /* log_shift. */
1545 COSTS_N_INSNS (1), /* log_shift_reg. */
1547 COSTS_N_INSNS (1), /* extend_arith. */
1548 COSTS_N_INSNS (1), /* bfi. */
1553 true /* non_exec_costs_exec. */
1558 COSTS_N_INSNS (2), /* simple. */
1559 COSTS_N_INSNS (3), /* flag_setting. */
1560 COSTS_N_INSNS (2), /* extend. */
1561 COSTS_N_INSNS (2), /* add. */
1562 COSTS_N_INSNS (2), /* extend_add. */
1563 COSTS_N_INSNS (18) /* idiv. */
1567 0, /* simple (N/A). */
1568 0, /* flag_setting (N/A). */
1569 COSTS_N_INSNS (3), /* extend. */
1571 COSTS_N_INSNS (3), /* extend_add. */
1577 COSTS_N_INSNS (3), /* load. */
1578 COSTS_N_INSNS (3), /* load_sign_extend. */
1579 COSTS_N_INSNS (3), /* ldrd. */
1580 COSTS_N_INSNS (4), /* ldm_1st. */
1581 1, /* ldm_regs_per_insn_1st. */
1582 2, /* ldm_regs_per_insn_subsequent. */
1583 COSTS_N_INSNS (4), /* loadf. */
1584 COSTS_N_INSNS (4), /* loadd. */
1585 0, /* load_unaligned. */
1588 COSTS_N_INSNS (1), /* stm_1st. */
1589 1, /* stm_regs_per_insn_1st. */
1590 2, /* stm_regs_per_insn_subsequent. */
1593 0, /* store_unaligned. */
1594 COSTS_N_INSNS (1), /* loadv. */
1595 COSTS_N_INSNS (1) /* storev. */
1600 COSTS_N_INSNS (17), /* div. */
1601 COSTS_N_INSNS (4), /* mult. */
1602 COSTS_N_INSNS (8), /* mult_addsub. */
1603 COSTS_N_INSNS (8), /* fma. */
1604 COSTS_N_INSNS (4), /* addsub. */
1605 COSTS_N_INSNS (2), /* fpconst. */
1606 COSTS_N_INSNS (2), /* neg. */
1607 COSTS_N_INSNS (5), /* compare. */
1608 COSTS_N_INSNS (4), /* widen. */
1609 COSTS_N_INSNS (4), /* narrow. */
1610 COSTS_N_INSNS (4), /* toint. */
1611 COSTS_N_INSNS (4), /* fromint. */
1612 COSTS_N_INSNS (4) /* roundint. */
1616 COSTS_N_INSNS (31), /* div. */
1617 COSTS_N_INSNS (4), /* mult. */
1618 COSTS_N_INSNS (8), /* mult_addsub. */
1619 COSTS_N_INSNS (8), /* fma. */
1620 COSTS_N_INSNS (4), /* addsub. */
1621 COSTS_N_INSNS (2), /* fpconst. */
1622 COSTS_N_INSNS (2), /* neg. */
1623 COSTS_N_INSNS (2), /* compare. */
1624 COSTS_N_INSNS (4), /* widen. */
1625 COSTS_N_INSNS (4), /* narrow. */
1626 COSTS_N_INSNS (4), /* toint. */
1627 COSTS_N_INSNS (4), /* fromint. */
1628 COSTS_N_INSNS (4) /* roundint. */
1633 COSTS_N_INSNS (1) /* alu. */
1637 const struct cpu_cost_table v7m_extra_costs
=
1645 0, /* arith_shift. */
1646 COSTS_N_INSNS (1), /* arith_shift_reg. */
1648 COSTS_N_INSNS (1), /* log_shift_reg. */
1650 COSTS_N_INSNS (1), /* extend_arith. */
1655 COSTS_N_INSNS (1), /* non_exec. */
1656 false /* non_exec_costs_exec. */
1661 COSTS_N_INSNS (1), /* simple. */
1662 COSTS_N_INSNS (1), /* flag_setting. */
1663 COSTS_N_INSNS (2), /* extend. */
1664 COSTS_N_INSNS (1), /* add. */
1665 COSTS_N_INSNS (3), /* extend_add. */
1666 COSTS_N_INSNS (8) /* idiv. */
1670 0, /* simple (N/A). */
1671 0, /* flag_setting (N/A). */
1672 COSTS_N_INSNS (2), /* extend. */
1674 COSTS_N_INSNS (3), /* extend_add. */
1680 COSTS_N_INSNS (2), /* load. */
1681 0, /* load_sign_extend. */
1682 COSTS_N_INSNS (3), /* ldrd. */
1683 COSTS_N_INSNS (2), /* ldm_1st. */
1684 1, /* ldm_regs_per_insn_1st. */
1685 1, /* ldm_regs_per_insn_subsequent. */
1686 COSTS_N_INSNS (2), /* loadf. */
1687 COSTS_N_INSNS (3), /* loadd. */
1688 COSTS_N_INSNS (1), /* load_unaligned. */
1689 COSTS_N_INSNS (2), /* store. */
1690 COSTS_N_INSNS (3), /* strd. */
1691 COSTS_N_INSNS (2), /* stm_1st. */
1692 1, /* stm_regs_per_insn_1st. */
1693 1, /* stm_regs_per_insn_subsequent. */
1694 COSTS_N_INSNS (2), /* storef. */
1695 COSTS_N_INSNS (3), /* stored. */
1696 COSTS_N_INSNS (1), /* store_unaligned. */
1697 COSTS_N_INSNS (1), /* loadv. */
1698 COSTS_N_INSNS (1) /* storev. */
1703 COSTS_N_INSNS (7), /* div. */
1704 COSTS_N_INSNS (2), /* mult. */
1705 COSTS_N_INSNS (5), /* mult_addsub. */
1706 COSTS_N_INSNS (3), /* fma. */
1707 COSTS_N_INSNS (1), /* addsub. */
1719 COSTS_N_INSNS (15), /* div. */
1720 COSTS_N_INSNS (5), /* mult. */
1721 COSTS_N_INSNS (7), /* mult_addsub. */
1722 COSTS_N_INSNS (7), /* fma. */
1723 COSTS_N_INSNS (3), /* addsub. */
1736 COSTS_N_INSNS (1) /* alu. */
1740 const struct tune_params arm_slowmul_tune
=
1742 &generic_extra_costs
, /* Insn extra costs. */
1743 NULL
, /* Sched adj cost. */
1744 arm_default_branch_cost
,
1745 &arm_default_vec_cost
,
1746 3, /* Constant limit. */
1747 5, /* Max cond insns. */
1748 8, /* Memset max inline. */
1749 1, /* Issue rate. */
1750 ARM_PREFETCH_NOT_BENEFICIAL
,
1751 tune_params::PREF_CONST_POOL_TRUE
,
1752 tune_params::PREF_LDRD_FALSE
,
1753 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1754 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1755 tune_params::DISPARAGE_FLAGS_NEITHER
,
1756 tune_params::PREF_NEON_64_FALSE
,
1757 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1758 tune_params::FUSE_NOTHING
,
1759 tune_params::SCHED_AUTOPREF_OFF
1762 const struct tune_params arm_fastmul_tune
=
1764 &generic_extra_costs
, /* Insn extra costs. */
1765 NULL
, /* Sched adj cost. */
1766 arm_default_branch_cost
,
1767 &arm_default_vec_cost
,
1768 1, /* Constant limit. */
1769 5, /* Max cond insns. */
1770 8, /* Memset max inline. */
1771 1, /* Issue rate. */
1772 ARM_PREFETCH_NOT_BENEFICIAL
,
1773 tune_params::PREF_CONST_POOL_TRUE
,
1774 tune_params::PREF_LDRD_FALSE
,
1775 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1776 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1777 tune_params::DISPARAGE_FLAGS_NEITHER
,
1778 tune_params::PREF_NEON_64_FALSE
,
1779 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1780 tune_params::FUSE_NOTHING
,
1781 tune_params::SCHED_AUTOPREF_OFF
1784 /* StrongARM has early execution of branches, so a sequence that is worth
1785 skipping is shorter. Set max_insns_skipped to a lower value. */
1787 const struct tune_params arm_strongarm_tune
=
1789 &generic_extra_costs
, /* Insn extra costs. */
1790 NULL
, /* Sched adj cost. */
1791 arm_default_branch_cost
,
1792 &arm_default_vec_cost
,
1793 1, /* Constant limit. */
1794 3, /* Max cond insns. */
1795 8, /* Memset max inline. */
1796 1, /* Issue rate. */
1797 ARM_PREFETCH_NOT_BENEFICIAL
,
1798 tune_params::PREF_CONST_POOL_TRUE
,
1799 tune_params::PREF_LDRD_FALSE
,
1800 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1801 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1802 tune_params::DISPARAGE_FLAGS_NEITHER
,
1803 tune_params::PREF_NEON_64_FALSE
,
1804 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1805 tune_params::FUSE_NOTHING
,
1806 tune_params::SCHED_AUTOPREF_OFF
1809 const struct tune_params arm_xscale_tune
=
1811 &generic_extra_costs
, /* Insn extra costs. */
1812 xscale_sched_adjust_cost
,
1813 arm_default_branch_cost
,
1814 &arm_default_vec_cost
,
1815 2, /* Constant limit. */
1816 3, /* Max cond insns. */
1817 8, /* Memset max inline. */
1818 1, /* Issue rate. */
1819 ARM_PREFETCH_NOT_BENEFICIAL
,
1820 tune_params::PREF_CONST_POOL_TRUE
,
1821 tune_params::PREF_LDRD_FALSE
,
1822 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1823 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1824 tune_params::DISPARAGE_FLAGS_NEITHER
,
1825 tune_params::PREF_NEON_64_FALSE
,
1826 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1827 tune_params::FUSE_NOTHING
,
1828 tune_params::SCHED_AUTOPREF_OFF
1831 const struct tune_params arm_9e_tune
=
1833 &generic_extra_costs
, /* Insn extra costs. */
1834 NULL
, /* Sched adj cost. */
1835 arm_default_branch_cost
,
1836 &arm_default_vec_cost
,
1837 1, /* Constant limit. */
1838 5, /* Max cond insns. */
1839 8, /* Memset max inline. */
1840 1, /* Issue rate. */
1841 ARM_PREFETCH_NOT_BENEFICIAL
,
1842 tune_params::PREF_CONST_POOL_TRUE
,
1843 tune_params::PREF_LDRD_FALSE
,
1844 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1845 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1846 tune_params::DISPARAGE_FLAGS_NEITHER
,
1847 tune_params::PREF_NEON_64_FALSE
,
1848 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1849 tune_params::FUSE_NOTHING
,
1850 tune_params::SCHED_AUTOPREF_OFF
1853 const struct tune_params arm_marvell_pj4_tune
=
1855 &generic_extra_costs
, /* Insn extra costs. */
1856 NULL
, /* Sched adj cost. */
1857 arm_default_branch_cost
,
1858 &arm_default_vec_cost
,
1859 1, /* Constant limit. */
1860 5, /* Max cond insns. */
1861 8, /* Memset max inline. */
1862 2, /* Issue rate. */
1863 ARM_PREFETCH_NOT_BENEFICIAL
,
1864 tune_params::PREF_CONST_POOL_TRUE
,
1865 tune_params::PREF_LDRD_FALSE
,
1866 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1867 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1868 tune_params::DISPARAGE_FLAGS_NEITHER
,
1869 tune_params::PREF_NEON_64_FALSE
,
1870 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1871 tune_params::FUSE_NOTHING
,
1872 tune_params::SCHED_AUTOPREF_OFF
1875 const struct tune_params arm_v6t2_tune
=
1877 &generic_extra_costs
, /* Insn extra costs. */
1878 NULL
, /* Sched adj cost. */
1879 arm_default_branch_cost
,
1880 &arm_default_vec_cost
,
1881 1, /* Constant limit. */
1882 5, /* Max cond insns. */
1883 8, /* Memset max inline. */
1884 1, /* Issue rate. */
1885 ARM_PREFETCH_NOT_BENEFICIAL
,
1886 tune_params::PREF_CONST_POOL_FALSE
,
1887 tune_params::PREF_LDRD_FALSE
,
1888 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1889 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1890 tune_params::DISPARAGE_FLAGS_NEITHER
,
1891 tune_params::PREF_NEON_64_FALSE
,
1892 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1893 tune_params::FUSE_NOTHING
,
1894 tune_params::SCHED_AUTOPREF_OFF
1898 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1899 const struct tune_params arm_cortex_tune
=
1901 &generic_extra_costs
,
1902 NULL
, /* Sched adj cost. */
1903 arm_default_branch_cost
,
1904 &arm_default_vec_cost
,
1905 1, /* Constant limit. */
1906 5, /* Max cond insns. */
1907 8, /* Memset max inline. */
1908 2, /* Issue rate. */
1909 ARM_PREFETCH_NOT_BENEFICIAL
,
1910 tune_params::PREF_CONST_POOL_FALSE
,
1911 tune_params::PREF_LDRD_FALSE
,
1912 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1913 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1914 tune_params::DISPARAGE_FLAGS_NEITHER
,
1915 tune_params::PREF_NEON_64_FALSE
,
1916 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1917 tune_params::FUSE_NOTHING
,
1918 tune_params::SCHED_AUTOPREF_OFF
1921 const struct tune_params arm_cortex_a8_tune
=
1923 &cortexa8_extra_costs
,
1924 NULL
, /* Sched adj cost. */
1925 arm_default_branch_cost
,
1926 &arm_default_vec_cost
,
1927 1, /* Constant limit. */
1928 5, /* Max cond insns. */
1929 8, /* Memset max inline. */
1930 2, /* Issue rate. */
1931 ARM_PREFETCH_NOT_BENEFICIAL
,
1932 tune_params::PREF_CONST_POOL_FALSE
,
1933 tune_params::PREF_LDRD_FALSE
,
1934 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1935 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1936 tune_params::DISPARAGE_FLAGS_NEITHER
,
1937 tune_params::PREF_NEON_64_FALSE
,
1938 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1939 tune_params::FUSE_NOTHING
,
1940 tune_params::SCHED_AUTOPREF_OFF
1943 const struct tune_params arm_cortex_a7_tune
=
1945 &cortexa7_extra_costs
,
1946 NULL
, /* Sched adj cost. */
1947 arm_default_branch_cost
,
1948 &arm_default_vec_cost
,
1949 1, /* Constant limit. */
1950 5, /* Max cond insns. */
1951 8, /* Memset max inline. */
1952 2, /* Issue rate. */
1953 ARM_PREFETCH_NOT_BENEFICIAL
,
1954 tune_params::PREF_CONST_POOL_FALSE
,
1955 tune_params::PREF_LDRD_FALSE
,
1956 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1957 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1958 tune_params::DISPARAGE_FLAGS_NEITHER
,
1959 tune_params::PREF_NEON_64_FALSE
,
1960 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1961 tune_params::FUSE_NOTHING
,
1962 tune_params::SCHED_AUTOPREF_OFF
1965 const struct tune_params arm_cortex_a15_tune
=
1967 &cortexa15_extra_costs
,
1968 NULL
, /* Sched adj cost. */
1969 arm_default_branch_cost
,
1970 &arm_default_vec_cost
,
1971 1, /* Constant limit. */
1972 2, /* Max cond insns. */
1973 8, /* Memset max inline. */
1974 3, /* Issue rate. */
1975 ARM_PREFETCH_NOT_BENEFICIAL
,
1976 tune_params::PREF_CONST_POOL_FALSE
,
1977 tune_params::PREF_LDRD_TRUE
,
1978 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1979 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1980 tune_params::DISPARAGE_FLAGS_ALL
,
1981 tune_params::PREF_NEON_64_FALSE
,
1982 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1983 tune_params::FUSE_NOTHING
,
1984 tune_params::SCHED_AUTOPREF_FULL
1987 const struct tune_params arm_cortex_a35_tune
=
1989 &cortexa53_extra_costs
,
1990 NULL
, /* Sched adj cost. */
1991 arm_default_branch_cost
,
1992 &arm_default_vec_cost
,
1993 1, /* Constant limit. */
1994 5, /* Max cond insns. */
1995 8, /* Memset max inline. */
1996 1, /* Issue rate. */
1997 ARM_PREFETCH_NOT_BENEFICIAL
,
1998 tune_params::PREF_CONST_POOL_FALSE
,
1999 tune_params::PREF_LDRD_FALSE
,
2000 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2001 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2002 tune_params::DISPARAGE_FLAGS_NEITHER
,
2003 tune_params::PREF_NEON_64_FALSE
,
2004 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2005 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2006 tune_params::SCHED_AUTOPREF_OFF
2009 const struct tune_params arm_cortex_a53_tune
=
2011 &cortexa53_extra_costs
,
2012 NULL
, /* Sched adj cost. */
2013 arm_default_branch_cost
,
2014 &arm_default_vec_cost
,
2015 1, /* Constant limit. */
2016 5, /* Max cond insns. */
2017 8, /* Memset max inline. */
2018 2, /* Issue rate. */
2019 ARM_PREFETCH_NOT_BENEFICIAL
,
2020 tune_params::PREF_CONST_POOL_FALSE
,
2021 tune_params::PREF_LDRD_FALSE
,
2022 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2023 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2024 tune_params::DISPARAGE_FLAGS_NEITHER
,
2025 tune_params::PREF_NEON_64_FALSE
,
2026 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2027 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2028 tune_params::SCHED_AUTOPREF_OFF
2031 const struct tune_params arm_cortex_a57_tune
=
2033 &cortexa57_extra_costs
,
2034 NULL
, /* Sched adj cost. */
2035 arm_default_branch_cost
,
2036 &arm_default_vec_cost
,
2037 1, /* Constant limit. */
2038 2, /* Max cond insns. */
2039 8, /* Memset max inline. */
2040 3, /* Issue rate. */
2041 ARM_PREFETCH_NOT_BENEFICIAL
,
2042 tune_params::PREF_CONST_POOL_FALSE
,
2043 tune_params::PREF_LDRD_TRUE
,
2044 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2045 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2046 tune_params::DISPARAGE_FLAGS_ALL
,
2047 tune_params::PREF_NEON_64_FALSE
,
2048 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2049 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2050 tune_params::SCHED_AUTOPREF_FULL
2053 const struct tune_params arm_exynosm1_tune
=
2055 &exynosm1_extra_costs
,
2056 NULL
, /* Sched adj cost. */
2057 arm_default_branch_cost
,
2058 &arm_default_vec_cost
,
2059 1, /* Constant limit. */
2060 2, /* Max cond insns. */
2061 8, /* Memset max inline. */
2062 3, /* Issue rate. */
2063 ARM_PREFETCH_NOT_BENEFICIAL
,
2064 tune_params::PREF_CONST_POOL_FALSE
,
2065 tune_params::PREF_LDRD_TRUE
,
2066 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2067 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2068 tune_params::DISPARAGE_FLAGS_ALL
,
2069 tune_params::PREF_NEON_64_FALSE
,
2070 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2071 tune_params::FUSE_NOTHING
,
2072 tune_params::SCHED_AUTOPREF_OFF
2075 const struct tune_params arm_xgene1_tune
=
2077 &xgene1_extra_costs
,
2078 NULL
, /* Sched adj cost. */
2079 arm_default_branch_cost
,
2080 &arm_default_vec_cost
,
2081 1, /* Constant limit. */
2082 2, /* Max cond insns. */
2083 32, /* Memset max inline. */
2084 4, /* Issue rate. */
2085 ARM_PREFETCH_NOT_BENEFICIAL
,
2086 tune_params::PREF_CONST_POOL_FALSE
,
2087 tune_params::PREF_LDRD_TRUE
,
2088 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2089 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2090 tune_params::DISPARAGE_FLAGS_ALL
,
2091 tune_params::PREF_NEON_64_FALSE
,
2092 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2093 tune_params::FUSE_NOTHING
,
2094 tune_params::SCHED_AUTOPREF_OFF
2097 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2098 less appealing. Set max_insns_skipped to a low value. */
2100 const struct tune_params arm_cortex_a5_tune
=
2102 &cortexa5_extra_costs
,
2103 NULL
, /* Sched adj cost. */
2104 arm_cortex_a5_branch_cost
,
2105 &arm_default_vec_cost
,
2106 1, /* Constant limit. */
2107 1, /* Max cond insns. */
2108 8, /* Memset max inline. */
2109 2, /* Issue rate. */
2110 ARM_PREFETCH_NOT_BENEFICIAL
,
2111 tune_params::PREF_CONST_POOL_FALSE
,
2112 tune_params::PREF_LDRD_FALSE
,
2113 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2114 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2115 tune_params::DISPARAGE_FLAGS_NEITHER
,
2116 tune_params::PREF_NEON_64_FALSE
,
2117 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2118 tune_params::FUSE_NOTHING
,
2119 tune_params::SCHED_AUTOPREF_OFF
2122 const struct tune_params arm_cortex_a9_tune
=
2124 &cortexa9_extra_costs
,
2125 cortex_a9_sched_adjust_cost
,
2126 arm_default_branch_cost
,
2127 &arm_default_vec_cost
,
2128 1, /* Constant limit. */
2129 5, /* Max cond insns. */
2130 8, /* Memset max inline. */
2131 2, /* Issue rate. */
2132 ARM_PREFETCH_BENEFICIAL(4,32,32),
2133 tune_params::PREF_CONST_POOL_FALSE
,
2134 tune_params::PREF_LDRD_FALSE
,
2135 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2136 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2137 tune_params::DISPARAGE_FLAGS_NEITHER
,
2138 tune_params::PREF_NEON_64_FALSE
,
2139 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2140 tune_params::FUSE_NOTHING
,
2141 tune_params::SCHED_AUTOPREF_OFF
2144 const struct tune_params arm_cortex_a12_tune
=
2146 &cortexa12_extra_costs
,
2147 NULL
, /* Sched adj cost. */
2148 arm_default_branch_cost
,
2149 &arm_default_vec_cost
, /* Vectorizer costs. */
2150 1, /* Constant limit. */
2151 2, /* Max cond insns. */
2152 8, /* Memset max inline. */
2153 2, /* Issue rate. */
2154 ARM_PREFETCH_NOT_BENEFICIAL
,
2155 tune_params::PREF_CONST_POOL_FALSE
,
2156 tune_params::PREF_LDRD_TRUE
,
2157 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2158 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2159 tune_params::DISPARAGE_FLAGS_ALL
,
2160 tune_params::PREF_NEON_64_FALSE
,
2161 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2162 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2163 tune_params::SCHED_AUTOPREF_OFF
2166 const struct tune_params arm_cortex_a73_tune
=
2168 &cortexa57_extra_costs
,
2169 NULL
, /* Sched adj cost. */
2170 arm_default_branch_cost
,
2171 &arm_default_vec_cost
, /* Vectorizer costs. */
2172 1, /* Constant limit. */
2173 2, /* Max cond insns. */
2174 8, /* Memset max inline. */
2175 2, /* Issue rate. */
2176 ARM_PREFETCH_NOT_BENEFICIAL
,
2177 tune_params::PREF_CONST_POOL_FALSE
,
2178 tune_params::PREF_LDRD_TRUE
,
2179 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2180 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2181 tune_params::DISPARAGE_FLAGS_ALL
,
2182 tune_params::PREF_NEON_64_FALSE
,
2183 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2184 FUSE_OPS (tune_params::FUSE_AES_AESMC
| tune_params::FUSE_MOVW_MOVT
),
2185 tune_params::SCHED_AUTOPREF_FULL
2188 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2189 cycle to execute each. An LDR from the constant pool also takes two cycles
2190 to execute, but mildly increases pipelining opportunity (consecutive
2191 loads/stores can be pipelined together, saving one cycle), and may also
2192 improve icache utilisation. Hence we prefer the constant pool for such
2195 const struct tune_params arm_v7m_tune
=
2198 NULL
, /* Sched adj cost. */
2199 arm_cortex_m_branch_cost
,
2200 &arm_default_vec_cost
,
2201 1, /* Constant limit. */
2202 2, /* Max cond insns. */
2203 8, /* Memset max inline. */
2204 1, /* Issue rate. */
2205 ARM_PREFETCH_NOT_BENEFICIAL
,
2206 tune_params::PREF_CONST_POOL_TRUE
,
2207 tune_params::PREF_LDRD_FALSE
,
2208 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2209 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2210 tune_params::DISPARAGE_FLAGS_NEITHER
,
2211 tune_params::PREF_NEON_64_FALSE
,
2212 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2213 tune_params::FUSE_NOTHING
,
2214 tune_params::SCHED_AUTOPREF_OFF
2217 /* Cortex-M7 tuning. */
2219 const struct tune_params arm_cortex_m7_tune
=
2222 NULL
, /* Sched adj cost. */
2223 arm_cortex_m7_branch_cost
,
2224 &arm_default_vec_cost
,
2225 0, /* Constant limit. */
2226 1, /* Max cond insns. */
2227 8, /* Memset max inline. */
2228 2, /* Issue rate. */
2229 ARM_PREFETCH_NOT_BENEFICIAL
,
2230 tune_params::PREF_CONST_POOL_TRUE
,
2231 tune_params::PREF_LDRD_FALSE
,
2232 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2233 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2234 tune_params::DISPARAGE_FLAGS_NEITHER
,
2235 tune_params::PREF_NEON_64_FALSE
,
2236 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2237 tune_params::FUSE_NOTHING
,
2238 tune_params::SCHED_AUTOPREF_OFF
2241 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2242 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2244 const struct tune_params arm_v6m_tune
=
2246 &generic_extra_costs
, /* Insn extra costs. */
2247 NULL
, /* Sched adj cost. */
2248 arm_default_branch_cost
,
2249 &arm_default_vec_cost
, /* Vectorizer costs. */
2250 1, /* Constant limit. */
2251 5, /* Max cond insns. */
2252 8, /* Memset max inline. */
2253 1, /* Issue rate. */
2254 ARM_PREFETCH_NOT_BENEFICIAL
,
2255 tune_params::PREF_CONST_POOL_FALSE
,
2256 tune_params::PREF_LDRD_FALSE
,
2257 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2258 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2259 tune_params::DISPARAGE_FLAGS_NEITHER
,
2260 tune_params::PREF_NEON_64_FALSE
,
2261 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2262 tune_params::FUSE_NOTHING
,
2263 tune_params::SCHED_AUTOPREF_OFF
2266 const struct tune_params arm_fa726te_tune
=
2268 &generic_extra_costs
, /* Insn extra costs. */
2269 fa726te_sched_adjust_cost
,
2270 arm_default_branch_cost
,
2271 &arm_default_vec_cost
,
2272 1, /* Constant limit. */
2273 5, /* Max cond insns. */
2274 8, /* Memset max inline. */
2275 2, /* Issue rate. */
2276 ARM_PREFETCH_NOT_BENEFICIAL
,
2277 tune_params::PREF_CONST_POOL_TRUE
,
2278 tune_params::PREF_LDRD_FALSE
,
2279 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2280 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2281 tune_params::DISPARAGE_FLAGS_NEITHER
,
2282 tune_params::PREF_NEON_64_FALSE
,
2283 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2284 tune_params::FUSE_NOTHING
,
2285 tune_params::SCHED_AUTOPREF_OFF
2288 /* Auto-generated CPU, FPU and architecture tables. */
2289 #include "arm-cpu-data.h"
2291 /* The name of the preprocessor macro to define for this architecture. PROFILE
2292 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2293 is thus chosen to be big enough to hold the longest architecture name. */
2295 char arm_arch_name
[] = "__ARM_ARCH_PROFILE__";
2297 /* Supported TLS relocations. */
2305 TLS_DESCSEQ
/* GNU scheme */
2308 /* The maximum number of insns to be used when loading a constant. */
2310 arm_constant_limit (bool size_p
)
2312 return size_p
? 1 : current_tune
->constant_limit
;
2315 /* Emit an insn that's a simple single-set. Both the operands must be known
2317 inline static rtx_insn
*
2318 emit_set_insn (rtx x
, rtx y
)
2320 return emit_insn (gen_rtx_SET (x
, y
));
/* Return the number of bits set in VALUE (Kernighan's popcount: each
   iteration clears exactly one set bit, so the loop runs popcount times).  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
2338 /* Return the number of bits set in BMAP. */
2340 bitmap_popcount (const sbitmap bmap
)
2342 unsigned int count
= 0;
2344 sbitmap_iterator sbi
;
2346 EXECUTE_IF_SET_IN_BITMAP (bmap
, 0, n
, sbi
)
2355 } arm_fixed_mode_set
;
2357 /* A small helper for setting fixed-point library libfuncs. */
2360 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2361 const char *funcname
, const char *modename
,
2366 if (num_suffix
== 0)
2367 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2369 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2371 set_optab_libfunc (optable
, mode
, buffer
);
2375 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2376 machine_mode from
, const char *funcname
,
2377 const char *toname
, const char *fromname
)
2380 const char *maybe_suffix_2
= "";
2382 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2383 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2384 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2385 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2386 maybe_suffix_2
= "2";
2388 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2391 set_conv_libfunc (optable
, to
, from
, buffer
);
2394 /* Set up library functions unique to ARM. */
2397 arm_init_libfuncs (void)
2399 /* For Linux, we have access to kernel support for atomic operations. */
2400 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2401 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
2403 /* There are no special library functions unless we are using the
2408 /* The functions below are described in Section 4 of the "Run-Time
2409 ABI for the ARM architecture", Version 1.0. */
2411 /* Double-precision floating-point arithmetic. Table 2. */
2412 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2413 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2414 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2415 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2416 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2418 /* Double-precision comparisons. Table 3. */
2419 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2420 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2421 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2422 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2423 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2424 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2425 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2427 /* Single-precision floating-point arithmetic. Table 4. */
2428 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2429 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2430 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2431 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2432 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2434 /* Single-precision comparisons. Table 5. */
2435 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2436 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2437 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2438 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2439 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2440 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2441 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2443 /* Floating-point to integer conversions. Table 6. */
2444 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2445 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2446 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2447 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2448 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2449 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2450 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2451 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2453 /* Conversions between floating types. Table 7. */
2454 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2455 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2457 /* Integer to floating-point conversions. Table 8. */
2458 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2459 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2460 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2461 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2462 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2463 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2464 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2465 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2467 /* Long long. Table 9. */
2468 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2469 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2470 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2471 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2472 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2473 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2474 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2475 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2477 /* Integer (32/32->32) division. \S 4.3.1. */
2478 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2479 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2481 /* The divmod functions are designed so that they can be used for
2482 plain division, even though they return both the quotient and the
2483 remainder. The quotient is returned in the usual location (i.e.,
2484 r0 for SImode, {r0, r1} for DImode), just as would be expected
2485 for an ordinary division routine. Because the AAPCS calling
2486 conventions specify that all of { r0, r1, r2, r3 } are
2487 callee-saved registers, there is no need to tell the compiler
2488 explicitly that those registers are clobbered by these
2490 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2491 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2493 /* For SImode division the ABI provides div-without-mod routines,
2494 which are faster. */
2495 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2496 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2498 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2499 divmod libcalls instead. */
2500 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2501 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2502 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2503 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2505 /* Half-precision float operations. The compiler handles all operations
2506 with NULL libfuncs by converting the SFmode. */
2507 switch (arm_fp16_format
)
2509 case ARM_FP16_FORMAT_IEEE
:
2510 case ARM_FP16_FORMAT_ALTERNATIVE
:
2513 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2514 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2516 : "__gnu_f2h_alternative"));
2517 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2518 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2520 : "__gnu_h2f_alternative"));
2522 set_conv_libfunc (trunc_optab
, HFmode
, DFmode
,
2523 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2525 : "__gnu_d2h_alternative"));
2528 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2529 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2530 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2531 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2532 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2535 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2536 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2537 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2538 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2539 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2540 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2541 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2548 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2550 const arm_fixed_mode_set fixed_arith_modes
[] =
2571 const arm_fixed_mode_set fixed_conv_modes
[] =
2601 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2603 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2604 "add", fixed_arith_modes
[i
].name
, 3);
2605 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2606 "ssadd", fixed_arith_modes
[i
].name
, 3);
2607 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2608 "usadd", fixed_arith_modes
[i
].name
, 3);
2609 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2610 "sub", fixed_arith_modes
[i
].name
, 3);
2611 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2612 "sssub", fixed_arith_modes
[i
].name
, 3);
2613 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2614 "ussub", fixed_arith_modes
[i
].name
, 3);
2615 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2616 "mul", fixed_arith_modes
[i
].name
, 3);
2617 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2618 "ssmul", fixed_arith_modes
[i
].name
, 3);
2619 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2620 "usmul", fixed_arith_modes
[i
].name
, 3);
2621 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2622 "div", fixed_arith_modes
[i
].name
, 3);
2623 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2624 "udiv", fixed_arith_modes
[i
].name
, 3);
2625 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2626 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2627 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2628 "usdiv", fixed_arith_modes
[i
].name
, 3);
2629 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2630 "neg", fixed_arith_modes
[i
].name
, 2);
2631 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2632 "ssneg", fixed_arith_modes
[i
].name
, 2);
2633 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2634 "usneg", fixed_arith_modes
[i
].name
, 2);
2635 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2636 "ashl", fixed_arith_modes
[i
].name
, 3);
2637 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2638 "ashr", fixed_arith_modes
[i
].name
, 3);
2639 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2640 "lshr", fixed_arith_modes
[i
].name
, 3);
2641 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2642 "ssashl", fixed_arith_modes
[i
].name
, 3);
2643 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2644 "usashl", fixed_arith_modes
[i
].name
, 3);
2645 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2646 "cmp", fixed_arith_modes
[i
].name
, 2);
2649 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2650 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2653 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2654 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2657 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2658 fixed_conv_modes
[j
].mode
, "fract",
2659 fixed_conv_modes
[i
].name
,
2660 fixed_conv_modes
[j
].name
);
2661 arm_set_fixed_conv_libfunc (satfract_optab
,
2662 fixed_conv_modes
[i
].mode
,
2663 fixed_conv_modes
[j
].mode
, "satfract",
2664 fixed_conv_modes
[i
].name
,
2665 fixed_conv_modes
[j
].name
);
2666 arm_set_fixed_conv_libfunc (fractuns_optab
,
2667 fixed_conv_modes
[i
].mode
,
2668 fixed_conv_modes
[j
].mode
, "fractuns",
2669 fixed_conv_modes
[i
].name
,
2670 fixed_conv_modes
[j
].name
);
2671 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2672 fixed_conv_modes
[i
].mode
,
2673 fixed_conv_modes
[j
].mode
, "satfractuns",
2674 fixed_conv_modes
[i
].name
,
2675 fixed_conv_modes
[j
].name
);
2679 if (TARGET_AAPCS_BASED
)
2680 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2683 /* On AAPCS systems, this is the "struct __va_list". */
2684 static GTY(()) tree va_list_type
;
2686 /* Return the type to use as __builtin_va_list. */
2688 arm_build_builtin_va_list (void)
2693 if (!TARGET_AAPCS_BASED
)
2694 return std_build_builtin_va_list ();
2696 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2704 The C Library ABI further reinforces this definition in \S
2707 We must follow this definition exactly. The structure tag
2708 name is visible in C++ mangled names, and thus forms a part
2709 of the ABI. The field name may be used by people who
2710 #include <stdarg.h>. */
2711 /* Create the type. */
2712 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2713 /* Give it the required name. */
2714 va_list_name
= build_decl (BUILTINS_LOCATION
,
2716 get_identifier ("__va_list"),
2718 DECL_ARTIFICIAL (va_list_name
) = 1;
2719 TYPE_NAME (va_list_type
) = va_list_name
;
2720 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2721 /* Create the __ap field. */
2722 ap_field
= build_decl (BUILTINS_LOCATION
,
2724 get_identifier ("__ap"),
2726 DECL_ARTIFICIAL (ap_field
) = 1;
2727 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2728 TYPE_FIELDS (va_list_type
) = ap_field
;
2729 /* Compute its layout. */
2730 layout_type (va_list_type
);
2732 return va_list_type
;
2735 /* Return an expression of type "void *" pointing to the next
2736 available argument in a variable-argument list. VALIST is the
2737 user-level va_list object, of type __builtin_va_list. */
2739 arm_extract_valist_ptr (tree valist
)
2741 if (TREE_TYPE (valist
) == error_mark_node
)
2742 return error_mark_node
;
2744 /* On an AAPCS target, the pointer is stored within "struct
2746 if (TARGET_AAPCS_BASED
)
2748 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2749 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2750 valist
, ap_field
, NULL_TREE
);
2756 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2758 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2760 valist
= arm_extract_valist_ptr (valist
);
2761 std_expand_builtin_va_start (valist
, nextarg
);
2764 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2766 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2769 valist
= arm_extract_valist_ptr (valist
);
2770 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2773 /* Check any incompatible options that the user has specified. */
2775 arm_option_check_internal (struct gcc_options
*opts
)
2777 int flags
= opts
->x_target_flags
;
2779 /* iWMMXt and NEON are incompatible. */
2781 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_neon
))
2782 error ("iWMMXt and NEON are incompatible");
2784 /* Make sure that the processor choice does not conflict with any of the
2785 other command line choices. */
2786 if (TARGET_ARM_P (flags
)
2787 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
))
2788 error ("target CPU does not support ARM mode");
2790 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2791 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM_P (flags
))
2792 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2794 if (TARGET_ARM_P (flags
) && TARGET_CALLEE_INTERWORKING
)
2795 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2797 /* If this target is normally configured to use APCS frames, warn if they
2798 are turned off and debugging is turned on. */
2799 if (TARGET_ARM_P (flags
)
2800 && write_symbols
!= NO_DEBUG
2801 && !TARGET_APCS_FRAME
2802 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2803 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2805 /* iWMMXt unsupported under Thumb mode. */
2806 if (TARGET_THUMB_P (flags
) && TARGET_IWMMXT
)
2807 error ("iWMMXt unsupported under Thumb mode");
2809 if (TARGET_HARD_TP
&& TARGET_THUMB1_P (flags
))
2810 error ("can not use -mtp=cp15 with 16-bit Thumb");
2812 if (TARGET_THUMB_P (flags
) && TARGET_VXWORKS_RTP
&& flag_pic
)
2814 error ("RTP PIC is incompatible with Thumb");
2818 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2820 if ((target_pure_code
|| target_slow_flash_data
)
2821 && (!TARGET_HAVE_MOVT
|| arm_arch_notm
|| flag_pic
|| TARGET_NEON
))
2823 const char *flag
= (target_pure_code
? "-mpure-code" :
2824 "-mslow-flash-data");
2825 error ("%s only supports non-pic code on M-profile targets with the "
2826 "MOVT instruction", flag
);
2831 /* Recompute the global settings depending on target attribute options. */
2834 arm_option_params_internal (void)
2836 /* If we are not using the default (ARM mode) section anchor offset
2837 ranges, then set the correct ranges now. */
2840 /* Thumb-1 LDR instructions cannot have negative offsets.
2841 Permissible positive offset ranges are 5-bit (for byte loads),
2842 6-bit (for halfword loads), or 7-bit (for word loads).
2843 Empirical results suggest a 7-bit anchor range gives the best
2844 overall code size. */
2845 targetm
.min_anchor_offset
= 0;
2846 targetm
.max_anchor_offset
= 127;
2848 else if (TARGET_THUMB2
)
2850 /* The minimum is set such that the total size of the block
2851 for a particular anchor is 248 + 1 + 4095 bytes, which is
2852 divisible by eight, ensuring natural spacing of anchors. */
2853 targetm
.min_anchor_offset
= -248;
2854 targetm
.max_anchor_offset
= 4095;
2858 targetm
.min_anchor_offset
= TARGET_MIN_ANCHOR_OFFSET
;
2859 targetm
.max_anchor_offset
= TARGET_MAX_ANCHOR_OFFSET
;
2864 /* If optimizing for size, bump the number of instructions that we
2865 are prepared to conditionally execute (even on a StrongARM). */
2866 max_insns_skipped
= 6;
2868 /* For THUMB2, we limit the conditional sequence to one IT block. */
2870 max_insns_skipped
= arm_restrict_it
? 1 : 4;
2873 /* When -mrestrict-it is in use tone down the if-conversion. */
2874 max_insns_skipped
= (TARGET_THUMB2
&& arm_restrict_it
)
2875 ? 1 : current_tune
->max_insns_skipped
;
2878 /* True if -mflip-thumb should next add an attribute for the default
2879 mode, false if it should next add an attribute for the opposite mode. */
2880 static GTY(()) bool thumb_flipper
;
2882 /* Options after initial target override. */
2883 static GTY(()) tree init_optimize
;
2886 arm_override_options_after_change_1 (struct gcc_options
*opts
)
2888 if (opts
->x_align_functions
<= 0)
2889 opts
->x_align_functions
= TARGET_THUMB_P (opts
->x_target_flags
)
2890 && opts
->x_optimize_size
? 2 : 4;
2893 /* Implement targetm.override_options_after_change. */
2896 arm_override_options_after_change (void)
2898 arm_configure_build_target (&arm_active_target
,
2899 TREE_TARGET_OPTION (target_option_default_node
),
2900 &global_options_set
, false);
2902 arm_override_options_after_change_1 (&global_options
);
2906 arm_option_restore (struct gcc_options
*, struct cl_target_option
*ptr
)
2908 arm_configure_build_target (&arm_active_target
, ptr
, &global_options_set
,
2912 /* Reset options between modes that the user has specified. */
2914 arm_option_override_internal (struct gcc_options
*opts
,
2915 struct gcc_options
*opts_set
)
2917 arm_override_options_after_change_1 (opts
);
2919 if (TARGET_INTERWORK
&& !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
2921 /* The default is to enable interworking, so this warning message would
2922 be confusing to users who have just compiled with, eg, -march=armv3. */
2923 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2924 opts
->x_target_flags
&= ~MASK_INTERWORK
;
2927 if (TARGET_THUMB_P (opts
->x_target_flags
)
2928 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
2930 warning (0, "target CPU does not support THUMB instructions");
2931 opts
->x_target_flags
&= ~MASK_THUMB
;
2934 if (TARGET_APCS_FRAME
&& TARGET_THUMB_P (opts
->x_target_flags
))
2936 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2937 opts
->x_target_flags
&= ~MASK_APCS_FRAME
;
2940 /* Callee super interworking implies thumb interworking. Adding
2941 this to the flags here simplifies the logic elsewhere. */
2942 if (TARGET_THUMB_P (opts
->x_target_flags
) && TARGET_CALLEE_INTERWORKING
)
2943 opts
->x_target_flags
|= MASK_INTERWORK
;
2945 /* need to remember initial values so combinaisons of options like
2946 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2947 cl_optimization
*to
= TREE_OPTIMIZATION (init_optimize
);
2949 if (! opts_set
->x_arm_restrict_it
)
2950 opts
->x_arm_restrict_it
= arm_arch8
;
2952 /* ARM execution state and M profile don't have [restrict] IT. */
2953 if (!TARGET_THUMB2_P (opts
->x_target_flags
) || !arm_arch_notm
)
2954 opts
->x_arm_restrict_it
= 0;
2956 /* Enable -munaligned-access by default for
2957 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2958 i.e. Thumb2 and ARM state only.
2959 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2960 - ARMv8 architecture-base processors.
2962 Disable -munaligned-access by default for
2963 - all pre-ARMv6 architecture-based processors
2964 - ARMv6-M architecture-based processors
2965 - ARMv8-M Baseline processors. */
2967 if (! opts_set
->x_unaligned_access
)
2969 opts
->x_unaligned_access
= (TARGET_32BIT_P (opts
->x_target_flags
)
2970 && arm_arch6
&& (arm_arch_notm
|| arm_arch7
));
2972 else if (opts
->x_unaligned_access
== 1
2973 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
2975 warning (0, "target CPU does not support unaligned accesses");
2976 opts
->x_unaligned_access
= 0;
2979 /* Don't warn since it's on by default in -O2. */
2980 if (TARGET_THUMB1_P (opts
->x_target_flags
))
2981 opts
->x_flag_schedule_insns
= 0;
2983 opts
->x_flag_schedule_insns
= to
->x_flag_schedule_insns
;
2985 /* Disable shrink-wrap when optimizing function for size, since it tends to
2986 generate additional returns. */
2987 if (optimize_function_for_size_p (cfun
)
2988 && TARGET_THUMB2_P (opts
->x_target_flags
))
2989 opts
->x_flag_shrink_wrap
= false;
2991 opts
->x_flag_shrink_wrap
= to
->x_flag_shrink_wrap
;
2993 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2994 - epilogue_insns - does not accurately model the corresponding insns
2995 emitted in the asm file. In particular, see the comment in thumb_exit
2996 'Find out how many of the (return) argument registers we can corrupt'.
2997 As a consequence, the epilogue may clobber registers without fipa-ra
2998 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
2999 TODO: Accurately model clobbers for epilogue_insns and reenable
3001 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3002 opts
->x_flag_ipa_ra
= 0;
3004 opts
->x_flag_ipa_ra
= to
->x_flag_ipa_ra
;
3006 /* Thumb2 inline assembly code should always use unified syntax.
3007 This will apply to ARM and Thumb1 eventually. */
3008 opts
->x_inline_asm_unified
= TARGET_THUMB2_P (opts
->x_target_flags
);
3010 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3011 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
;
3015 /* Convert a static initializer array of feature bits to sbitmap
3018 arm_initialize_isa (sbitmap isa
, const enum isa_feature
*isa_bits
)
3021 while (*isa_bits
!= isa_nobit
)
3022 bitmap_set_bit (isa
, *(isa_bits
++));
/* Set of every FPU-related feature bit (built from ISA_ALL_FPU); used to
   mask FPU bits out of ISA comparisons and to replace them when -mfpu is
   processed.  NOTE(review): allocated/initialized in arm_option_override —
   confirm nothing reads it earlier.  */
static sbitmap isa_all_fpubits;

/* Set of every quirk feature bit (built from ISA_ALL_QUIRKS); quirk bits
   are ignored when checking -mcpu/-march compatibility.  */
static sbitmap isa_quirkbits;
/* Configure a build target TARGET from the user-specified options OPTS and
   OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
   architecture have been specified, but the two are not identical.

   On return TARGET has its isa bitmap, core/arch names and tuning fields
   filled in.  */
void
arm_configure_build_target (struct arm_build_target *target,
			    struct cl_target_option *opts,
			    struct gcc_options *opts_set,
			    bool warn_compatible)
{
  const struct processors *arm_selected_tune = NULL;
  const struct processors *arm_selected_arch = NULL;
  const struct processors *arm_selected_cpu = NULL;
  const struct arm_fpu_desc *arm_selected_fpu = NULL;

  bitmap_clear (target->isa);
  target->core_name = NULL;
  target->arch_name = NULL;

  if (opts_set->x_arm_arch_option)
    arm_selected_arch = &all_architectures[opts->x_arm_arch_option];

  if (opts_set->x_arm_cpu_option)
    {
      /* -mcpu selects both the CPU and, unless overridden below, the
	 default tuning.  */
      arm_selected_cpu = &all_cores[(int) opts->x_arm_cpu_option];
      arm_selected_tune = &all_cores[(int) opts->x_arm_cpu_option];
    }

  if (opts_set->x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) opts->x_arm_tune_option];

  if (arm_selected_arch)
    {
      arm_initialize_isa (target->isa, arm_selected_arch->isa_bits);

      if (arm_selected_cpu)
	{
	  /* Both -mcpu and -march were given: check that the CPU's
	     capabilities agree with the architecture's, ignoring quirk
	     bits and (for now) FPU bits.  */
	  auto_sbitmap cpu_isa (isa_num_bits);

	  arm_initialize_isa (cpu_isa, arm_selected_cpu->isa_bits);
	  bitmap_xor (cpu_isa, cpu_isa, target->isa);
	  /* Ignore any bits that are quirk bits.  */
	  bitmap_and_compl (cpu_isa, cpu_isa, isa_quirkbits);
	  /* Ignore (for now) any bits that might be set by -mfpu.  */
	  bitmap_and_compl (cpu_isa, cpu_isa, isa_all_fpubits);

	  if (!bitmap_empty_p (cpu_isa))
	    {
	      if (warn_compatible)
		warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
			 arm_selected_cpu->name, arm_selected_arch->name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	      target->arch_name = arm_selected_arch->name;
	    }
	  else
	    {
	      /* Architecture and CPU are essentially the same.
		 Prefer the CPU setting.  */
	      arm_selected_arch = NULL;
	      target->core_name = arm_selected_cpu->name;
	    }
	}
      else
	{
	  /* Pick a CPU based on the architecture.  */
	  arm_selected_cpu = arm_selected_arch;
	  target->arch_name = arm_selected_arch->name;
	  /* Note: target->core_name is left unset in this path.  */
	}
    }
  else if (arm_selected_cpu)
    {
      target->core_name = arm_selected_cpu->name;
      arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
    }
  /* If the user did not specify a processor, choose one for them.  */
  else
    {
      const struct processors * sel;
      auto_sbitmap sought_isa (isa_num_bits);
      bitmap_clear (sought_isa);
      auto_sbitmap default_isa (isa_num_bits);

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      gcc_assert (arm_selected_cpu->name);

      /* RWE: All of the selection logic below (to the end of this
	 'if' clause) looks somewhat suspect.  It appears to be mostly
	 there to support forcing thumb support when the default CPU
	 does not have thumb (somewhat dubious in terms of what the
	 user might be expecting).  I think it should be removed once
	 support for the pre-thumb era cores is removed.  */
      sel = arm_selected_cpu;
      arm_initialize_isa (default_isa, sel->isa_bits);

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  bitmap_set_bit (sought_isa, isa_bit_thumb);
	  bitmap_set_bit (sought_isa, isa_bit_mode32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we forcibly remove MODE26 from
	     from the isa features here (if it was set), so that the
	     search below will always be able to find a compatible
	     processor.  */
	  bitmap_clear_bit (default_isa, isa_bit_mode26);
	}

      /* If there are such requirements and the default CPU does not
	 satisfy them, we need to run over the complete list of
	 cores looking for one that is satisfactory.  */
      if (!bitmap_empty_p (sought_isa)
	  && !bitmap_subset_p (sought_isa, default_isa))
	{
	  auto_sbitmap candidate_isa (isa_num_bits);
	  /* We're only interested in a CPU with at least the
	     capabilities of the default CPU and the required
	     additional features.  */
	  bitmap_ior (default_isa, default_isa, sought_isa);

	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    {
	      arm_initialize_isa (candidate_isa, sel->isa_bits);
	      /* An exact match?  */
	      if (bitmap_equal_p (default_isa, candidate_isa))
		break;
	    }

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = isa_num_bits;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a CPU that has exactly the
		 characteristics of the default CPU and the given
		 command line options we scan the array again looking
		 for a best match.  The best match must have at least
		 the capabilities of the perfect match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		{
		  arm_initialize_isa (candidate_isa, sel->isa_bits);

		  if (bitmap_subset_p (default_isa, candidate_isa))
		    {
		      unsigned count;

		      /* Count the surplus bits: the candidate with the
			 fewest extra capabilities is the best fit.  */
		      bitmap_and_compl (candidate_isa, candidate_isa,
					default_isa);
		      count = bitmap_popcount (candidate_isa);

		      if (count < current_bit_count)
			{
			  best_fit = sel;
			  current_bit_count = count;
			}
		    }
		}
	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}

      /* Now we know the CPU, we can finally initialize the target
	 structure.  */
      target->core_name = arm_selected_cpu->name;
      arm_initialize_isa (target->isa, arm_selected_cpu->isa_bits);
    }

  gcc_assert (arm_selected_cpu);

  if (opts->x_arm_fpu_index != TARGET_FPU_auto)
    {
      /* An explicit -mfpu replaces whatever FPU bits the CPU/arch
	 implied.  */
      arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
      auto_sbitmap fpu_bits (isa_num_bits);

      arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
      bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
      bitmap_ior (target->isa, target->isa, fpu_bits);
    }
  else if (target->core_name == NULL)
    /* To support this we need to be able to parse FPU feature options
       from the architecture string.  */
    sorry ("-mfpu=auto not currently supported without an explicit CPU.");

  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  /* Finish initializing the target structure.  */
  target->arch_pp_name = arm_selected_cpu->arch;
  target->base_arch = arm_selected_cpu->base_arch;
  target->arch_core = arm_selected_cpu->core;

  target->tune_flags = arm_selected_tune->tune_flags;
  target->tune = arm_selected_tune->tune;
  target->tune_core = arm_selected_tune->core;
}
/* Fix up any incompatible options that the user has specified.
   Implements TARGET_OPTION_OVERRIDE: configures the active target from
   the command line, initializes the arm_arch*/arm_tune_* globals used by
   arm.md, and sets various default parameters.  */
static void
arm_option_override (void)
{
  static const enum isa_feature fpu_bitlist[] = { ISA_ALL_FPU, isa_nobit };
  static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
  cl_target_option opts;

  isa_quirkbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_quirkbits, quirk_bitlist);

  isa_all_fpubits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_all_fpubits, fpu_bitlist);

  arm_active_target.isa = sbitmap_alloc (isa_num_bits);

  if (!global_options_set.x_arm_fpu_index)
    {
      /* No -mfpu given: pick the subtarget default (or "vfp").  */
      const char *target_fpu_name;
      bool ok;
      int fpu_index;

#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      target_fpu_name = "vfp";
#endif

      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
      arm_fpu_index = (enum fpu_type) fpu_index;
    }

  cl_target_option_save (&opts, &global_options);
  arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
			      true);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
  arm_base_arch = arm_active_target.base_arch;

  arm_tune = arm_active_target.tune_core;
  tune_flags = arm_active_target.tune_flags;
  current_tune = arm_active_target.tune;

  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK
      && !TARGET_BPABI
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support interworking" );
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file.  */
  arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
  arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
  arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
  arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
  arm_arch5te = arm_arch5e
    && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
  arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
  arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
  arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
  arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
  arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
  arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
  arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
  arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
  arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
  arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
  arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
  arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
  arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
  arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
  arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
  arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
  if (arm_fp16_inst)
    {
      /* FP16 instructions imply the IEEE format; the alternative format
	 cannot be selected at the same time.  */
      if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
	error ("selected fp16 options are incompatible");
      arm_fp16_format = ARM_FP16_FORMAT_IEEE;
    }

  /* Set up some tuning parameters.  */
  arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
  arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
  arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
  arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
  arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;

  /* And finally, set up some quirks.  */
  arm_arch_no_volatile_ce
    = bitmap_bit_p (arm_active_target.isa, isa_quirk_no_volatile_ce);
  arm_arch6kz
    = arm_arch6k && bitmap_bit_p (arm_active_target.isa, isa_quirk_ARMv6kz);

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  else
    arm_fpu_attr = FPU_VFP;

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");
      else
	if (TARGET_CALLEE_INTERWORKING)
	  error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
	       && TARGET_HARD_FLOAT)
	{
	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv2))
	    error ("-mfloat-abi=hard: selected processor lacks an FPU");
	}
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD)
	sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }

  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }

  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      /* The user set -mstructure-size-boundary explicitly: validate it.  */
      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }

  if (TARGET_VXWORKS_RTP)
    {
      if (!global_options_set.x_arm_pic_data_is_text_relative)
	arm_pic_data_is_text_relative = 0;
    }
  else if (flag_pic
	   && !arm_pic_data_is_text_relative
	   && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
    /* When text & data segments don't have a fixed displacement, the
       intended use is with a single, read only, pic base register.
       Unless the user explicitly requested not to do that, set
       it.  */
    target_flags |= MASK_SINGLE_PIC_BASE;

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }

  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least(2))
    flag_strict_volatile_bitfields = 1;

  /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;

  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			   current_tune->prefetch.num_slots,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			   current_tune->prefetch.l1_cache_line_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			   current_tune->prefetch.l1_cache_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  /* Use Neon to perform 64-bits operations rather than core
     registers.  */
  prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
  if (use_neon_for_64bits == 1)
    prefer_neon_for_64bits = true;

  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int param_sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      param_sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      param_sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      param_sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
			 param_sched_autopref_queue_depth,
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Currently, for slow flash data, we just disable literal pools.  We also
     disable it for pure-code.  */
  if (target_slow_flash_data || target_pure_code)
    arm_disable_literal_pool = true;

  if (use_cmse && !arm_arch_cmse)
    error ("target CPU does not support ARMv8-M Security Extensions");

  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;

  /* Need to remember initial options before they are overriden.  */
  init_optimize = build_optimization_node (&global_options);

  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal ();

  /* Create the default target_options structure.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Init initial mode for testing.  */
  thumb_flipper = TARGET_THUMB;
}
/* One-time initialization of the minipool (constant pool) obstack.
   NOTE(review): despite the name, only the obstack setup is visible
   here — confirm whether GC root registration happens elsewhere.  */
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init(&minipool_obstack);
  /* Record the obstack base so allocations can be rewound later.  */
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
3654 /* A table of known ARM exception types.
3655 For use with the interrupt function attribute. */
3659 const char *const arg
;
3660 const unsigned long return_value
;
3664 static const isr_attribute_arg isr_attribute_args
[] =
3666 { "IRQ", ARM_FT_ISR
},
3667 { "irq", ARM_FT_ISR
},
3668 { "FIQ", ARM_FT_FIQ
},
3669 { "fiq", ARM_FT_FIQ
},
3670 { "ABORT", ARM_FT_ISR
},
3671 { "abort", ARM_FT_ISR
},
3672 { "ABORT", ARM_FT_ISR
},
3673 { "abort", ARM_FT_ISR
},
3674 { "UNDEF", ARM_FT_EXCEPTION
},
3675 { "undef", ARM_FT_EXCEPTION
},
3676 { "SWI", ARM_FT_EXCEPTION
},
3677 { "swi", ARM_FT_EXCEPTION
},
3678 { NULL
, ARM_FT_NORMAL
}
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.
   ARGUMENT is the attribute's argument list (may be NULL_TREE).  */
static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  /* M-profile (no "notm" bit): interrupt handlers only need the stack
     realigned; there is no distinct ISR return sequence.  */
  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
/* Computes the type of the current function.  Returns a mask of
   ARM_FT_* flags describing its interrupt/naked/nested/CMSE nature.  */
static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  /* "isr" and "interrupt" are synonyms here.  */
  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  if (lookup_attribute ("cmse_nonsecure_entry", attr))
    type |= ARM_FT_CMSE_ENTRY;

  return type;
}
/* Returns the type of the current function.  The result is computed
   lazily by arm_compute_func_type and cached in cfun->machine.  */
unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
3772 arm_allocate_stack_slots_for_args (void)
3774 /* Naked functions should not allocate stack slots for arguments. */
3775 return !IS_NAKED (arm_current_func_type ());
3779 arm_warn_func_return (tree decl
)
3781 /* Naked functions are implemented entirely in assembly, including the
3782 return sequence, so suppress warnings about this. */
3783 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr 		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */
static void
arm_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.syntax unified\n");

  if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      /* Thumb-1 has no pc-relative load into an arbitrary register, so
	 bounce the target address through the stack via r0/r1.  */
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  /* Reserve the two variable words (static chain and target address);
     they are filled in by arm_trampoline_init.  */
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
   M_TRAMP is the trampoline memory, FNDECL the nested function and
   CHAIN_VALUE the static chain.  */
static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  /* Copy the constant code emitted by arm_asm_trampoline_template.  */
  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  /* Store the static chain value; the offsets match the .word slots
     in the template (Thumb-1 layout uses the larger offsets).  */
  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  /* Flush the instruction cache over the freshly-written trampoline.  */
  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */
static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.
   ISCOND is nonzero when testing a conditional return.  */
int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
	 assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
	  && arm_compute_static_chain_stack_bytes() != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ... */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
     several instructions if anything needs to be popped.  */
  if (saved_int_regs && IS_CMSE_ENTRY (func_type))
    return 0;

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
4008 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4009 shrink-wrapping if possible. This is the case if we need to emit a
4010 prologue, which we can test by looking at the offsets. */
4012 use_simple_return_p (void)
4014 arm_stack_offsets
*offsets
;
4016 /* Note this function can be called before or after reload. */
4017 if (!reload_completed
)
4018 arm_compute_frame_layout ();
4020 offsets
= arm_get_frame_offsets ();
4021 return offsets
->outgoing_args
!= 0;
4024 /* Return TRUE if int I is a valid immediate ARM constant. */
4027 const_ok_for_arm (HOST_WIDE_INT i
)
4031 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4032 be all zero, or all one. */
4033 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
4034 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
4035 != ((~(unsigned HOST_WIDE_INT
) 0)
4036 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
4039 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
4041 /* Fast return for 0 and small values. We must do this for zero, since
4042 the code below can't handle that one case. */
4043 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
4046 /* Get the number of trailing zeros. */
4047 lowbit
= ffs((int) i
) - 1;
4049 /* Only even shifts are allowed in ARM mode so round down to the
4050 nearest even number. */
4054 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
4059 /* Allow rotated constants in ARM mode. */
4061 && ((i
& ~0xc000003f) == 0
4062 || (i
& ~0xf000000f) == 0
4063 || (i
& ~0xfc000003) == 0))
4066 else if (TARGET_THUMB2
)
4070 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4073 if (i
== v
|| i
== (v
| (v
<< 8)))
4076 /* Allow repeated pattern 0xXY00XY00. */
4082 else if (TARGET_HAVE_MOVT
)
4084 /* Thumb-1 Targets with MOVT. */
4094 /* Return true if I is a valid constant for the operation CODE. */
4096 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
4098 if (const_ok_for_arm (i
))
4104 /* See if we can use movw. */
4105 if (TARGET_HAVE_MOVT
&& (i
& 0xffff0000) == 0)
4108 /* Otherwise, try mvn. */
4109 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4112 /* See if we can use addw or subw. */
4114 && ((i
& 0xfffff000) == 0
4115 || ((-i
) & 0xfffff000) == 0))
4136 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
4138 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
4144 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4148 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4155 /* Return true if I is a valid di mode constant for the operation CODE. */
4157 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
4159 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
4160 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
4161 rtx hi
= GEN_INT (hi_val
);
4162 rtx lo
= GEN_INT (lo_val
);
4172 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
4173 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
4175 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
4182 /* Emit a sequence of insns to handle a large constant.
4183 CODE is the code of the operation required, it can be any of SET, PLUS,
4184 IOR, AND, XOR, MINUS;
4185 MODE is the mode in which the operation is being performed;
4186 VAL is the integer to operate on;
4187 SOURCE is the other operand (a register, or a null-pointer for SET);
4188 SUBTARGETS means it is safe to create scratch registers if that will
4189 either produce a simpler sequence, or we will want to cse the values.
4190 Return value is the number of insns emitted. */
4192 /* ??? Tweak this for thumb2. */
4194 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
4195 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
4199 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
4200 cond
= COND_EXEC_TEST (PATTERN (insn
));
4204 if (subtargets
|| code
== SET
4205 || (REG_P (target
) && REG_P (source
)
4206 && REGNO (target
) != REGNO (source
)))
4208 /* After arm_reorg has been called, we can't fix up expensive
4209 constants by pushing them into memory so we must synthesize
4210 them in-line, regardless of the cost. This is only likely to
4211 be more costly on chips that have load delay slots and we are
4212 compiling without running the scheduler (so no splitting
4213 occurred before the final instruction emission).
4215 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4217 if (!cfun
->machine
->after_arm_reorg
4219 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
4221 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
4226 /* Currently SET is the only monadic value for CODE, all
4227 the rest are diadic. */
4228 if (TARGET_USE_MOVT
)
4229 arm_emit_movpair (target
, GEN_INT (val
));
4231 emit_set_insn (target
, GEN_INT (val
));
4237 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
4239 if (TARGET_USE_MOVT
)
4240 arm_emit_movpair (temp
, GEN_INT (val
));
4242 emit_set_insn (temp
, GEN_INT (val
));
4244 /* For MINUS, the value is subtracted from, since we never
4245 have subtraction of a constant. */
4247 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4249 emit_set_insn (target
,
4250 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
4256 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4260 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4261 ARM/THUMB2 immediates, and add up to VAL.
4262 Thr function return value gives the number of insns required. */
4264 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4265 struct four_ints
*return_sequence
)
4267 int best_consecutive_zeros
= 0;
4271 struct four_ints tmp_sequence
;
4273 /* If we aren't targeting ARM, the best place to start is always at
4274 the bottom, otherwise look more closely. */
4277 for (i
= 0; i
< 32; i
+= 2)
4279 int consecutive_zeros
= 0;
4281 if (!(val
& (3 << i
)))
4283 while ((i
< 32) && !(val
& (3 << i
)))
4285 consecutive_zeros
+= 2;
4288 if (consecutive_zeros
> best_consecutive_zeros
)
4290 best_consecutive_zeros
= consecutive_zeros
;
4291 best_start
= i
- consecutive_zeros
;
4298 /* So long as it won't require any more insns to do so, it's
4299 desirable to emit a small constant (in bits 0...9) in the last
4300 insn. This way there is more chance that it can be combined with
4301 a later addressing insn to form a pre-indexed load or store
4302 operation. Consider:
4304 *((volatile int *)0xe0000100) = 1;
4305 *((volatile int *)0xe0000110) = 2;
4307 We want this to wind up as:
4311 str rB, [rA, #0x100]
4313 str rB, [rA, #0x110]
4315 rather than having to synthesize both large constants from scratch.
4317 Therefore, we calculate how many insns would be required to emit
4318 the constant starting from `best_start', and also starting from
4319 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4320 yield a shorter sequence, we may as well use zero. */
4321 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4323 && ((HOST_WIDE_INT_1U
<< best_start
) < val
))
4325 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4326 if (insns2
<= insns1
)
4328 *return_sequence
= tmp_sequence
;
4336 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4338 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4339 struct four_ints
*return_sequence
, int i
)
4341 int remainder
= val
& 0xffffffff;
4344 /* Try and find a way of doing the job in either two or three
4347 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4348 location. We start at position I. This may be the MSB, or
4349 optimial_immediate_sequence may have positioned it at the largest block
4350 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4351 wrapping around to the top of the word when we drop off the bottom.
4352 In the worst case this code should produce no more than four insns.
4354 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4355 constants, shifted to any arbitrary location. We should always start
4360 unsigned int b1
, b2
, b3
, b4
;
4361 unsigned HOST_WIDE_INT result
;
4364 gcc_assert (insns
< 4);
4369 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4370 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4373 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4374 /* We can use addw/subw for the last 12 bits. */
4378 /* Use an 8-bit shifted/rotated immediate. */
4382 result
= remainder
& ((0x0ff << end
)
4383 | ((i
< end
) ? (0xff >> (32 - end
))
4390 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4391 arbitrary shifts. */
4392 i
-= TARGET_ARM
? 2 : 1;
4396 /* Next, see if we can do a better job with a thumb2 replicated
4399 We do it this way around to catch the cases like 0x01F001E0 where
4400 two 8-bit immediates would work, but a replicated constant would
4403 TODO: 16-bit constants that don't clear all the bits, but still win.
4404 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4407 b1
= (remainder
& 0xff000000) >> 24;
4408 b2
= (remainder
& 0x00ff0000) >> 16;
4409 b3
= (remainder
& 0x0000ff00) >> 8;
4410 b4
= remainder
& 0xff;
4414 /* The 8-bit immediate already found clears b1 (and maybe b2),
4415 but must leave b3 and b4 alone. */
4417 /* First try to find a 32-bit replicated constant that clears
4418 almost everything. We can assume that we can't do it in one,
4419 or else we wouldn't be here. */
4420 unsigned int tmp
= b1
& b2
& b3
& b4
;
4421 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4423 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4424 + (tmp
== b3
) + (tmp
== b4
);
4426 && (matching_bytes
>= 3
4427 || (matching_bytes
== 2
4428 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4430 /* At least 3 of the bytes match, and the fourth has at
4431 least as many bits set, or two of the bytes match
4432 and it will only require one more insn to finish. */
4440 /* Second, try to find a 16-bit replicated constant that can
4441 leave three of the bytes clear. If b2 or b4 is already
4442 zero, then we can. If the 8-bit from above would not
4443 clear b2 anyway, then we still win. */
4444 else if (b1
== b3
&& (!b2
|| !b4
4445 || (remainder
& 0x00ff0000 & ~result
)))
4447 result
= remainder
& 0xff00ff00;
4453 /* The 8-bit immediate already found clears b2 (and maybe b3)
4454 and we don't get here unless b1 is alredy clear, but it will
4455 leave b4 unchanged. */
4457 /* If we can clear b2 and b4 at once, then we win, since the
4458 8-bits couldn't possibly reach that far. */
4461 result
= remainder
& 0x00ff00ff;
4467 return_sequence
->i
[insns
++] = result
;
4468 remainder
&= ~result
;
4470 if (code
== SET
|| code
== MINUS
)
4478 /* Emit an instruction with the indicated PATTERN. If COND is
4479 non-NULL, conditionalize the execution of the instruction on COND
4483 emit_constant_insn (rtx cond
, rtx pattern
)
4486 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
4487 emit_insn (pattern
);
4490 /* As above, but extra parameter GENERATE which, if clear, suppresses
4494 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
4495 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
4496 int subtargets
, int generate
)
4500 int final_invert
= 0;
4502 int set_sign_bit_copies
= 0;
4503 int clear_sign_bit_copies
= 0;
4504 int clear_zero_bit_copies
= 0;
4505 int set_zero_bit_copies
= 0;
4506 int insns
= 0, neg_insns
, inv_insns
;
4507 unsigned HOST_WIDE_INT temp1
, temp2
;
4508 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4509 struct four_ints
*immediates
;
4510 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4512 /* Find out which operations are safe for a given CODE. Also do a quick
4513 check for degenerate cases; these can occur when DImode operations
4526 if (remainder
== 0xffffffff)
4529 emit_constant_insn (cond
,
4530 gen_rtx_SET (target
,
4531 GEN_INT (ARM_SIGN_EXTEND (val
))));
4537 if (reload_completed
&& rtx_equal_p (target
, source
))
4541 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4550 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
4553 if (remainder
== 0xffffffff)
4555 if (reload_completed
&& rtx_equal_p (target
, source
))
4558 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4567 if (reload_completed
&& rtx_equal_p (target
, source
))
4570 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4574 if (remainder
== 0xffffffff)
4577 emit_constant_insn (cond
,
4578 gen_rtx_SET (target
,
4579 gen_rtx_NOT (mode
, source
)));
4586 /* We treat MINUS as (val - source), since (source - val) is always
4587 passed as (source + (-val)). */
4591 emit_constant_insn (cond
,
4592 gen_rtx_SET (target
,
4593 gen_rtx_NEG (mode
, source
)));
4596 if (const_ok_for_arm (val
))
4599 emit_constant_insn (cond
,
4600 gen_rtx_SET (target
,
4601 gen_rtx_MINUS (mode
, GEN_INT (val
),
4612 /* If we can do it in one insn get out quickly. */
4613 if (const_ok_for_op (val
, code
))
4616 emit_constant_insn (cond
,
4617 gen_rtx_SET (target
,
4619 ? gen_rtx_fmt_ee (code
, mode
, source
,
4625 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4627 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4628 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4632 if (mode
== SImode
&& i
== 16)
4633 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4635 emit_constant_insn (cond
,
4636 gen_zero_extendhisi2
4637 (target
, gen_lowpart (HImode
, source
)));
4639 /* Extz only supports SImode, but we can coerce the operands
4641 emit_constant_insn (cond
,
4642 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4643 gen_lowpart (SImode
, source
),
4644 GEN_INT (i
), const0_rtx
));
4650 /* Calculate a few attributes that may be useful for specific
4652 /* Count number of leading zeros. */
4653 for (i
= 31; i
>= 0; i
--)
4655 if ((remainder
& (1 << i
)) == 0)
4656 clear_sign_bit_copies
++;
4661 /* Count number of leading 1's. */
4662 for (i
= 31; i
>= 0; i
--)
4664 if ((remainder
& (1 << i
)) != 0)
4665 set_sign_bit_copies
++;
4670 /* Count number of trailing zero's. */
4671 for (i
= 0; i
<= 31; i
++)
4673 if ((remainder
& (1 << i
)) == 0)
4674 clear_zero_bit_copies
++;
4679 /* Count number of trailing 1's. */
4680 for (i
= 0; i
<= 31; i
++)
4682 if ((remainder
& (1 << i
)) != 0)
4683 set_zero_bit_copies
++;
4691 /* See if we can do this by sign_extending a constant that is known
4692 to be negative. This is a good, way of doing it, since the shift
4693 may well merge into a subsequent insn. */
4694 if (set_sign_bit_copies
> 1)
4696 if (const_ok_for_arm
4697 (temp1
= ARM_SIGN_EXTEND (remainder
4698 << (set_sign_bit_copies
- 1))))
4702 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4703 emit_constant_insn (cond
,
4704 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4705 emit_constant_insn (cond
,
4706 gen_ashrsi3 (target
, new_src
,
4707 GEN_INT (set_sign_bit_copies
- 1)));
4711 /* For an inverted constant, we will need to set the low bits,
4712 these will be shifted out of harm's way. */
4713 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4714 if (const_ok_for_arm (~temp1
))
4718 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4719 emit_constant_insn (cond
,
4720 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4721 emit_constant_insn (cond
,
4722 gen_ashrsi3 (target
, new_src
,
4723 GEN_INT (set_sign_bit_copies
- 1)));
4729 /* See if we can calculate the value as the difference between two
4730 valid immediates. */
4731 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4733 int topshift
= clear_sign_bit_copies
& ~1;
4735 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4736 & (0xff000000 >> topshift
));
4738 /* If temp1 is zero, then that means the 9 most significant
4739 bits of remainder were 1 and we've caused it to overflow.
4740 When topshift is 0 we don't need to do anything since we
4741 can borrow from 'bit 32'. */
4742 if (temp1
== 0 && topshift
!= 0)
4743 temp1
= 0x80000000 >> (topshift
- 1);
4745 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4747 if (const_ok_for_arm (temp2
))
4751 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4752 emit_constant_insn (cond
,
4753 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4754 emit_constant_insn (cond
,
4755 gen_addsi3 (target
, new_src
,
4763 /* See if we can generate this by setting the bottom (or the top)
4764 16 bits, and then shifting these into the other half of the
4765 word. We only look for the simplest cases, to do more would cost
4766 too much. Be careful, however, not to generate this when the
4767 alternative would take fewer insns. */
4768 if (val
& 0xffff0000)
4770 temp1
= remainder
& 0xffff0000;
4771 temp2
= remainder
& 0x0000ffff;
4773 /* Overlaps outside this range are best done using other methods. */
4774 for (i
= 9; i
< 24; i
++)
4776 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4777 && !const_ok_for_arm (temp2
))
4779 rtx new_src
= (subtargets
4780 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4782 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4783 source
, subtargets
, generate
);
4791 gen_rtx_ASHIFT (mode
, source
,
4798 /* Don't duplicate cases already considered. */
4799 for (i
= 17; i
< 24; i
++)
4801 if (((temp1
| (temp1
>> i
)) == remainder
)
4802 && !const_ok_for_arm (temp1
))
4804 rtx new_src
= (subtargets
4805 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4807 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4808 source
, subtargets
, generate
);
4813 gen_rtx_SET (target
,
4816 gen_rtx_LSHIFTRT (mode
, source
,
4827 /* If we have IOR or XOR, and the constant can be loaded in a
4828 single instruction, and we can find a temporary to put it in,
4829 then this can be done in two instructions instead of 3-4. */
4831 /* TARGET can't be NULL if SUBTARGETS is 0 */
4832 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4834 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4838 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4840 emit_constant_insn (cond
,
4841 gen_rtx_SET (sub
, GEN_INT (val
)));
4842 emit_constant_insn (cond
,
4843 gen_rtx_SET (target
,
4844 gen_rtx_fmt_ee (code
, mode
,
4855 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4856 and the remainder 0s for e.g. 0xfff00000)
4857 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4859 This can be done in 2 instructions by using shifts with mov or mvn.
4864 mvn r0, r0, lsr #12 */
4865 if (set_sign_bit_copies
> 8
4866 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
4870 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4871 rtx shift
= GEN_INT (set_sign_bit_copies
);
4877 gen_rtx_ASHIFT (mode
,
4882 gen_rtx_SET (target
,
4884 gen_rtx_LSHIFTRT (mode
, sub
,
4891 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4893 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4895 For eg. r0 = r0 | 0xfff
4900 if (set_zero_bit_copies
> 8
4901 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4905 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4906 rtx shift
= GEN_INT (set_zero_bit_copies
);
4912 gen_rtx_LSHIFTRT (mode
,
4917 gen_rtx_SET (target
,
4919 gen_rtx_ASHIFT (mode
, sub
,
4925 /* This will never be reached for Thumb2 because orn is a valid
4926 instruction. This is for Thumb1 and the ARM 32 bit cases.
4928 x = y | constant (such that ~constant is a valid constant)
4930 x = ~(~y & ~constant).
4932 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4936 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4937 emit_constant_insn (cond
,
4939 gen_rtx_NOT (mode
, source
)));
4942 sub
= gen_reg_rtx (mode
);
4943 emit_constant_insn (cond
,
4945 gen_rtx_AND (mode
, source
,
4947 emit_constant_insn (cond
,
4948 gen_rtx_SET (target
,
4949 gen_rtx_NOT (mode
, sub
)));
4956 /* See if two shifts will do 2 or more insn's worth of work. */
4957 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4959 HOST_WIDE_INT shift_mask
= ((0xffffffff
4960 << (32 - clear_sign_bit_copies
))
4963 if ((remainder
| shift_mask
) != 0xffffffff)
4965 HOST_WIDE_INT new_val
4966 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
4970 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4971 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
4972 new_src
, source
, subtargets
, 1);
4977 rtx targ
= subtargets
? NULL_RTX
: target
;
4978 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4979 targ
, source
, subtargets
, 0);
4985 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4986 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4988 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4989 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
4995 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
4997 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
4999 if ((remainder
| shift_mask
) != 0xffffffff)
5001 HOST_WIDE_INT new_val
5002 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5005 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5007 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5008 new_src
, source
, subtargets
, 1);
5013 rtx targ
= subtargets
? NULL_RTX
: target
;
5015 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5016 targ
, source
, subtargets
, 0);
5022 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5023 rtx shift
= GEN_INT (clear_zero_bit_copies
);
5025 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
5026 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
5038 /* Calculate what the instruction sequences would be if we generated it
5039 normally, negated, or inverted. */
5041 /* AND cannot be split into multiple insns, so invert and use BIC. */
5044 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
5047 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
5052 if (can_invert
|| final_invert
)
5053 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
5058 immediates
= &pos_immediates
;
5060 /* Is the negated immediate sequence more efficient? */
5061 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
5064 immediates
= &neg_immediates
;
5069 /* Is the inverted immediate sequence more efficient?
5070 We must allow for an extra NOT instruction for XOR operations, although
5071 there is some chance that the final 'mvn' will get optimized later. */
5072 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
5075 immediates
= &inv_immediates
;
5083 /* Now output the chosen sequence as instructions. */
5086 for (i
= 0; i
< insns
; i
++)
5088 rtx new_src
, temp1_rtx
;
5090 temp1
= immediates
->i
[i
];
5092 if (code
== SET
|| code
== MINUS
)
5093 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
5094 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
5095 new_src
= gen_reg_rtx (mode
);
5101 else if (can_negate
)
5104 temp1
= trunc_int_for_mode (temp1
, mode
);
5105 temp1_rtx
= GEN_INT (temp1
);
5109 else if (code
== MINUS
)
5110 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
5112 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
5114 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
5119 can_negate
= can_invert
;
5123 else if (code
== MINUS
)
5131 emit_constant_insn (cond
, gen_rtx_SET (target
,
5132 gen_rtx_NOT (mode
, source
)));
5139 /* Canonicalize a comparison so that we are more likely to recognize it.
5140 This can be done for a few constant compares, where we can make the
5141 immediate value easier to load. */
5144 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
5145 bool op0_preserve_value
)
5148 unsigned HOST_WIDE_INT i
, maxval
;
5150 mode
= GET_MODE (*op0
);
5151 if (mode
== VOIDmode
)
5152 mode
= GET_MODE (*op1
);
5154 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
5156 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5157 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5158 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5159 for GTU/LEU in Thumb mode. */
5163 if (*code
== GT
|| *code
== LE
5164 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
5166 /* Missing comparison. First try to use an available
5168 if (CONST_INT_P (*op1
))
5176 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5178 *op1
= GEN_INT (i
+ 1);
5179 *code
= *code
== GT
? GE
: LT
;
5185 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5186 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5188 *op1
= GEN_INT (i
+ 1);
5189 *code
= *code
== GTU
? GEU
: LTU
;
5198 /* If that did not work, reverse the condition. */
5199 if (!op0_preserve_value
)
5201 std::swap (*op0
, *op1
);
5202 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5208 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5209 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5210 to facilitate possible combining with a cmp into 'ands'. */
5212 && GET_CODE (*op0
) == ZERO_EXTEND
5213 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5214 && GET_MODE (XEXP (*op0
, 0)) == QImode
5215 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5216 && subreg_lowpart_p (XEXP (*op0
, 0))
5217 && *op1
== const0_rtx
)
5218 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5221 /* Comparisons smaller than DImode. Only adjust comparisons against
5222 an out-of-range constant. */
5223 if (!CONST_INT_P (*op1
)
5224 || const_ok_for_arm (INTVAL (*op1
))
5225 || const_ok_for_arm (- INTVAL (*op1
)))
5239 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5241 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5242 *code
= *code
== GT
? GE
: LT
;
5250 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5252 *op1
= GEN_INT (i
- 1);
5253 *code
= *code
== GE
? GT
: LE
;
5260 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5261 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5263 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5264 *code
= *code
== GTU
? GEU
: LTU
;
5272 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5274 *op1
= GEN_INT (i
- 1);
5275 *code
= *code
== GEU
? GTU
: LEU
;
5286 /* Define how to find the value returned by a function. */
5289 arm_function_value(const_tree type
, const_tree func
,
5290 bool outgoing ATTRIBUTE_UNUSED
)
5293 int unsignedp ATTRIBUTE_UNUSED
;
5294 rtx r ATTRIBUTE_UNUSED
;
5296 mode
= TYPE_MODE (type
);
5298 if (TARGET_AAPCS_BASED
)
5299 return aapcs_allocate_return_reg (mode
, type
, func
);
5301 /* Promote integer types. */
5302 if (INTEGRAL_TYPE_P (type
))
5303 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5305 /* Promotes small structs returned in a register to full-word size
5306 for big-endian AAPCS. */
5307 if (arm_return_in_msb (type
))
5309 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5310 if (size
% UNITS_PER_WORD
!= 0)
5312 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5313 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5317 return arm_libcall_value_1 (mode
);
5320 /* libcall hashtable helpers. */
5322 struct libcall_hasher
: nofree_ptr_hash
<const rtx_def
>
5324 static inline hashval_t
hash (const rtx_def
*);
5325 static inline bool equal (const rtx_def
*, const rtx_def
*);
5326 static inline void remove (rtx_def
*);
5330 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5332 return rtx_equal_p (p1
, p2
);
5336 libcall_hasher::hash (const rtx_def
*p1
)
5338 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5341 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5344 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5346 *htab
->find_slot (libcall
, INSERT
) = libcall
;
5350 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5352 static bool init_done
= false;
5353 static libcall_table_type
*libcall_htab
= NULL
;
5359 libcall_htab
= new libcall_table_type (31);
5360 add_libcall (libcall_htab
,
5361 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5362 add_libcall (libcall_htab
,
5363 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5364 add_libcall (libcall_htab
,
5365 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5366 add_libcall (libcall_htab
,
5367 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5369 add_libcall (libcall_htab
,
5370 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5371 add_libcall (libcall_htab
,
5372 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5373 add_libcall (libcall_htab
,
5374 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5375 add_libcall (libcall_htab
,
5376 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5378 add_libcall (libcall_htab
,
5379 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5380 add_libcall (libcall_htab
,
5381 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5382 add_libcall (libcall_htab
,
5383 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5384 add_libcall (libcall_htab
,
5385 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5386 add_libcall (libcall_htab
,
5387 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5388 add_libcall (libcall_htab
,
5389 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5390 add_libcall (libcall_htab
,
5391 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5392 add_libcall (libcall_htab
,
5393 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5395 /* Values from double-precision helper functions are returned in core
5396 registers if the selected core only supports single-precision
5397 arithmetic, even if we are using the hard-float ABI. The same is
5398 true for single-precision helpers, but we will never be using the
5399 hard-float ABI on a CPU which doesn't support single-precision
5400 operations in hardware. */
5401 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5402 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5403 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5404 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5405 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5406 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5407 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5408 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5409 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5410 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5411 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5412 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5414 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5416 add_libcall (libcall_htab
,
5417 convert_optab_libfunc (trunc_optab
, HFmode
, DFmode
));
5420 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
5424 arm_libcall_value_1 (machine_mode mode
)
5426 if (TARGET_AAPCS_BASED
)
5427 return aapcs_libcall_value (mode
);
5428 else if (TARGET_IWMMXT_ABI
5429 && arm_vector_mode_supported_p (mode
))
5430 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
5432 return gen_rtx_REG (mode
, ARG_REGISTER (1));
5435 /* Define how to find the value returned by a library function
5436 assuming the value has mode MODE. */
5439 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
5441 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
5442 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5444 /* The following libcalls return their result in integer registers,
5445 even though they return a floating point value. */
5446 if (arm_libcall_uses_aapcs_base (libcall
))
5447 return gen_rtx_REG (mode
, ARG_REGISTER(1));
5451 return arm_libcall_value_1 (mode
);
5454 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5457 arm_function_value_regno_p (const unsigned int regno
)
5459 if (regno
== ARG_REGISTER (1)
5461 && TARGET_AAPCS_BASED
5462 && TARGET_HARD_FLOAT
5463 && regno
== FIRST_VFP_REGNUM
)
5464 || (TARGET_IWMMXT_ABI
5465 && regno
== FIRST_IWMMXT_REGNUM
))
5471 /* Determine the amount of memory needed to store the possible return
5472 registers of an untyped call. */
5474 arm_apply_result_size (void)
5480 if (TARGET_HARD_FLOAT_ABI
)
5482 if (TARGET_IWMMXT_ABI
)
5489 /* Decide whether TYPE should be returned in memory (true)
5490 or in a register (false). FNTYPE is the type of the function making
5493 arm_return_in_memory (const_tree type
, const_tree fntype
)
5497 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5499 if (TARGET_AAPCS_BASED
)
5501 /* Simple, non-aggregate types (ie not including vectors and
5502 complex) are always returned in a register (or registers).
5503 We don't care about which register here, so we can short-cut
5504 some of the detail. */
5505 if (!AGGREGATE_TYPE_P (type
)
5506 && TREE_CODE (type
) != VECTOR_TYPE
5507 && TREE_CODE (type
) != COMPLEX_TYPE
)
5510 /* Any return value that is no larger than one word can be
5512 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5515 /* Check any available co-processors to see if they accept the
5516 type as a register candidate (VFP, for example, can return
5517 some aggregates in consecutive registers). These aren't
5518 available if the call is variadic. */
5519 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5522 /* Vector values should be returned using ARM registers, not
5523 memory (unless they're over 16 bytes, which will break since
5524 we only have four call-clobbered registers to play with). */
5525 if (TREE_CODE (type
) == VECTOR_TYPE
)
5526 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5528 /* The rest go in memory. */
5532 if (TREE_CODE (type
) == VECTOR_TYPE
)
5533 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5535 if (!AGGREGATE_TYPE_P (type
) &&
5536 (TREE_CODE (type
) != VECTOR_TYPE
))
5537 /* All simple types are returned in registers. */
5540 if (arm_abi
!= ARM_ABI_APCS
)
5542 /* ATPCS and later return aggregate types in memory only if they are
5543 larger than a word (or are variable size). */
5544 return (size
< 0 || size
> UNITS_PER_WORD
);
5547 /* For the arm-wince targets we choose to be compatible with Microsoft's
5548 ARM and Thumb compilers, which always return aggregates in memory. */
5550 /* All structures/unions bigger than one word are returned in memory.
5551 Also catch the case where int_size_in_bytes returns -1. In this case
5552 the aggregate is either huge or of variable size, and in either case
5553 we will want to return it via memory and not in a register. */
5554 if (size
< 0 || size
> UNITS_PER_WORD
)
5557 if (TREE_CODE (type
) == RECORD_TYPE
)
5561 /* For a struct the APCS says that we only return in a register
5562 if the type is 'integer like' and every addressable element
5563 has an offset of zero. For practical purposes this means
5564 that the structure can have at most one non bit-field element
5565 and that this element must be the first one in the structure. */
5567 /* Find the first field, ignoring non FIELD_DECL things which will
5568 have been created by C++. */
5569 for (field
= TYPE_FIELDS (type
);
5570 field
&& TREE_CODE (field
) != FIELD_DECL
;
5571 field
= DECL_CHAIN (field
))
5575 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5577 /* Check that the first field is valid for returning in a register. */
5579 /* ... Floats are not allowed */
5580 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5583 /* ... Aggregates that are not themselves valid for returning in
5584 a register are not allowed. */
5585 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5588 /* Now check the remaining fields, if any. Only bitfields are allowed,
5589 since they are not addressable. */
5590 for (field
= DECL_CHAIN (field
);
5592 field
= DECL_CHAIN (field
))
5594 if (TREE_CODE (field
) != FIELD_DECL
)
5597 if (!DECL_BIT_FIELD_TYPE (field
))
5604 if (TREE_CODE (type
) == UNION_TYPE
)
5608 /* Unions can be returned in registers if every element is
5609 integral, or can be returned in an integer register. */
5610 for (field
= TYPE_FIELDS (type
);
5612 field
= DECL_CHAIN (field
))
5614 if (TREE_CODE (field
) != FIELD_DECL
)
5617 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5620 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5626 #endif /* not ARM_WINCE */
5628 /* Return all other types in memory. */
5632 const struct pcs_attribute_arg
5636 } pcs_attribute_args
[] =
5638 {"aapcs", ARM_PCS_AAPCS
},
5639 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5641 /* We could recognize these, but changes would be needed elsewhere
5642 * to implement them. */
5643 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5644 {"atpcs", ARM_PCS_ATPCS
},
5645 {"apcs", ARM_PCS_APCS
},
5647 {NULL
, ARM_PCS_UNKNOWN
}
5651 arm_pcs_from_attribute (tree attr
)
5653 const struct pcs_attribute_arg
*ptr
;
5656 /* Get the value of the argument. */
5657 if (TREE_VALUE (attr
) == NULL_TREE
5658 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5659 return ARM_PCS_UNKNOWN
;
5661 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5663 /* Check it against the list of known arguments. */
5664 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5665 if (streq (arg
, ptr
->arg
))
5668 /* An unrecognized interrupt type. */
5669 return ARM_PCS_UNKNOWN
;
5672 /* Get the PCS variant to use for this call. TYPE is the function's type
5673 specification, DECL is the specific declartion. DECL may be null if
5674 the call could be indirect or if this is a library call. */
5676 arm_get_pcs_model (const_tree type
, const_tree decl
)
5678 bool user_convention
= false;
5679 enum arm_pcs user_pcs
= arm_pcs_default
;
5684 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5687 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5688 user_convention
= true;
5691 if (TARGET_AAPCS_BASED
)
5693 /* Detect varargs functions. These always use the base rules
5694 (no argument is ever a candidate for a co-processor
5696 bool base_rules
= stdarg_p (type
);
5698 if (user_convention
)
5700 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5701 sorry ("non-AAPCS derived PCS variant");
5702 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5703 error ("variadic functions must use the base AAPCS variant");
5707 return ARM_PCS_AAPCS
;
5708 else if (user_convention
)
5710 else if (decl
&& flag_unit_at_a_time
)
5712 /* Local functions never leak outside this compilation unit,
5713 so we are free to use whatever conventions are
5715 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5716 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5718 return ARM_PCS_AAPCS_LOCAL
;
5721 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5722 sorry ("PCS variant");
5724 /* For everything else we use the target's default. */
5725 return arm_pcs_default
;
5730 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5731 const_tree fntype ATTRIBUTE_UNUSED
,
5732 rtx libcall ATTRIBUTE_UNUSED
,
5733 const_tree fndecl ATTRIBUTE_UNUSED
)
5735 /* Record the unallocated VFP registers. */
5736 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5737 pcum
->aapcs_vfp_reg_alloc
= 0;
5740 /* Walk down the type tree of TYPE counting consecutive base elements.
5741 If *MODEP is VOIDmode, then set it to the first valid floating point
5742 type. If a non-floating point type is found, or if a floating point
5743 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5744 otherwise return the count in the sub-tree. */
5746 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5751 switch (TREE_CODE (type
))
5754 mode
= TYPE_MODE (type
);
5755 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
)
5758 if (*modep
== VOIDmode
)
5767 mode
= TYPE_MODE (TREE_TYPE (type
));
5768 if (mode
!= DFmode
&& mode
!= SFmode
)
5771 if (*modep
== VOIDmode
)
5780 /* Use V2SImode and V4SImode as representatives of all 64-bit
5781 and 128-bit vector types, whether or not those modes are
5782 supported with the present options. */
5783 size
= int_size_in_bytes (type
);
5796 if (*modep
== VOIDmode
)
5799 /* Vector modes are considered to be opaque: two vectors are
5800 equivalent for the purposes of being homogeneous aggregates
5801 if they are the same size. */
5810 tree index
= TYPE_DOMAIN (type
);
5812 /* Can't handle incomplete types nor sizes that are not
5814 if (!COMPLETE_TYPE_P (type
)
5815 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5818 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5821 || !TYPE_MAX_VALUE (index
)
5822 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5823 || !TYPE_MIN_VALUE (index
)
5824 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5828 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5829 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5831 /* There must be no padding. */
5832 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5844 /* Can't handle incomplete types nor sizes that are not
5846 if (!COMPLETE_TYPE_P (type
)
5847 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5850 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5852 if (TREE_CODE (field
) != FIELD_DECL
)
5855 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5861 /* There must be no padding. */
5862 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5869 case QUAL_UNION_TYPE
:
5871 /* These aren't very interesting except in a degenerate case. */
5876 /* Can't handle incomplete types nor sizes that are not
5878 if (!COMPLETE_TYPE_P (type
)
5879 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5882 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5884 if (TREE_CODE (field
) != FIELD_DECL
)
5887 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5890 count
= count
> sub_count
? count
: sub_count
;
5893 /* There must be no padding. */
5894 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5907 /* Return true if PCS_VARIANT should use VFP registers. */
5909 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5911 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5913 static bool seen_thumb1_vfp
= false;
5915 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5917 sorry ("Thumb-1 hard-float VFP ABI");
5918 /* sorry() is not immediately fatal, so only display this once. */
5919 seen_thumb1_vfp
= true;
5925 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5928 return (TARGET_32BIT
&& TARGET_HARD_FLOAT
&&
5929 (TARGET_VFP_DOUBLE
|| !is_double
));
5932 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5933 suitable for passing or returning in VFP registers for the PCS
5934 variant selected. If it is, then *BASE_MODE is updated to contain
5935 a machine mode describing each element of the argument's type and
5936 *COUNT to hold the number of such elements. */
5938 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5939 machine_mode mode
, const_tree type
,
5940 machine_mode
*base_mode
, int *count
)
5942 machine_mode new_mode
= VOIDmode
;
5944 /* If we have the type information, prefer that to working things
5945 out from the mode. */
5948 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5950 if (ag_count
> 0 && ag_count
<= 4)
5955 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5956 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5957 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5962 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5965 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5971 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5974 *base_mode
= new_mode
;
5979 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5980 machine_mode mode
, const_tree type
)
5982 int count ATTRIBUTE_UNUSED
;
5983 machine_mode ag_mode ATTRIBUTE_UNUSED
;
5985 if (!use_vfp_abi (pcs_variant
, false))
5987 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5992 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5995 if (!use_vfp_abi (pcum
->pcs_variant
, false))
5998 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
5999 &pcum
->aapcs_vfp_rmode
,
6000 &pcum
->aapcs_vfp_rcount
);
6003 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6004 for the behaviour of this function. */
6007 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6008 const_tree type ATTRIBUTE_UNUSED
)
6011 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
6012 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
6013 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
6016 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
6017 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
6019 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
6021 || (mode
== TImode
&& ! TARGET_NEON
)
6022 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
6025 int rcount
= pcum
->aapcs_vfp_rcount
;
6027 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
6031 /* Avoid using unsupported vector modes. */
6032 if (rmode
== V2SImode
)
6034 else if (rmode
== V4SImode
)
6041 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
6042 for (i
= 0; i
< rcount
; i
++)
6044 rtx tmp
= gen_rtx_REG (rmode
,
6045 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
6046 tmp
= gen_rtx_EXPR_LIST
6048 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
6049 XVECEXP (par
, 0, i
) = tmp
;
6052 pcum
->aapcs_reg
= par
;
6055 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
6061 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6062 comment there for the behaviour of this function. */
6065 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
6067 const_tree type ATTRIBUTE_UNUSED
)
6069 if (!use_vfp_abi (pcs_variant
, false))
6073 || (GET_MODE_CLASS (mode
) == MODE_INT
6074 && GET_MODE_SIZE (mode
) >= GET_MODE_SIZE (TImode
)
6078 machine_mode ag_mode
;
6083 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6088 if (ag_mode
== V2SImode
)
6090 else if (ag_mode
== V4SImode
)
6096 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
6097 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
6098 for (i
= 0; i
< count
; i
++)
6100 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
6101 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
6102 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
6103 XVECEXP (par
, 0, i
) = tmp
;
6109 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
6113 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
6114 machine_mode mode ATTRIBUTE_UNUSED
,
6115 const_tree type ATTRIBUTE_UNUSED
)
6117 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
6118 pcum
->aapcs_vfp_reg_alloc
= 0;
6122 #define AAPCS_CP(X) \
6124 aapcs_ ## X ## _cum_init, \
6125 aapcs_ ## X ## _is_call_candidate, \
6126 aapcs_ ## X ## _allocate, \
6127 aapcs_ ## X ## _is_return_candidate, \
6128 aapcs_ ## X ## _allocate_return_reg, \
6129 aapcs_ ## X ## _advance \
6132 /* Table of co-processors that can be used to pass arguments in
6133 registers. Idealy no arugment should be a candidate for more than
6134 one co-processor table entry, but the table is processed in order
6135 and stops after the first match. If that entry then fails to put
6136 the argument into a co-processor register, the argument will go on
6140 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6141 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
6143 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6144 BLKmode) is a candidate for this co-processor's registers; this
6145 function should ignore any position-dependent state in
6146 CUMULATIVE_ARGS and only use call-type dependent information. */
6147 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6149 /* Return true if the argument does get a co-processor register; it
6150 should set aapcs_reg to an RTX of the register allocated as is
6151 required for a return from FUNCTION_ARG. */
6152 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6154 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6155 be returned in this co-processor's registers. */
6156 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
6158 /* Allocate and return an RTX element to hold the return type of a call. This
6159 routine must not fail and will only be called if is_return_candidate
6160 returned true with the same parameters. */
6161 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
6163 /* Finish processing this argument and prepare to start processing
6165 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6166 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
6174 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6179 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6180 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
6187 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
6189 /* We aren't passed a decl, so we can't check that a call is local.
6190 However, it isn't clear that that would be a win anyway, since it
6191 might limit some tail-calling opportunities. */
6192 enum arm_pcs pcs_variant
;
6196 const_tree fndecl
= NULL_TREE
;
6198 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6201 fntype
= TREE_TYPE (fntype
);
6204 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6207 pcs_variant
= arm_pcs_default
;
6209 if (pcs_variant
!= ARM_PCS_AAPCS
)
6213 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6214 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
6223 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
6226 /* We aren't passed a decl, so we can't check that a call is local.
6227 However, it isn't clear that that would be a win anyway, since it
6228 might limit some tail-calling opportunities. */
6229 enum arm_pcs pcs_variant
;
6230 int unsignedp ATTRIBUTE_UNUSED
;
6234 const_tree fndecl
= NULL_TREE
;
6236 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6239 fntype
= TREE_TYPE (fntype
);
6242 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6245 pcs_variant
= arm_pcs_default
;
6247 /* Promote integer types. */
6248 if (type
&& INTEGRAL_TYPE_P (type
))
6249 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
6251 if (pcs_variant
!= ARM_PCS_AAPCS
)
6255 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6256 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
6258 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
6262 /* Promotes small structs returned in a register to full-word size
6263 for big-endian AAPCS. */
6264 if (type
&& arm_return_in_msb (type
))
6266 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6267 if (size
% UNITS_PER_WORD
!= 0)
6269 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
6270 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
6274 return gen_rtx_REG (mode
, R0_REGNUM
);
6278 aapcs_libcall_value (machine_mode mode
)
6280 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
6281 && GET_MODE_SIZE (mode
) <= 4)
6284 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
6287 /* Lay out a function argument using the AAPCS rules. The rule
6288 numbers referred to here are those in the AAPCS. */
6290 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6291 const_tree type
, bool named
)
6296 /* We only need to do this once per argument. */
6297 if (pcum
->aapcs_arg_processed
)
6300 pcum
->aapcs_arg_processed
= true;
6302 /* Special case: if named is false then we are handling an incoming
6303 anonymous argument which is on the stack. */
6307 /* Is this a potential co-processor register candidate? */
6308 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6310 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
6311 pcum
->aapcs_cprc_slot
= slot
;
6313 /* We don't have to apply any of the rules from part B of the
6314 preparation phase, these are handled elsewhere in the
6319 /* A Co-processor register candidate goes either in its own
6320 class of registers or on the stack. */
6321 if (!pcum
->aapcs_cprc_failed
[slot
])
6323 /* C1.cp - Try to allocate the argument to co-processor
6325 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
6328 /* C2.cp - Put the argument on the stack and note that we
6329 can't assign any more candidates in this slot. We also
6330 need to note that we have allocated stack space, so that
6331 we won't later try to split a non-cprc candidate between
6332 core registers and the stack. */
6333 pcum
->aapcs_cprc_failed
[slot
] = true;
6334 pcum
->can_split
= false;
6337 /* We didn't get a register, so this argument goes on the
6339 gcc_assert (pcum
->can_split
== false);
6344 /* C3 - For double-word aligned arguments, round the NCRN up to the
6345 next even number. */
6346 ncrn
= pcum
->aapcs_ncrn
;
6349 int res
= arm_needs_doubleword_align (mode
, type
);
6350 /* Only warn during RTL expansion of call stmts, otherwise we would
6351 warn e.g. during gimplification even on functions that will be
6352 always inlined, and we'd warn multiple times. Don't warn when
6353 called in expand_function_start either, as we warn instead in
6354 arm_function_arg_boundary in that case. */
6355 if (res
< 0 && warn_psabi
&& currently_expanding_gimple_stmt
)
6356 inform (input_location
, "parameter passing for argument of type "
6357 "%qT changed in GCC 7.1", type
);
6362 nregs
= ARM_NUM_REGS2(mode
, type
);
6364 /* Sigh, this test should really assert that nregs > 0, but a GCC
6365 extension allows empty structs and then gives them empty size; it
6366 then allows such a structure to be passed by value. For some of
6367 the code below we have to pretend that such an argument has
6368 non-zero size so that we 'locate' it correctly either in
6369 registers or on the stack. */
6370 gcc_assert (nregs
>= 0);
6372 nregs2
= nregs
? nregs
: 1;
6374 /* C4 - Argument fits entirely in core registers. */
6375 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
6377 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6378 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
6382 /* C5 - Some core registers left and there are no arguments already
6383 on the stack: split this argument between the remaining core
6384 registers and the stack. */
6385 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
6387 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6388 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6389 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
6393 /* C6 - NCRN is set to 4. */
6394 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6396 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
6400 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6401 for a call to a function whose data type is FNTYPE.
6402 For a library call, FNTYPE is NULL. */
6404 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
6406 tree fndecl ATTRIBUTE_UNUSED
)
6408 /* Long call handling. */
6410 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6412 pcum
->pcs_variant
= arm_pcs_default
;
6414 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6416 if (arm_libcall_uses_aapcs_base (libname
))
6417 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
6419 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
6420 pcum
->aapcs_reg
= NULL_RTX
;
6421 pcum
->aapcs_partial
= 0;
6422 pcum
->aapcs_arg_processed
= false;
6423 pcum
->aapcs_cprc_slot
= -1;
6424 pcum
->can_split
= true;
6426 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6430 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6432 pcum
->aapcs_cprc_failed
[i
] = false;
6433 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
6441 /* On the ARM, the offset starts at 0. */
6443 pcum
->iwmmxt_nregs
= 0;
6444 pcum
->can_split
= true;
6446 /* Varargs vectors are treated the same as long long.
6447 named_count avoids having to change the way arm handles 'named' */
6448 pcum
->named_count
= 0;
6451 if (TARGET_REALLY_IWMMXT
&& fntype
)
6455 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
6457 fn_arg
= TREE_CHAIN (fn_arg
))
6458 pcum
->named_count
+= 1;
6460 if (! pcum
->named_count
)
6461 pcum
->named_count
= INT_MAX
;
6465 /* Return 1 if double word alignment is required for argument passing.
6466 Return -1 if double word alignment used to be required for argument
6467 passing before PR77728 ABI fix, but is not required anymore.
6468 Return 0 if double word alignment is not required and wasn't requried
6471 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
6474 return GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
;
6476 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6477 if (!AGGREGATE_TYPE_P (type
))
6478 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
6480 /* Array types: Use member alignment of element type. */
6481 if (TREE_CODE (type
) == ARRAY_TYPE
)
6482 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
6485 /* Record/aggregate types: Use greatest member alignment of any member. */
6486 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6487 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
6489 if (TREE_CODE (field
) == FIELD_DECL
)
6492 /* Before PR77728 fix, we were incorrectly considering also
6493 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6494 Make sure we can warn about that with -Wpsabi. */
6502 /* Determine where to put an argument to a function.
6503 Value is zero to push the argument on the stack,
6504 or a hard register in which to store the argument.
6506 MODE is the argument's machine mode.
6507 TYPE is the data type of the argument (as a tree).
6508 This is null for libcalls where that information may
6510 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6511 the preceding args and about the function being called.
6512 NAMED is nonzero if this argument is a named parameter
6513 (otherwise it is an extra parameter matching an ellipsis).
6515 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6516 other arguments are passed on the stack. If (NAMED == 0) (which happens
6517 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6518 defined), say it is passed in the stack (function_prologue will
6519 indeed make it pass in the stack if necessary). */
6522 arm_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
6523 const_tree type
, bool named
)
6525 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6528 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6529 a call insn (op3 of a call_value insn). */
6530 if (mode
== VOIDmode
)
6533 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6535 aapcs_layout_arg (pcum
, mode
, type
, named
);
6536 return pcum
->aapcs_reg
;
6539 /* Varargs vectors are treated the same as long long.
6540 named_count avoids having to change the way arm handles 'named' */
6541 if (TARGET_IWMMXT_ABI
6542 && arm_vector_mode_supported_p (mode
)
6543 && pcum
->named_count
> pcum
->nargs
+ 1)
6545 if (pcum
->iwmmxt_nregs
<= 9)
6546 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
6549 pcum
->can_split
= false;
6554 /* Put doubleword aligned quantities in even register pairs. */
6555 if ((pcum
->nregs
& 1) && ARM_DOUBLEWORD_ALIGN
)
6557 int res
= arm_needs_doubleword_align (mode
, type
);
6558 if (res
< 0 && warn_psabi
)
6559 inform (input_location
, "parameter passing for argument of type "
6560 "%qT changed in GCC 7.1", type
);
6565 /* Only allow splitting an arg between regs and memory if all preceding
6566 args were allocated to regs. For args passed by reference we only count
6567 the reference pointer. */
6568 if (pcum
->can_split
)
6571 nregs
= ARM_NUM_REGS2 (mode
, type
);
6573 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
6576 return gen_rtx_REG (mode
, pcum
->nregs
);
6580 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
6582 if (!ARM_DOUBLEWORD_ALIGN
)
6583 return PARM_BOUNDARY
;
6585 int res
= arm_needs_doubleword_align (mode
, type
);
6586 if (res
< 0 && warn_psabi
)
6587 inform (input_location
, "parameter passing for argument of type %qT "
6588 "changed in GCC 7.1", type
);
6590 return res
> 0 ? DOUBLEWORD_ALIGNMENT
: PARM_BOUNDARY
;
6594 arm_arg_partial_bytes (cumulative_args_t pcum_v
, machine_mode mode
,
6595 tree type
, bool named
)
6597 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6598 int nregs
= pcum
->nregs
;
6600 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6602 aapcs_layout_arg (pcum
, mode
, type
, named
);
6603 return pcum
->aapcs_partial
;
6606 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
6609 if (NUM_ARG_REGS
> nregs
6610 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
6612 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
6617 /* Update the data in PCUM to advance over an argument
6618 of mode MODE and data type TYPE.
6619 (TYPE is null for libcalls where that information may not be available.) */
6622 arm_function_arg_advance (cumulative_args_t pcum_v
, machine_mode mode
,
6623 const_tree type
, bool named
)
6625 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6627 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6629 aapcs_layout_arg (pcum
, mode
, type
, named
);
6631 if (pcum
->aapcs_cprc_slot
>= 0)
6633 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6635 pcum
->aapcs_cprc_slot
= -1;
6638 /* Generic stuff. */
6639 pcum
->aapcs_arg_processed
= false;
6640 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6641 pcum
->aapcs_reg
= NULL_RTX
;
6642 pcum
->aapcs_partial
= 0;
6647 if (arm_vector_mode_supported_p (mode
)
6648 && pcum
->named_count
> pcum
->nargs
6649 && TARGET_IWMMXT_ABI
)
6650 pcum
->iwmmxt_nregs
+= 1;
6652 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
6656 /* Variable sized types are passed by reference. This is a GCC
6657 extension to the ARM ABI. */
6660 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
6661 machine_mode mode ATTRIBUTE_UNUSED
,
6662 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6664 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
6667 /* Encode the current state of the #pragma [no_]long_calls. */
6670 OFF
, /* No #pragma [no_]long_calls is in effect. */
6671 LONG
, /* #pragma long_calls is in effect. */
6672 SHORT
/* #pragma no_long_calls is in effect. */
6675 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
6678 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6680 arm_pragma_long_calls
= LONG
;
6684 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6686 arm_pragma_long_calls
= SHORT
;
6690 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6692 arm_pragma_long_calls
= OFF
;
6695 /* Handle an attribute requiring a FUNCTION_DECL;
6696 arguments as in struct attribute_spec.handler. */
6698 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6699 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6701 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6703 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6705 *no_add_attrs
= true;
6711 /* Handle an "interrupt" or "isr" attribute;
6712 arguments as in struct attribute_spec.handler. */
6714 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6719 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6721 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6723 *no_add_attrs
= true;
6725 /* FIXME: the argument if any is checked for type attributes;
6726 should it be checked for decl ones? */
6730 if (TREE_CODE (*node
) == FUNCTION_TYPE
6731 || TREE_CODE (*node
) == METHOD_TYPE
)
6733 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6735 warning (OPT_Wattributes
, "%qE attribute ignored",
6737 *no_add_attrs
= true;
6740 else if (TREE_CODE (*node
) == POINTER_TYPE
6741 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6742 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6743 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6745 *node
= build_variant_type_copy (*node
);
6746 TREE_TYPE (*node
) = build_type_attribute_variant
6748 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6749 *no_add_attrs
= true;
6753 /* Possibly pass this attribute on from the type to a decl. */
6754 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6755 | (int) ATTR_FLAG_FUNCTION_NEXT
6756 | (int) ATTR_FLAG_ARRAY_NEXT
))
6758 *no_add_attrs
= true;
6759 return tree_cons (name
, args
, NULL_TREE
);
6763 warning (OPT_Wattributes
, "%qE attribute ignored",
6772 /* Handle a "pcs" attribute; arguments as in struct
6773 attribute_spec.handler. */
6775 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6776 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6778 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6780 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6781 *no_add_attrs
= true;
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      /* Keep the attribute so that later declarations see it too.  */
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif /* TARGET_DLLIMPORT_DECL_ATTRIBUTES */
6811 /* This function returns true if a function with declaration FNDECL and type
6812 FNTYPE uses the stack to pass arguments or return variables and false
6813 otherwise. This is used for functions with the attributes
6814 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6815 diagnostic messages if the stack is used. NAME is the name of the attribute
6819 cmse_func_args_or_return_in_stack (tree fndecl
, tree name
, tree fntype
)
6821 function_args_iterator args_iter
;
6822 CUMULATIVE_ARGS args_so_far_v
;
6823 cumulative_args_t args_so_far
;
6824 bool first_param
= true;
6825 tree arg_type
, prev_arg_type
= NULL_TREE
, ret_type
;
6827 /* Error out if any argument is passed on the stack. */
6828 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
, fndecl
);
6829 args_so_far
= pack_cumulative_args (&args_so_far_v
);
6830 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
6833 machine_mode arg_mode
= TYPE_MODE (arg_type
);
6835 prev_arg_type
= arg_type
;
6836 if (VOID_TYPE_P (arg_type
))
6840 arm_function_arg_advance (args_so_far
, arg_mode
, arg_type
, true);
6841 arg_rtx
= arm_function_arg (args_so_far
, arg_mode
, arg_type
, true);
6843 || arm_arg_partial_bytes (args_so_far
, arg_mode
, arg_type
, true))
6845 error ("%qE attribute not available to functions with arguments "
6846 "passed on the stack", name
);
6849 first_param
= false;
6852 /* Error out for variadic functions since we cannot control how many
6853 arguments will be passed and thus stack could be used. stdarg_p () is not
6854 used for the checking to avoid browsing arguments twice. */
6855 if (prev_arg_type
!= NULL_TREE
&& !VOID_TYPE_P (prev_arg_type
))
6857 error ("%qE attribute not available to functions with variable number "
6858 "of arguments", name
);
6862 /* Error out if return value is passed on the stack. */
6863 ret_type
= TREE_TYPE (fntype
);
6864 if (arm_return_in_memory (ret_type
, fntype
))
6866 error ("%qE attribute not available to functions that return value on "
6873 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6874 function will check whether the attribute is allowed here and will add the
6875 attribute to the function declaration tree or otherwise issue a warning. */
6878 arm_handle_cmse_nonsecure_entry (tree
*node
, tree name
,
6887 *no_add_attrs
= true;
6888 warning (OPT_Wattributes
, "%qE attribute ignored without -mcmse option.",
6893 /* Ignore attribute for function types. */
6894 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6896 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6898 *no_add_attrs
= true;
6904 /* Warn for static linkage functions. */
6905 if (!TREE_PUBLIC (fndecl
))
6907 warning (OPT_Wattributes
, "%qE attribute has no effect on functions "
6908 "with static linkage", name
);
6909 *no_add_attrs
= true;
6913 *no_add_attrs
|= cmse_func_args_or_return_in_stack (fndecl
, name
,
6914 TREE_TYPE (fndecl
));
6919 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6920 function will check whether the attribute is allowed here and will add the
6921 attribute to the function type tree or otherwise issue a diagnostic. The
6922 reason we check this at declaration time is to only allow the use of the
6923 attribute with declarations of function pointers and not function
6924 declarations. This function checks NODE is of the expected type and issues
6925 diagnostics otherwise using NAME. If it is not of the expected type
6926 *NO_ADD_ATTRS will be set to true. */
6929 arm_handle_cmse_nonsecure_call (tree
*node
, tree name
,
6934 tree decl
= NULL_TREE
, fntype
= NULL_TREE
;
6939 *no_add_attrs
= true;
6940 warning (OPT_Wattributes
, "%qE attribute ignored without -mcmse option.",
6945 if (TREE_CODE (*node
) == VAR_DECL
|| TREE_CODE (*node
) == TYPE_DECL
)
6948 fntype
= TREE_TYPE (decl
);
6951 while (fntype
!= NULL_TREE
&& TREE_CODE (fntype
) == POINTER_TYPE
)
6952 fntype
= TREE_TYPE (fntype
);
6954 if (!decl
|| TREE_CODE (fntype
) != FUNCTION_TYPE
)
6956 warning (OPT_Wattributes
, "%qE attribute only applies to base type of a "
6957 "function pointer", name
);
6958 *no_add_attrs
= true;
6962 *no_add_attrs
|= cmse_func_args_or_return_in_stack (NULL
, name
, fntype
);
6967 /* Prevent trees being shared among function types with and without
6968 cmse_nonsecure_call attribute. */
6969 type
= TREE_TYPE (decl
);
6971 type
= build_distinct_type_copy (type
);
6972 TREE_TYPE (decl
) = type
;
6975 while (TREE_CODE (fntype
) != FUNCTION_TYPE
)
6978 fntype
= TREE_TYPE (fntype
);
6979 fntype
= build_distinct_type_copy (fntype
);
6980 TREE_TYPE (type
) = fntype
;
6983 /* Construct a type attribute and add it to the function type. */
6984 tree attrs
= tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE
,
6985 TYPE_ATTRIBUTES (fntype
));
6986 TYPE_ATTRIBUTES (fntype
) = attrs
;
6990 /* Return 0 if the attributes for two types are incompatible, 1 if they
6991 are compatible, and 2 if they are nearly compatible (which causes a
6992 warning to be generated). */
6994 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
6998 /* Check for mismatch of non-default calling convention. */
6999 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
7002 /* Check for mismatched call attributes. */
7003 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7004 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7005 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7006 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7008 /* Only bother to check if an attribute is defined. */
7009 if (l1
| l2
| s1
| s2
)
7011 /* If one type has an attribute, the other must have the same attribute. */
7012 if ((l1
!= l2
) || (s1
!= s2
))
7015 /* Disallow mixed attributes. */
7016 if ((l1
& s2
) || (l2
& s1
))
7020 /* Check for mismatched ISR attribute. */
7021 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
7023 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
7024 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
7026 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
7030 l1
= lookup_attribute ("cmse_nonsecure_call",
7031 TYPE_ATTRIBUTES (type1
)) != NULL
;
7032 l2
= lookup_attribute ("cmse_nonsecure_call",
7033 TYPE_ATTRIBUTES (type2
)) != NULL
;
7041 /* Assigns default attributes to newly defined type. This is used to
7042 set short_call/long_call attributes for function types of
7043 functions defined inside corresponding #pragma scopes. */
7045 arm_set_default_type_attributes (tree type
)
7047 /* Add __attribute__ ((long_call)) to all functions, when
7048 inside #pragma long_calls or __attribute__ ((short_call)),
7049 when inside #pragma no_long_calls. */
7050 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
7052 tree type_attr_list
, attr_name
;
7053 type_attr_list
= TYPE_ATTRIBUTES (type
);
7055 if (arm_pragma_long_calls
== LONG
)
7056 attr_name
= get_identifier ("long_call");
7057 else if (arm_pragma_long_calls
== SHORT
)
7058 attr_name
= get_identifier ("short_call");
7062 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
7063 TYPE_ATTRIBUTES (type
) = type_attr_list
;
7067 /* Return true if DECL is known to be linked into section SECTION. */
7070 arm_function_in_section_p (tree decl
, section
*section
)
7072 /* We can only be certain about the prevailing symbol definition. */
7073 if (!decl_binds_to_current_def_p (decl
))
7076 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7077 if (!DECL_SECTION_NAME (decl
))
7079 /* Make sure that we will not create a unique section for DECL. */
7080 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
7084 return function_section (decl
) == section
;
7087 /* Return nonzero if a 32-bit "long_call" should be generated for
7088 a call from the current function to DECL. We generate a long_call
7091 a. has an __attribute__((long call))
7092 or b. is within the scope of a #pragma long_calls
7093 or c. the -mlong-calls command line switch has been specified
7095 However we do not generate a long call if the function:
7097 d. has an __attribute__ ((short_call))
7098 or e. is inside the scope of a #pragma no_long_calls
7099 or f. is defined in the same section as the current function. */
7102 arm_is_long_call_p (tree decl
)
7107 return TARGET_LONG_CALLS
;
7109 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
7110 if (lookup_attribute ("short_call", attrs
))
7113 /* For "f", be conservative, and only cater for cases in which the
7114 whole of the current function is placed in the same section. */
7115 if (!flag_reorder_blocks_and_partition
7116 && TREE_CODE (decl
) == FUNCTION_DECL
7117 && arm_function_in_section_p (decl
, current_function_section ()))
7120 if (lookup_attribute ("long_call", attrs
))
7123 return TARGET_LONG_CALLS
;
7126 /* Return nonzero if it is ok to make a tail-call to DECL. */
7128 arm_function_ok_for_sibcall (tree decl
, tree exp
)
7130 unsigned long func_type
;
7132 if (cfun
->machine
->sibcall_blocked
)
7135 /* Never tailcall something if we are generating code for Thumb-1. */
7139 /* The PIC register is live on entry to VxWorks PLT entries, so we
7140 must make the call before restoring the PIC register. */
7141 if (TARGET_VXWORKS_RTP
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
7144 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7145 may be used both as target of the call and base register for restoring
7146 the VFP registers */
7147 if (TARGET_APCS_FRAME
&& TARGET_ARM
7148 && TARGET_HARD_FLOAT
7149 && decl
&& arm_is_long_call_p (decl
))
7152 /* If we are interworking and the function is not declared static
7153 then we can't tail-call it unless we know that it exists in this
7154 compilation unit (since it might be a Thumb routine). */
7155 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
7156 && !TREE_ASM_WRITTEN (decl
))
7159 func_type
= arm_current_func_type ();
7160 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7161 if (IS_INTERRUPT (func_type
))
7164 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7165 generated for entry functions themselves. */
7166 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7169 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7170 this would complicate matters for later code generation. */
7171 if (TREE_CODE (exp
) == CALL_EXPR
)
7173 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7174 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype
)))
7178 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
7180 /* Check that the return value locations are the same. For
7181 example that we aren't returning a value from the sibling in
7182 a VFP register but then need to transfer it to a core
7185 tree decl_or_type
= decl
;
7187 /* If it is an indirect function pointer, get the function type. */
7189 decl_or_type
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7191 a
= arm_function_value (TREE_TYPE (exp
), decl_or_type
, false);
7192 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
7194 if (!rtx_equal_p (a
, b
))
7198 /* Never tailcall if function may be called with a misaligned SP. */
7199 if (IS_STACKALIGN (func_type
))
7202 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7203 references should become a NOP. Don't convert such calls into
7205 if (TARGET_AAPCS_BASED
7206 && arm_abi
== ARM_ABI_AAPCS
7208 && DECL_WEAK (decl
))
7211 /* We cannot do a tailcall for an indirect call by descriptor if all the
7212 argument registers are used because the only register left to load the
7213 address is IP and it will already contain the static chain. */
7214 if (!decl
&& CALL_EXPR_BY_DESCRIPTOR (exp
) && !flag_trampolines
)
7216 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7217 CUMULATIVE_ARGS cum
;
7218 cumulative_args_t cum_v
;
7220 arm_init_cumulative_args (&cum
, fntype
, NULL_RTX
, NULL_TREE
);
7221 cum_v
= pack_cumulative_args (&cum
);
7223 for (tree t
= TYPE_ARG_TYPES (fntype
); t
; t
= TREE_CHAIN (t
))
7225 tree type
= TREE_VALUE (t
);
7226 if (!VOID_TYPE_P (type
))
7227 arm_function_arg_advance (cum_v
, TYPE_MODE (type
), type
, true);
7230 if (!arm_function_arg (cum_v
, SImode
, integer_type_node
, true))
7234 /* Everything else is ok. */
7239 /* Addressing mode support functions. */
7241 /* Return nonzero if X is a legitimate immediate operand when compiling
7242 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7244 legitimate_pic_operand_p (rtx x
)
7246 if (GET_CODE (x
) == SYMBOL_REF
7247 || (GET_CODE (x
) == CONST
7248 && GET_CODE (XEXP (x
, 0)) == PLUS
7249 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
7255 /* Record that the current function needs a PIC register. Initialize
7256 cfun->machine->pic_reg if we have not already done so. */
7259 require_pic_register (void)
7261 /* A lot of the logic here is made obscure by the fact that this
7262 routine gets called as part of the rtx cost estimation process.
7263 We don't want those calls to affect any assumptions about the real
7264 function; and further, we can't call entry_of_function() until we
7265 start the real expansion process. */
7266 if (!crtl
->uses_pic_offset_table
)
7268 gcc_assert (can_create_pseudo_p ());
7269 if (arm_pic_register
!= INVALID_REGNUM
7270 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
7272 if (!cfun
->machine
->pic_reg
)
7273 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
7275 /* Play games to avoid marking the function as needing pic
7276 if we are being called as part of the cost-estimation
7278 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7279 crtl
->uses_pic_offset_table
= 1;
7283 rtx_insn
*seq
, *insn
;
7285 if (!cfun
->machine
->pic_reg
)
7286 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
7288 /* Play games to avoid marking the function as needing pic
7289 if we are being called as part of the cost-estimation
7291 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7293 crtl
->uses_pic_offset_table
= 1;
7296 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
7297 && arm_pic_register
> LAST_LO_REGNUM
)
7298 emit_move_insn (cfun
->machine
->pic_reg
,
7299 gen_rtx_REG (Pmode
, arm_pic_register
));
7301 arm_load_pic_register (0UL);
7306 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
7308 INSN_LOCATION (insn
) = prologue_location
;
7310 /* We can be called during expansion of PHI nodes, where
7311 we can't yet emit instructions directly in the final
7312 insn stream. Queue the insns on the entry edge, they will
7313 be committed after everything else is expanded. */
7314 insert_insn_on_edge (seq
,
7315 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
7322 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
7324 if (GET_CODE (orig
) == SYMBOL_REF
7325 || GET_CODE (orig
) == LABEL_REF
)
7329 gcc_assert (can_create_pseudo_p ());
7330 reg
= gen_reg_rtx (Pmode
);
7333 /* VxWorks does not impose a fixed gap between segments; the run-time
7334 gap can be different from the object-file gap. We therefore can't
7335 use GOTOFF unless we are absolutely sure that the symbol is in the
7336 same segment as the GOT. Unfortunately, the flexibility of linker
7337 scripts means that we can't be sure of that in general, so assume
7338 that GOTOFF is never valid on VxWorks. */
7339 /* References to weak symbols cannot be resolved locally: they
7340 may be overridden by a non-weak definition at link time. */
7342 if ((GET_CODE (orig
) == LABEL_REF
7343 || (GET_CODE (orig
) == SYMBOL_REF
7344 && SYMBOL_REF_LOCAL_P (orig
)
7345 && (SYMBOL_REF_DECL (orig
)
7346 ? !DECL_WEAK (SYMBOL_REF_DECL (orig
)) : 1)))
7348 && arm_pic_data_is_text_relative
)
7349 insn
= arm_pic_static_addr (orig
, reg
);
7355 /* If this function doesn't have a pic register, create one now. */
7356 require_pic_register ();
7358 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
7360 /* Make the MEM as close to a constant as possible. */
7361 mem
= SET_SRC (pat
);
7362 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
7363 MEM_READONLY_P (mem
) = 1;
7364 MEM_NOTRAP_P (mem
) = 1;
7366 insn
= emit_insn (pat
);
7369 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7371 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
7375 else if (GET_CODE (orig
) == CONST
)
7379 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7380 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
7383 /* Handle the case where we have: const (UNSPEC_TLS). */
7384 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
7385 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
7388 /* Handle the case where we have:
7389 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7391 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7392 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
7393 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
7395 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
7401 gcc_assert (can_create_pseudo_p ());
7402 reg
= gen_reg_rtx (Pmode
);
7405 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
7407 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
7408 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
7409 base
== reg
? 0 : reg
);
7411 if (CONST_INT_P (offset
))
7413 /* The base register doesn't really matter, we only want to
7414 test the index for the appropriate mode. */
7415 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
7417 gcc_assert (can_create_pseudo_p ());
7418 offset
= force_reg (Pmode
, offset
);
7421 if (CONST_INT_P (offset
))
7422 return plus_constant (Pmode
, base
, INTVAL (offset
));
7425 if (GET_MODE_SIZE (mode
) > 4
7426 && (GET_MODE_CLASS (mode
) == MODE_INT
7427 || TARGET_SOFT_FLOAT
))
7429 emit_insn (gen_addsi3 (reg
, base
, offset
));
7433 return gen_rtx_PLUS (Pmode
, base
, offset
);
7440 /* Find a spare register to use during the prolog of a function. */
7443 thumb_find_work_register (unsigned long pushed_regs_mask
)
7447 /* Check the argument registers first as these are call-used. The
7448 register allocation order means that sometimes r3 might be used
7449 but earlier argument registers might not, so check them all. */
7450 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
7451 if (!df_regs_ever_live_p (reg
))
7454 /* Before going on to check the call-saved registers we can try a couple
7455 more ways of deducing that r3 is available. The first is when we are
7456 pushing anonymous arguments onto the stack and we have less than 4
7457 registers worth of fixed arguments(*). In this case r3 will be part of
7458 the variable argument list and so we can be sure that it will be
7459 pushed right at the start of the function. Hence it will be available
7460 for the rest of the prologue.
7461 (*): ie crtl->args.pretend_args_size is greater than 0. */
7462 if (cfun
->machine
->uses_anonymous_args
7463 && crtl
->args
.pretend_args_size
> 0)
7464 return LAST_ARG_REGNUM
;
7466 /* The other case is when we have fixed arguments but less than 4 registers
7467 worth. In this case r3 might be used in the body of the function, but
7468 it is not being used to convey an argument into the function. In theory
7469 we could just check crtl->args.size to see how many bytes are
7470 being passed in argument registers, but it seems that it is unreliable.
7471 Sometimes it will have the value 0 when in fact arguments are being
7472 passed. (See testcase execute/20021111-1.c for an example). So we also
7473 check the args_info.nregs field as well. The problem with this field is
7474 that it makes no allowances for arguments that are passed to the
7475 function but which are not used. Hence we could miss an opportunity
7476 when a function has an unused argument in r3. But it is better to be
7477 safe than to be sorry. */
7478 if (! cfun
->machine
->uses_anonymous_args
7479 && crtl
->args
.size
>= 0
7480 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
7481 && (TARGET_AAPCS_BASED
7482 ? crtl
->args
.info
.aapcs_ncrn
< 4
7483 : crtl
->args
.info
.nregs
< 4))
7484 return LAST_ARG_REGNUM
;
7486 /* Otherwise look for a call-saved register that is going to be pushed. */
7487 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
7488 if (pushed_regs_mask
& (1 << reg
))
7493 /* Thumb-2 can use high regs. */
7494 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
7495 if (pushed_regs_mask
& (1 << reg
))
7498 /* Something went wrong - thumb_compute_save_reg_mask()
7499 should have arranged for a suitable register to be pushed. */
7503 static GTY(()) int pic_labelno
;
7505 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7509 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
7511 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
7513 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
7516 gcc_assert (flag_pic
);
7518 pic_reg
= cfun
->machine
->pic_reg
;
7519 if (TARGET_VXWORKS_RTP
)
7521 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
7522 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7523 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
7525 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
7527 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
7528 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
7532 /* We use an UNSPEC rather than a LABEL_REF because this label
7533 never appears in the code stream. */
7535 labelno
= GEN_INT (pic_labelno
++);
7536 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7537 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7539 /* On the ARM the PC register contains 'dot + 8' at the time of the
7540 addition, on the Thumb it is 'dot + 4'. */
7541 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7542 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
7544 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7548 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7550 else /* TARGET_THUMB1 */
7552 if (arm_pic_register
!= INVALID_REGNUM
7553 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
7555 /* We will have pushed the pic register, so we should always be
7556 able to find a work register. */
7557 pic_tmp
= gen_rtx_REG (SImode
,
7558 thumb_find_work_register (saved_regs
));
7559 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
7560 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
7561 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
7563 else if (arm_pic_register
!= INVALID_REGNUM
7564 && arm_pic_register
> LAST_LO_REGNUM
7565 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
7567 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7568 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
7569 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
7572 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7576 /* Need to emit this whether or not we obey regdecls,
7577 since setjmp/longjmp can cause life info to screw up. */
7581 /* Generate code to load the address of a static var when flag_pic is set. */
7583 arm_pic_static_addr (rtx orig
, rtx reg
)
7585 rtx l1
, labelno
, offset_rtx
;
7587 gcc_assert (flag_pic
);
7589 /* We use an UNSPEC rather than a LABEL_REF because this label
7590 never appears in the code stream. */
7591 labelno
= GEN_INT (pic_labelno
++);
7592 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7593 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7595 /* On the ARM the PC register contains 'dot + 8' at the time of the
7596 addition, on the Thumb it is 'dot + 4'. */
7597 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7598 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
7599 UNSPEC_SYMBOL_OFFSET
);
7600 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
7602 return emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
7605 /* Return nonzero if X is valid as an ARM state addressing register. */
7607 arm_address_register_rtx_p (rtx x
, int strict_p
)
7617 return ARM_REGNO_OK_FOR_BASE_P (regno
);
7619 return (regno
<= LAST_ARM_REGNUM
7620 || regno
>= FIRST_PSEUDO_REGISTER
7621 || regno
== FRAME_POINTER_REGNUM
7622 || regno
== ARG_POINTER_REGNUM
);
7625 /* Return TRUE if this rtx is the difference of a symbol and a label,
7626 and will reduce to a PC-relative relocation in the object file.
7627 Expressions like this can be left alone when generating PIC, rather
7628 than forced through the GOT. */
7630 pcrel_constant_p (rtx x
)
7632 if (GET_CODE (x
) == MINUS
)
7633 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
7638 /* Return true if X will surely end up in an index register after next
7641 will_be_in_index_register (const_rtx x
)
7643 /* arm.md: calculate_pic_address will split this into a register. */
7644 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
7647 /* Return nonzero if X is a valid ARM state address operand. */
7649 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
7653 enum rtx_code code
= GET_CODE (x
);
7655 if (arm_address_register_rtx_p (x
, strict_p
))
7658 use_ldrd
= (TARGET_LDRD
7659 && (mode
== DImode
|| mode
== DFmode
));
7661 if (code
== POST_INC
|| code
== PRE_DEC
7662 || ((code
== PRE_INC
|| code
== POST_DEC
)
7663 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7664 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7666 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7667 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7668 && GET_CODE (XEXP (x
, 1)) == PLUS
7669 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7671 rtx addend
= XEXP (XEXP (x
, 1), 1);
7673 /* Don't allow ldrd post increment by register because it's hard
7674 to fixup invalid register choices. */
7676 && GET_CODE (x
) == POST_MODIFY
7680 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
7681 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
7684 /* After reload constants split into minipools will have addresses
7685 from a LABEL_REF. */
7686 else if (reload_completed
7687 && (code
== LABEL_REF
7689 && GET_CODE (XEXP (x
, 0)) == PLUS
7690 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7691 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7694 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7697 else if (code
== PLUS
)
7699 rtx xop0
= XEXP (x
, 0);
7700 rtx xop1
= XEXP (x
, 1);
7702 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7703 && ((CONST_INT_P (xop1
)
7704 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
7705 || (!strict_p
&& will_be_in_index_register (xop1
))))
7706 || (arm_address_register_rtx_p (xop1
, strict_p
)
7707 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
7711 /* Reload currently can't handle MINUS, so disable this for now */
7712 else if (GET_CODE (x
) == MINUS
)
7714 rtx xop0
= XEXP (x
, 0);
7715 rtx xop1
= XEXP (x
, 1);
7717 return (arm_address_register_rtx_p (xop0
, strict_p
)
7718 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
7722 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7723 && code
== SYMBOL_REF
7724 && CONSTANT_POOL_ADDRESS_P (x
)
7726 && symbol_mentioned_p (get_pool_constant (x
))
7727 && ! pcrel_constant_p (get_pool_constant (x
))))
7733 /* Return true if we can avoid creating a constant pool entry for x. */
7735 can_avoid_literal_pool_for_label_p (rtx x
)
7737 /* Normally we can assign constant values to target registers without
7738 the help of constant pool. But there are cases we have to use constant
7740 1) assign a label to register.
7741 2) sign-extend a 8bit value to 32bit and then assign to register.
7743 Constant pool access in format:
7744 (set (reg r0) (mem (symbol_ref (".LC0"))))
7745 will cause the use of literal pool (later in function arm_reorg).
7746 So here we mark such format as an invalid format, then the compiler
7747 will adjust it into:
7748 (set (reg r0) (symbol_ref (".LC0")))
7749 (set (reg r0) (mem (reg r0))).
7750 No extra register is required, and (mem (reg r0)) won't cause the use
7751 of literal pools. */
7752 if (arm_disable_literal_pool
&& GET_CODE (x
) == SYMBOL_REF
7753 && CONSTANT_POOL_ADDRESS_P (x
))
7759 /* Return nonzero if X is a valid Thumb-2 address operand. */
7761 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7764 enum rtx_code code
= GET_CODE (x
);
7766 if (arm_address_register_rtx_p (x
, strict_p
))
7769 use_ldrd
= (TARGET_LDRD
7770 && (mode
== DImode
|| mode
== DFmode
));
7772 if (code
== POST_INC
|| code
== PRE_DEC
7773 || ((code
== PRE_INC
|| code
== POST_DEC
)
7774 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7775 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7777 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7778 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7779 && GET_CODE (XEXP (x
, 1)) == PLUS
7780 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7782 /* Thumb-2 only has autoincrement by constant. */
7783 rtx addend
= XEXP (XEXP (x
, 1), 1);
7784 HOST_WIDE_INT offset
;
7786 if (!CONST_INT_P (addend
))
7789 offset
= INTVAL(addend
);
7790 if (GET_MODE_SIZE (mode
) <= 4)
7791 return (offset
> -256 && offset
< 256);
7793 return (use_ldrd
&& offset
> -1024 && offset
< 1024
7794 && (offset
& 3) == 0);
7797 /* After reload constants split into minipools will have addresses
7798 from a LABEL_REF. */
7799 else if (reload_completed
7800 && (code
== LABEL_REF
7802 && GET_CODE (XEXP (x
, 0)) == PLUS
7803 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7804 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7807 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7810 else if (code
== PLUS
)
7812 rtx xop0
= XEXP (x
, 0);
7813 rtx xop1
= XEXP (x
, 1);
7815 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7816 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
7817 || (!strict_p
&& will_be_in_index_register (xop1
))))
7818 || (arm_address_register_rtx_p (xop1
, strict_p
)
7819 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
7822 else if (can_avoid_literal_pool_for_label_p (x
))
7825 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7826 && code
== SYMBOL_REF
7827 && CONSTANT_POOL_ADDRESS_P (x
)
7829 && symbol_mentioned_p (get_pool_constant (x
))
7830 && ! pcrel_constant_p (get_pool_constant (x
))))
7836 /* Return nonzero if INDEX is valid for an address index operand in
7839 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
7842 HOST_WIDE_INT range
;
7843 enum rtx_code code
= GET_CODE (index
);
7845 /* Standard coprocessor addressing modes. */
7846 if (TARGET_HARD_FLOAT
7847 && (mode
== SFmode
|| mode
== DFmode
))
7848 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7849 && INTVAL (index
) > -1024
7850 && (INTVAL (index
) & 3) == 0);
7852 /* For quad modes, we restrict the constant offset to be slightly less
7853 than what the instruction format permits. We do this because for
7854 quad mode moves, we will actually decompose them into two separate
7855 double-mode reads or writes. INDEX must therefore be a valid
7856 (double-mode) offset and so should INDEX+8. */
7857 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7858 return (code
== CONST_INT
7859 && INTVAL (index
) < 1016
7860 && INTVAL (index
) > -1024
7861 && (INTVAL (index
) & 3) == 0);
7863 /* We have no such constraint on double mode offsets, so we permit the
7864 full range of the instruction format. */
7865 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7866 return (code
== CONST_INT
7867 && INTVAL (index
) < 1024
7868 && INTVAL (index
) > -1024
7869 && (INTVAL (index
) & 3) == 0);
7871 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7872 return (code
== CONST_INT
7873 && INTVAL (index
) < 1024
7874 && INTVAL (index
) > -1024
7875 && (INTVAL (index
) & 3) == 0);
7877 if (arm_address_register_rtx_p (index
, strict_p
)
7878 && (GET_MODE_SIZE (mode
) <= 4))
7881 if (mode
== DImode
|| mode
== DFmode
)
7883 if (code
== CONST_INT
)
7885 HOST_WIDE_INT val
= INTVAL (index
);
7888 return val
> -256 && val
< 256;
7890 return val
> -4096 && val
< 4092;
7893 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
7896 if (GET_MODE_SIZE (mode
) <= 4
7900 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
7904 rtx xiop0
= XEXP (index
, 0);
7905 rtx xiop1
= XEXP (index
, 1);
7907 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7908 && power_of_two_operand (xiop1
, SImode
))
7909 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7910 && power_of_two_operand (xiop0
, SImode
)));
7912 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
7913 || code
== ASHIFT
|| code
== ROTATERT
)
7915 rtx op
= XEXP (index
, 1);
7917 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7920 && INTVAL (op
) <= 31);
7924 /* For ARM v4 we may be doing a sign-extend operation during the
7930 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
7936 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
7938 return (code
== CONST_INT
7939 && INTVAL (index
) < range
7940 && INTVAL (index
) > -range
);
7943 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7944 index operand. i.e. 1, 2, 4 or 8. */
7946 thumb2_index_mul_operand (rtx op
)
7950 if (!CONST_INT_P (op
))
7954 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
7957 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7959 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
7961 enum rtx_code code
= GET_CODE (index
);
7963 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7964 /* Standard coprocessor addressing modes. */
7965 if (TARGET_HARD_FLOAT
7966 && (mode
== SFmode
|| mode
== DFmode
))
7967 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7968 /* Thumb-2 allows only > -256 index range for it's core register
7969 load/stores. Since we allow SF/DF in core registers, we have
7970 to use the intersection between -256~4096 (core) and -1024~1024
7972 && INTVAL (index
) > -256
7973 && (INTVAL (index
) & 3) == 0);
7975 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7977 /* For DImode assume values will usually live in core regs
7978 and only allow LDRD addressing modes. */
7979 if (!TARGET_LDRD
|| mode
!= DImode
)
7980 return (code
== CONST_INT
7981 && INTVAL (index
) < 1024
7982 && INTVAL (index
) > -1024
7983 && (INTVAL (index
) & 3) == 0);
7986 /* For quad modes, we restrict the constant offset to be slightly less
7987 than what the instruction format permits. We do this because for
7988 quad mode moves, we will actually decompose them into two separate
7989 double-mode reads or writes. INDEX must therefore be a valid
7990 (double-mode) offset and so should INDEX+8. */
7991 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7992 return (code
== CONST_INT
7993 && INTVAL (index
) < 1016
7994 && INTVAL (index
) > -1024
7995 && (INTVAL (index
) & 3) == 0);
7997 /* We have no such constraint on double mode offsets, so we permit the
7998 full range of the instruction format. */
7999 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
8000 return (code
== CONST_INT
8001 && INTVAL (index
) < 1024
8002 && INTVAL (index
) > -1024
8003 && (INTVAL (index
) & 3) == 0);
8005 if (arm_address_register_rtx_p (index
, strict_p
)
8006 && (GET_MODE_SIZE (mode
) <= 4))
8009 if (mode
== DImode
|| mode
== DFmode
)
8011 if (code
== CONST_INT
)
8013 HOST_WIDE_INT val
= INTVAL (index
);
8014 /* ??? Can we assume ldrd for thumb2? */
8015 /* Thumb-2 ldrd only has reg+const addressing modes. */
8016 /* ldrd supports offsets of +-1020.
8017 However the ldr fallback does not. */
8018 return val
> -256 && val
< 256 && (val
& 3) == 0;
8026 rtx xiop0
= XEXP (index
, 0);
8027 rtx xiop1
= XEXP (index
, 1);
8029 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
8030 && thumb2_index_mul_operand (xiop1
))
8031 || (arm_address_register_rtx_p (xiop1
, strict_p
)
8032 && thumb2_index_mul_operand (xiop0
)));
8034 else if (code
== ASHIFT
)
8036 rtx op
= XEXP (index
, 1);
8038 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
8041 && INTVAL (op
) <= 3);
8044 return (code
== CONST_INT
8045 && INTVAL (index
) < 4096
8046 && INTVAL (index
) > -256);
8049 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8051 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
8061 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
8063 return (regno
<= LAST_LO_REGNUM
8064 || regno
> LAST_VIRTUAL_REGISTER
8065 || regno
== FRAME_POINTER_REGNUM
8066 || (GET_MODE_SIZE (mode
) >= 4
8067 && (regno
== STACK_POINTER_REGNUM
8068 || regno
>= FIRST_PSEUDO_REGISTER
8069 || x
== hard_frame_pointer_rtx
8070 || x
== arg_pointer_rtx
)));
8073 /* Return nonzero if x is a legitimate index register. This is the case
8074 for any base register that can access a QImode object. */
8076 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
8078 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
8081 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8083 The AP may be eliminated to either the SP or the FP, so we use the
8084 least common denominator, e.g. SImode, and offsets from 0 to 64.
8086 ??? Verify whether the above is the right approach.
8088 ??? Also, the FP may be eliminated to the SP, so perhaps that
8089 needs special handling also.
8091 ??? Look at how the mips16 port solves this problem. It probably uses
8092 better ways to solve some of these problems.
8094 Although it is not incorrect, we don't accept QImode and HImode
8095 addresses based on the frame pointer or arg pointer until the
8096 reload pass starts. This is so that eliminating such addresses
8097 into stack based ones won't produce impossible code. */
8099 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
8101 if (TARGET_HAVE_MOVT
&& can_avoid_literal_pool_for_label_p (x
))
8104 /* ??? Not clear if this is right. Experiment. */
8105 if (GET_MODE_SIZE (mode
) < 4
8106 && !(reload_in_progress
|| reload_completed
)
8107 && (reg_mentioned_p (frame_pointer_rtx
, x
)
8108 || reg_mentioned_p (arg_pointer_rtx
, x
)
8109 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
8110 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
8111 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
8112 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
8115 /* Accept any base register. SP only in SImode or larger. */
8116 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
8119 /* This is PC relative data before arm_reorg runs. */
8120 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
8121 && GET_CODE (x
) == SYMBOL_REF
8122 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
8125 /* This is PC relative data after arm_reorg runs. */
8126 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
8128 && (GET_CODE (x
) == LABEL_REF
8129 || (GET_CODE (x
) == CONST
8130 && GET_CODE (XEXP (x
, 0)) == PLUS
8131 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8132 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8135 /* Post-inc indexing only supported for SImode and larger. */
8136 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
8137 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
8140 else if (GET_CODE (x
) == PLUS
)
8142 /* REG+REG address can be any two index registers. */
8143 /* We disallow FRAME+REG addressing since we know that FRAME
8144 will be replaced with STACK, and SP relative addressing only
8145 permits SP+OFFSET. */
8146 if (GET_MODE_SIZE (mode
) <= 4
8147 && XEXP (x
, 0) != frame_pointer_rtx
8148 && XEXP (x
, 1) != frame_pointer_rtx
8149 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8150 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
8151 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
8154 /* REG+const has 5-7 bit offset for non-SP registers. */
8155 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8156 || XEXP (x
, 0) == arg_pointer_rtx
)
8157 && CONST_INT_P (XEXP (x
, 1))
8158 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
8161 /* REG+const has 10-bit offset for SP, but only SImode and
8162 larger is supported. */
8163 /* ??? Should probably check for DI/DFmode overflow here
8164 just like GO_IF_LEGITIMATE_OFFSET does. */
8165 else if (REG_P (XEXP (x
, 0))
8166 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
8167 && GET_MODE_SIZE (mode
) >= 4
8168 && CONST_INT_P (XEXP (x
, 1))
8169 && INTVAL (XEXP (x
, 1)) >= 0
8170 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
8171 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8174 else if (REG_P (XEXP (x
, 0))
8175 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
8176 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
8177 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
8178 && REGNO (XEXP (x
, 0))
8179 <= LAST_VIRTUAL_POINTER_REGISTER
))
8180 && GET_MODE_SIZE (mode
) >= 4
8181 && CONST_INT_P (XEXP (x
, 1))
8182 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8186 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8187 && GET_MODE_SIZE (mode
) == 4
8188 && GET_CODE (x
) == SYMBOL_REF
8189 && CONSTANT_POOL_ADDRESS_P (x
)
8191 && symbol_mentioned_p (get_pool_constant (x
))
8192 && ! pcrel_constant_p (get_pool_constant (x
))))
8198 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8199 instruction of mode MODE. */
8201 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
8203 switch (GET_MODE_SIZE (mode
))
8206 return val
>= 0 && val
< 32;
8209 return val
>= 0 && val
< 64 && (val
& 1) == 0;
8213 && (val
+ GET_MODE_SIZE (mode
)) <= 128
8219 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
8222 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
8223 else if (TARGET_THUMB2
)
8224 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
8225 else /* if (TARGET_THUMB1) */
8226 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
8229 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8231 Given an rtx X being reloaded into a reg required to be
8232 in class CLASS, return the class of reg to actually use.
8233 In general this is just CLASS, but for the Thumb core registers and
8234 immediate constants we prefer a LO_REGS class or a subset. */
8237 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
8243 if (rclass
== GENERAL_REGS
)
8250 /* Build the SYMBOL_REF for __tls_get_addr. */
8252 static GTY(()) rtx tls_get_addr_libfunc
;
8255 get_tls_get_addr (void)
8257 if (!tls_get_addr_libfunc
)
8258 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
8259 return tls_get_addr_libfunc
;
8263 arm_load_tp (rtx target
)
8266 target
= gen_reg_rtx (SImode
);
8270 /* Can return in any reg. */
8271 emit_insn (gen_load_tp_hard (target
));
8275 /* Always returned in r0. Immediately copy the result into a pseudo,
8276 otherwise other uses of r0 (e.g. setting up function arguments) may
8277 clobber the value. */
8281 emit_insn (gen_load_tp_soft ());
8283 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
8284 emit_move_insn (target
, tmp
);
8290 load_tls_operand (rtx x
, rtx reg
)
8294 if (reg
== NULL_RTX
)
8295 reg
= gen_reg_rtx (SImode
);
8297 tmp
= gen_rtx_CONST (SImode
, x
);
8299 emit_move_insn (reg
, tmp
);
8305 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
8307 rtx label
, labelno
, sum
;
8309 gcc_assert (reloc
!= TLS_DESCSEQ
);
8312 labelno
= GEN_INT (pic_labelno
++);
8313 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8314 label
= gen_rtx_CONST (VOIDmode
, label
);
8316 sum
= gen_rtx_UNSPEC (Pmode
,
8317 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
8318 GEN_INT (TARGET_ARM
? 8 : 4)),
8320 reg
= load_tls_operand (sum
, reg
);
8323 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
8325 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8327 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
8328 LCT_PURE
, /* LCT_CONST? */
8329 Pmode
, 1, reg
, Pmode
);
8331 rtx_insn
*insns
= get_insns ();
8338 arm_tls_descseq_addr (rtx x
, rtx reg
)
8340 rtx labelno
= GEN_INT (pic_labelno
++);
8341 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8342 rtx sum
= gen_rtx_UNSPEC (Pmode
,
8343 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
8344 gen_rtx_CONST (VOIDmode
, label
),
8345 GEN_INT (!TARGET_ARM
)),
8347 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
8349 emit_insn (gen_tlscall (x
, labelno
));
8351 reg
= gen_reg_rtx (SImode
);
8353 gcc_assert (REGNO (reg
) != R0_REGNUM
);
8355 emit_move_insn (reg
, reg0
);
8361 legitimize_tls_address (rtx x
, rtx reg
)
8363 rtx dest
, tp
, label
, labelno
, sum
, ret
, eqv
, addend
;
8365 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
8369 case TLS_MODEL_GLOBAL_DYNAMIC
:
8370 if (TARGET_GNU2_TLS
)
8372 reg
= arm_tls_descseq_addr (x
, reg
);
8374 tp
= arm_load_tp (NULL_RTX
);
8376 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8380 /* Original scheme */
8381 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
8382 dest
= gen_reg_rtx (Pmode
);
8383 emit_libcall_block (insns
, dest
, ret
, x
);
8387 case TLS_MODEL_LOCAL_DYNAMIC
:
8388 if (TARGET_GNU2_TLS
)
8390 reg
= arm_tls_descseq_addr (x
, reg
);
8392 tp
= arm_load_tp (NULL_RTX
);
8394 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8398 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
8400 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8401 share the LDM result with other LD model accesses. */
8402 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
8404 dest
= gen_reg_rtx (Pmode
);
8405 emit_libcall_block (insns
, dest
, ret
, eqv
);
8407 /* Load the addend. */
8408 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
8409 GEN_INT (TLS_LDO32
)),
8411 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
8412 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
8416 case TLS_MODEL_INITIAL_EXEC
:
8417 labelno
= GEN_INT (pic_labelno
++);
8418 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8419 label
= gen_rtx_CONST (VOIDmode
, label
);
8420 sum
= gen_rtx_UNSPEC (Pmode
,
8421 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
8422 GEN_INT (TARGET_ARM
? 8 : 4)),
8424 reg
= load_tls_operand (sum
, reg
);
8427 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
8428 else if (TARGET_THUMB2
)
8429 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
8432 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8433 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
8436 tp
= arm_load_tp (NULL_RTX
);
8438 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8440 case TLS_MODEL_LOCAL_EXEC
:
8441 tp
= arm_load_tp (NULL_RTX
);
8443 reg
= gen_rtx_UNSPEC (Pmode
,
8444 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
8446 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
8448 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8455 /* Try machine-dependent ways of modifying an illegitimate address
8456 to be legitimate. If we find one, return the new, valid address. */
8458 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8460 if (arm_tls_referenced_p (x
))
8464 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
8466 addend
= XEXP (XEXP (x
, 0), 1);
8467 x
= XEXP (XEXP (x
, 0), 0);
8470 if (GET_CODE (x
) != SYMBOL_REF
)
8473 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
8475 x
= legitimize_tls_address (x
, NULL_RTX
);
8479 x
= gen_rtx_PLUS (SImode
, x
, addend
);
8488 /* TODO: legitimize_address for Thumb2. */
8491 return thumb_legitimize_address (x
, orig_x
, mode
);
8494 if (GET_CODE (x
) == PLUS
)
8496 rtx xop0
= XEXP (x
, 0);
8497 rtx xop1
= XEXP (x
, 1);
8499 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
8500 xop0
= force_reg (SImode
, xop0
);
8502 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
8503 && !symbol_mentioned_p (xop1
))
8504 xop1
= force_reg (SImode
, xop1
);
8506 if (ARM_BASE_REGISTER_RTX_P (xop0
)
8507 && CONST_INT_P (xop1
))
8509 HOST_WIDE_INT n
, low_n
;
8513 /* VFP addressing modes actually allow greater offsets, but for
8514 now we just stick with the lowest common denominator. */
8515 if (mode
== DImode
|| mode
== DFmode
)
8527 low_n
= ((mode
) == TImode
? 0
8528 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
8532 base_reg
= gen_reg_rtx (SImode
);
8533 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
8534 emit_move_insn (base_reg
, val
);
8535 x
= plus_constant (Pmode
, base_reg
, low_n
);
8537 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8538 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8541 /* XXX We don't allow MINUS any more -- see comment in
8542 arm_legitimate_address_outer_p (). */
8543 else if (GET_CODE (x
) == MINUS
)
8545 rtx xop0
= XEXP (x
, 0);
8546 rtx xop1
= XEXP (x
, 1);
8548 if (CONSTANT_P (xop0
))
8549 xop0
= force_reg (SImode
, xop0
);
8551 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
8552 xop1
= force_reg (SImode
, xop1
);
8554 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8555 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
8558 /* Make sure to take full advantage of the pre-indexed addressing mode
8559 with absolute addresses which often allows for the base register to
8560 be factorized for multiple adjacent memory references, and it might
8561 even allows for the mini pool to be avoided entirely. */
8562 else if (CONST_INT_P (x
) && optimize
> 0)
8565 HOST_WIDE_INT mask
, base
, index
;
8568 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8569 use a 8-bit index. So let's use a 12-bit index for SImode only and
8570 hope that arm_gen_constant will enable ldrb to use more bits. */
8571 bits
= (mode
== SImode
) ? 12 : 8;
8572 mask
= (1 << bits
) - 1;
8573 base
= INTVAL (x
) & ~mask
;
8574 index
= INTVAL (x
) & mask
;
8575 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
8577 /* It'll most probably be more efficient to generate the base
8578 with more bits set and use a negative index instead. */
8582 base_reg
= force_reg (SImode
, GEN_INT (base
));
8583 x
= plus_constant (Pmode
, base_reg
, index
);
8588 /* We need to find and carefully transform any SYMBOL and LABEL
8589 references; so go back to the original address expression. */
8590 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8592 if (new_x
!= orig_x
)
8600 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8601 to be legitimate. If we find one, return the new, valid address. */
8603 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8605 if (GET_CODE (x
) == PLUS
8606 && CONST_INT_P (XEXP (x
, 1))
8607 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
8608 || INTVAL (XEXP (x
, 1)) < 0))
8610 rtx xop0
= XEXP (x
, 0);
8611 rtx xop1
= XEXP (x
, 1);
8612 HOST_WIDE_INT offset
= INTVAL (xop1
);
8614 /* Try and fold the offset into a biasing of the base register and
8615 then offsetting that. Don't do this when optimizing for space
8616 since it can cause too many CSEs. */
8617 if (optimize_size
&& offset
>= 0
8618 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
8620 HOST_WIDE_INT delta
;
8623 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
8624 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
8625 delta
= 31 * GET_MODE_SIZE (mode
);
8627 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
8629 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
8631 x
= plus_constant (Pmode
, xop0
, delta
);
8633 else if (offset
< 0 && offset
> -256)
8634 /* Small negative offsets are best done with a subtract before the
8635 dereference, forcing these into a register normally takes two
8637 x
= force_operand (x
, NULL_RTX
);
8640 /* For the remaining cases, force the constant into a register. */
8641 xop1
= force_reg (SImode
, xop1
);
8642 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8645 else if (GET_CODE (x
) == PLUS
8646 && s_register_operand (XEXP (x
, 1), SImode
)
8647 && !s_register_operand (XEXP (x
, 0), SImode
))
8649 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
8651 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
8656 /* We need to find and carefully transform any SYMBOL and LABEL
8657 references; so go back to the original address expression. */
8658 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8660 if (new_x
!= orig_x
)
8667 /* Return TRUE if X contains any TLS symbol references. */
8670 arm_tls_referenced_p (rtx x
)
8672 if (! TARGET_HAVE_TLS
)
8675 subrtx_iterator::array_type array
;
8676 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
8678 const_rtx x
= *iter
;
8679 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
8681 /* ARM currently does not provide relocations to encode TLS variables
8682 into AArch32 instructions, only data, so there is no way to
8683 currently implement these if a literal pool is disabled. */
8684 if (arm_disable_literal_pool
)
8685 sorry ("accessing thread-local storage is not currently supported "
8686 "with -mpure-code or -mslow-flash-data");
8691 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8692 TLS offsets, not real symbol references. */
8693 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
8694 iter
.skip_subrtxes ();
8699 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8701 On the ARM, allow any integer (invalid ones are removed later by insn
8702 patterns), nice doubles and symbol_refs which refer to the function's
8705 When generating pic allow anything. */
8708 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
8710 return flag_pic
|| !label_mentioned_p (x
);
8714 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8716 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8717 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8718 for ARMv8-M Baseline or later the result is valid. */
8719 if (TARGET_HAVE_MOVT
&& GET_CODE (x
) == HIGH
)
8722 return (CONST_INT_P (x
)
8723 || CONST_DOUBLE_P (x
)
8724 || CONSTANT_ADDRESS_P (x
)
8725 || (TARGET_HAVE_MOVT
&& GET_CODE (x
) == SYMBOL_REF
)
8730 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
8732 return (!arm_cannot_force_const_mem (mode
, x
)
8734 ? arm_legitimate_constant_p_1 (mode
, x
)
8735 : thumb_legitimate_constant_p (mode
, x
)));
8738 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8741 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8745 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
8747 split_const (x
, &base
, &offset
);
8748 if (GET_CODE (base
) == SYMBOL_REF
8749 && !offset_within_block_p (base
, INTVAL (offset
)))
8752 return arm_tls_referenced_p (x
);
/* Nonzero when X is a REG, or a SUBREG wrapping a REG.  */
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

/* The underlying REG of X, looking through one SUBREG if present.  */
#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
8763 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8765 machine_mode mode
= GET_MODE (x
);
8774 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8781 return COSTS_N_INSNS (1);
8784 if (arm_arch6m
&& arm_m_profile_small_mul
)
8785 return COSTS_N_INSNS (32);
8787 if (CONST_INT_P (XEXP (x
, 1)))
8790 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8797 return COSTS_N_INSNS (2) + cycles
;
8799 return COSTS_N_INSNS (1) + 16;
8802 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8804 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8805 return (COSTS_N_INSNS (words
)
8806 + 4 * ((MEM_P (SET_SRC (x
)))
8807 + MEM_P (SET_DEST (x
))));
8812 if (UINTVAL (x
) < 256
8813 /* 16-bit constant. */
8814 || (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000)))
8816 if (thumb_shiftable_const (INTVAL (x
)))
8817 return COSTS_N_INSNS (2);
8818 return COSTS_N_INSNS (3);
8820 else if ((outer
== PLUS
|| outer
== COMPARE
)
8821 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8823 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8824 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8825 return COSTS_N_INSNS (1);
8826 else if (outer
== AND
)
8829 /* This duplicates the tests in the andsi3 expander. */
8830 for (i
= 9; i
<= 31; i
++)
8831 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8832 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8833 return COSTS_N_INSNS (2);
8835 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8836 || outer
== LSHIFTRT
)
8838 return COSTS_N_INSNS (2);
8844 return COSTS_N_INSNS (3);
8862 /* XXX another guess. */
8863 /* Memory costs quite a lot for the first word, but subsequent words
8864 load at the equivalent of a single insn each. */
8865 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8866 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8871 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8877 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8878 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8884 return total
+ COSTS_N_INSNS (1);
8886 /* Assume a two-shift sequence. Increase the cost slightly so
8887 we prefer actual shifts over an extend operation. */
8888 return total
+ 1 + COSTS_N_INSNS (2);
8895 /* Estimates the size cost of thumb1 instructions.
8896 For now most of the code is copied from thumb1_rtx_costs. We need more
8897 fine grain tuning when we have more related test cases. */
8899 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8901 machine_mode mode
= GET_MODE (x
);
8910 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8914 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8915 defined by RTL expansion, especially for the expansion of
8917 if ((GET_CODE (XEXP (x
, 0)) == MULT
8918 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8919 || (GET_CODE (XEXP (x
, 1)) == MULT
8920 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8921 return COSTS_N_INSNS (2);
8926 return COSTS_N_INSNS (1);
8929 if (CONST_INT_P (XEXP (x
, 1)))
8931 /* Thumb1 mul instruction can't operate on const. We must Load it
8932 into a register first. */
8933 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8934 /* For the targets which have a very small and high-latency multiply
8935 unit, we prefer to synthesize the mult with up to 5 instructions,
8936 giving a good balance between size and performance. */
8937 if (arm_arch6m
&& arm_m_profile_small_mul
)
8938 return COSTS_N_INSNS (5);
8940 return COSTS_N_INSNS (1) + const_size
;
8942 return COSTS_N_INSNS (1);
8945 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8947 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8948 cost
= COSTS_N_INSNS (words
);
8949 if (satisfies_constraint_J (SET_SRC (x
))
8950 || satisfies_constraint_K (SET_SRC (x
))
8951 /* Too big an immediate for a 2-byte mov, using MOVT. */
8952 || (CONST_INT_P (SET_SRC (x
))
8953 && UINTVAL (SET_SRC (x
)) >= 256
8955 && satisfies_constraint_j (SET_SRC (x
)))
8956 /* thumb1_movdi_insn. */
8957 || ((words
> 1) && MEM_P (SET_SRC (x
))))
8958 cost
+= COSTS_N_INSNS (1);
8964 if (UINTVAL (x
) < 256)
8965 return COSTS_N_INSNS (1);
8966 /* movw is 4byte long. */
8967 if (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000))
8968 return COSTS_N_INSNS (2);
8969 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8970 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
8971 return COSTS_N_INSNS (2);
8972 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8973 if (thumb_shiftable_const (INTVAL (x
)))
8974 return COSTS_N_INSNS (2);
8975 return COSTS_N_INSNS (3);
8977 else if ((outer
== PLUS
|| outer
== COMPARE
)
8978 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8980 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8981 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8982 return COSTS_N_INSNS (1);
8983 else if (outer
== AND
)
8986 /* This duplicates the tests in the andsi3 expander. */
8987 for (i
= 9; i
<= 31; i
++)
8988 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8989 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8990 return COSTS_N_INSNS (2);
8992 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8993 || outer
== LSHIFTRT
)
8995 return COSTS_N_INSNS (2);
9001 return COSTS_N_INSNS (3);
9015 return COSTS_N_INSNS (1);
9018 return (COSTS_N_INSNS (1)
9020 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9021 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9022 ? COSTS_N_INSNS (1) : 0));
9026 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9031 /* XXX still guessing. */
9032 switch (GET_MODE (XEXP (x
, 0)))
9035 return (1 + (mode
== DImode
? 4 : 0)
9036 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9039 return (4 + (mode
== DImode
? 4 : 0)
9040 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9043 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9054 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9055 operand, then return the operand that is being shifted. If the shift
9056 is not by a constant, then set SHIFT_REG to point to the operand.
9057 Return NULL if OP is not a shifter operand. */
9059 shifter_op_p (rtx op
, rtx
*shift_reg
)
9061 enum rtx_code code
= GET_CODE (op
);
9063 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9064 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9065 return XEXP (op
, 0);
9066 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9067 return XEXP (op
, 0);
9068 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9069 || code
== ASHIFTRT
)
9071 if (!CONST_INT_P (XEXP (op
, 1)))
9072 *shift_reg
= XEXP (op
, 1);
9073 return XEXP (op
, 0);
9080 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9082 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9083 rtx_code code
= GET_CODE (x
);
9084 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
9086 switch (XINT (x
, 1))
9088 case UNSPEC_UNALIGNED_LOAD
:
9089 /* We can only do unaligned loads into the integer unit, and we can't
9091 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9093 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9094 + extra_cost
->ldst
.load_unaligned
);
9097 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9098 ADDR_SPACE_GENERIC
, speed_p
);
9102 case UNSPEC_UNALIGNED_STORE
:
9103 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9105 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9106 + extra_cost
->ldst
.store_unaligned
);
9108 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
9110 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9111 ADDR_SPACE_GENERIC
, speed_p
);
9122 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9126 *cost
= COSTS_N_INSNS (2);
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))

/* Try to cost a narrow-mode (QI/HI) arithmetic operation OP whose operand
   at index IDX is a left-shift (the only shift form usable when the
   operation is synthesized in SImode).  On a match, accumulates the full
   cost into *cost and returns true from the enclosing function; otherwise
   falls through.  Relies on x, shift_op, shift_reg, extra_cost, cost and
   speed_p being in scope at the expansion site.  */
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0)
9163 /* RTX costs. Make an estimate of the cost of executing the operation
9164 X, which is contained with an operation with code OUTER_CODE.
9165 SPEED_P indicates whether the cost desired is the performance cost,
9166 or the size cost. The estimate is stored in COST and the return
9167 value is TRUE if the cost calculation is final, or FALSE if the
9168 caller should recurse through the operands of X to add additional
9171 We currently make no attempt to model the size savings of Thumb-2
9172 16-bit instructions. At the normal points in compilation where
9173 this code is called we have no measure of whether the condition
9174 flags are live or not, and thus no realistic way to determine what
9175 the size will eventually be. */
9177 arm_rtx_costs_internal (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9178 const struct cpu_cost_table
*extra_cost
,
9179 int *cost
, bool speed_p
)
9181 machine_mode mode
= GET_MODE (x
);
9183 *cost
= COSTS_N_INSNS (1);
9188 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9190 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9198 /* SET RTXs don't have a mode so we get it from the destination. */
9199 mode
= GET_MODE (SET_DEST (x
));
9201 if (REG_P (SET_SRC (x
))
9202 && REG_P (SET_DEST (x
)))
9204 /* Assume that most copies can be done with a single insn,
9205 unless we don't have HW FP, in which case everything
9206 larger than word mode will require two insns. */
9207 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9208 && GET_MODE_SIZE (mode
) > 4)
9211 /* Conditional register moves can be encoded
9212 in 16 bits in Thumb mode. */
9213 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9219 if (CONST_INT_P (SET_SRC (x
)))
9221 /* Handle CONST_INT here, since the value doesn't have a mode
9222 and we would otherwise be unable to work out the true cost. */
9223 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
9226 /* Slightly lower the cost of setting a core reg to a constant.
9227 This helps break up chains and allows for better scheduling. */
9228 if (REG_P (SET_DEST (x
))
9229 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9232 /* Immediate moves with an immediate in the range [0, 255] can be
9233 encoded in 16 bits in Thumb mode. */
9234 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9235 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9237 goto const_int_cost
;
9243 /* A memory access costs 1 insn if the mode is small, or the address is
9244 a single register, otherwise it costs one insn per word. */
9245 if (REG_P (XEXP (x
, 0)))
9246 *cost
= COSTS_N_INSNS (1);
9248 && GET_CODE (XEXP (x
, 0)) == PLUS
9249 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9250 /* This will be split into two instructions.
9251 See arm.md:calculate_pic_address. */
9252 *cost
= COSTS_N_INSNS (2);
9254 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9256 /* For speed optimizations, add the costs of the address and
9257 accessing memory. */
9260 *cost
+= (extra_cost
->ldst
.load
9261 + arm_address_cost (XEXP (x
, 0), mode
,
9262 ADDR_SPACE_GENERIC
, speed_p
));
9264 *cost
+= extra_cost
->ldst
.load
;
9270 /* Calculations of LDM costs are complex. We assume an initial cost
9271 (ldm_1st) which will load the number of registers mentioned in
9272 ldm_regs_per_insn_1st registers; then each additional
9273 ldm_regs_per_insn_subsequent registers cost one more insn. The
9274 formula for N regs is thus:
9276 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9277 + ldm_regs_per_insn_subsequent - 1)
9278 / ldm_regs_per_insn_subsequent).
9280 Additional costs may also be added for addressing. A similar
9281 formula is used for STM. */
9283 bool is_ldm
= load_multiple_operation (x
, SImode
);
9284 bool is_stm
= store_multiple_operation (x
, SImode
);
9286 if (is_ldm
|| is_stm
)
9290 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9291 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9292 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9293 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9294 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9295 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9296 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9298 *cost
+= regs_per_insn_1st
9299 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9300 + regs_per_insn_sub
- 1)
9301 / regs_per_insn_sub
);
9310 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9311 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9312 *cost
+= COSTS_N_INSNS (speed_p
9313 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
9314 else if (mode
== SImode
&& TARGET_IDIV
)
9315 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
9317 *cost
= LIBCALL_COST (2);
9318 return false; /* All arguments must be in registers. */
9321 /* MOD by a power of 2 can be expanded as:
9323 and r0, r0, #(n - 1)
9324 and r1, r1, #(n - 1)
9325 rsbpl r0, r1, #0. */
9326 if (CONST_INT_P (XEXP (x
, 1))
9327 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
9330 *cost
+= COSTS_N_INSNS (3);
9333 *cost
+= 2 * extra_cost
->alu
.logical
9334 + extra_cost
->alu
.arith
;
9340 *cost
= LIBCALL_COST (2);
9341 return false; /* All arguments must be in registers. */
9344 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9346 *cost
+= (COSTS_N_INSNS (1)
9347 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9349 *cost
+= extra_cost
->alu
.shift_reg
;
9357 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9359 *cost
+= (COSTS_N_INSNS (2)
9360 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9362 *cost
+= 2 * extra_cost
->alu
.shift
;
9365 else if (mode
== SImode
)
9367 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9368 /* Slightly disparage register shifts at -Os, but not by much. */
9369 if (!CONST_INT_P (XEXP (x
, 1)))
9370 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9371 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9374 else if (GET_MODE_CLASS (mode
) == MODE_INT
9375 && GET_MODE_SIZE (mode
) < 4)
9379 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9380 /* Slightly disparage register shifts at -Os, but not by
9382 if (!CONST_INT_P (XEXP (x
, 1)))
9383 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9384 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9386 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9388 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9390 /* Can use SBFX/UBFX. */
9392 *cost
+= extra_cost
->alu
.bfx
;
9393 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9397 *cost
+= COSTS_N_INSNS (1);
9398 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9401 if (CONST_INT_P (XEXP (x
, 1)))
9402 *cost
+= 2 * extra_cost
->alu
.shift
;
9404 *cost
+= (extra_cost
->alu
.shift
9405 + extra_cost
->alu
.shift_reg
);
9408 /* Slightly disparage register shifts. */
9409 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9414 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
9415 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9418 if (CONST_INT_P (XEXP (x
, 1)))
9419 *cost
+= (2 * extra_cost
->alu
.shift
9420 + extra_cost
->alu
.log_shift
);
9422 *cost
+= (extra_cost
->alu
.shift
9423 + extra_cost
->alu
.shift_reg
9424 + extra_cost
->alu
.log_shift_reg
);
9430 *cost
= LIBCALL_COST (2);
9439 *cost
+= extra_cost
->alu
.rev
;
9446 /* No rev instruction available. Look at arm_legacy_rev
9447 and thumb_legacy_rev for the form of RTL used then. */
9450 *cost
+= COSTS_N_INSNS (9);
9454 *cost
+= 6 * extra_cost
->alu
.shift
;
9455 *cost
+= 3 * extra_cost
->alu
.logical
;
9460 *cost
+= COSTS_N_INSNS (4);
9464 *cost
+= 2 * extra_cost
->alu
.shift
;
9465 *cost
+= extra_cost
->alu
.arith_shift
;
9466 *cost
+= 2 * extra_cost
->alu
.logical
;
9474 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9475 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9477 if (GET_CODE (XEXP (x
, 0)) == MULT
9478 || GET_CODE (XEXP (x
, 1)) == MULT
)
9480 rtx mul_op0
, mul_op1
, sub_op
;
9483 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9485 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9487 mul_op0
= XEXP (XEXP (x
, 0), 0);
9488 mul_op1
= XEXP (XEXP (x
, 0), 1);
9489 sub_op
= XEXP (x
, 1);
9493 mul_op0
= XEXP (XEXP (x
, 1), 0);
9494 mul_op1
= XEXP (XEXP (x
, 1), 1);
9495 sub_op
= XEXP (x
, 0);
9498 /* The first operand of the multiply may be optionally
9500 if (GET_CODE (mul_op0
) == NEG
)
9501 mul_op0
= XEXP (mul_op0
, 0);
9503 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9504 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9505 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
9511 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9517 rtx shift_by_reg
= NULL
;
9521 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9522 if (shift_op
== NULL
)
9524 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9525 non_shift_op
= XEXP (x
, 0);
9528 non_shift_op
= XEXP (x
, 1);
9530 if (shift_op
!= NULL
)
9532 if (shift_by_reg
!= NULL
)
9535 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9536 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
9539 *cost
+= extra_cost
->alu
.arith_shift
;
9541 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
9542 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
9547 && GET_CODE (XEXP (x
, 1)) == MULT
)
9551 *cost
+= extra_cost
->mult
[0].add
;
9552 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
9553 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
9554 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
9558 if (CONST_INT_P (XEXP (x
, 0)))
9560 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9561 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9563 *cost
= COSTS_N_INSNS (insns
);
9565 *cost
+= insns
* extra_cost
->alu
.arith
;
9566 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9570 *cost
+= extra_cost
->alu
.arith
;
9575 if (GET_MODE_CLASS (mode
) == MODE_INT
9576 && GET_MODE_SIZE (mode
) < 4)
9578 rtx shift_op
, shift_reg
;
9581 /* We check both sides of the MINUS for shifter operands since,
9582 unlike PLUS, it's not commutative. */
9584 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9585 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9587 /* Slightly disparage, as we might need to widen the result. */
9590 *cost
+= extra_cost
->alu
.arith
;
9592 if (CONST_INT_P (XEXP (x
, 0)))
9594 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9603 *cost
+= COSTS_N_INSNS (1);
9605 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9607 rtx op1
= XEXP (x
, 1);
9610 *cost
+= 2 * extra_cost
->alu
.arith
;
9612 if (GET_CODE (op1
) == ZERO_EXTEND
)
9613 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
9616 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
9617 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9621 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9624 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9625 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
9627 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
9630 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9631 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9634 *cost
+= (extra_cost
->alu
.arith
9635 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9636 ? extra_cost
->alu
.arith
9637 : extra_cost
->alu
.arith_shift
));
9638 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
9639 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9640 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9645 *cost
+= 2 * extra_cost
->alu
.arith
;
9651 *cost
= LIBCALL_COST (2);
9655 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9656 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9658 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9660 rtx mul_op0
, mul_op1
, add_op
;
9663 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9665 mul_op0
= XEXP (XEXP (x
, 0), 0);
9666 mul_op1
= XEXP (XEXP (x
, 0), 1);
9667 add_op
= XEXP (x
, 1);
9669 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9670 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9671 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
9677 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9680 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9682 *cost
= LIBCALL_COST (2);
9686 /* Narrow modes can be synthesized in SImode, but the range
9687 of useful sub-operations is limited. Check for shift operations
9688 on one of the operands. Only left shifts can be used in the
9690 if (GET_MODE_CLASS (mode
) == MODE_INT
9691 && GET_MODE_SIZE (mode
) < 4)
9693 rtx shift_op
, shift_reg
;
9696 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9698 if (CONST_INT_P (XEXP (x
, 1)))
9700 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9701 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9703 *cost
= COSTS_N_INSNS (insns
);
9705 *cost
+= insns
* extra_cost
->alu
.arith
;
9706 /* Slightly penalize a narrow operation as the result may
9708 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9712 /* Slightly penalize a narrow operation as the result may
9716 *cost
+= extra_cost
->alu
.arith
;
9723 rtx shift_op
, shift_reg
;
9726 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9727 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9729 /* UXTA[BH] or SXTA[BH]. */
9731 *cost
+= extra_cost
->alu
.extend_arith
;
9732 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9734 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
9739 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9740 if (shift_op
!= NULL
)
9745 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9746 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9749 *cost
+= extra_cost
->alu
.arith_shift
;
9751 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9752 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9755 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9757 rtx mul_op
= XEXP (x
, 0);
9759 if (TARGET_DSP_MULTIPLY
9760 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9761 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9762 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9763 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9764 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9765 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9766 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9767 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9768 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9769 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9770 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9771 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9776 *cost
+= extra_cost
->mult
[0].extend_add
;
9777 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
9778 SIGN_EXTEND
, 0, speed_p
)
9779 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
9780 SIGN_EXTEND
, 0, speed_p
)
9781 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9786 *cost
+= extra_cost
->mult
[0].add
;
9787 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
9788 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
9789 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9792 if (CONST_INT_P (XEXP (x
, 1)))
9794 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9795 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9797 *cost
= COSTS_N_INSNS (insns
);
9799 *cost
+= insns
* extra_cost
->alu
.arith
;
9800 *cost
+= rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9804 *cost
+= extra_cost
->alu
.arith
;
9812 && GET_CODE (XEXP (x
, 0)) == MULT
9813 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
9814 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
9815 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
9816 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
9819 *cost
+= extra_cost
->mult
[1].extend_add
;
9820 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
9821 ZERO_EXTEND
, 0, speed_p
)
9822 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
9823 ZERO_EXTEND
, 0, speed_p
)
9824 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9828 *cost
+= COSTS_N_INSNS (1);
9830 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9831 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9834 *cost
+= (extra_cost
->alu
.arith
9835 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9836 ? extra_cost
->alu
.arith
9837 : extra_cost
->alu
.arith_shift
));
9839 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9841 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9846 *cost
+= 2 * extra_cost
->alu
.arith
;
9851 *cost
= LIBCALL_COST (2);
9854 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
9857 *cost
+= extra_cost
->alu
.rev
;
9865 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9866 rtx op0
= XEXP (x
, 0);
9867 rtx shift_op
, shift_reg
;
9871 || (code
== IOR
&& TARGET_THUMB2
)))
9872 op0
= XEXP (op0
, 0);
9875 shift_op
= shifter_op_p (op0
, &shift_reg
);
9876 if (shift_op
!= NULL
)
9881 *cost
+= extra_cost
->alu
.log_shift_reg
;
9882 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9885 *cost
+= extra_cost
->alu
.log_shift
;
9887 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9888 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9892 if (CONST_INT_P (XEXP (x
, 1)))
9894 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
9895 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9898 *cost
= COSTS_N_INSNS (insns
);
9900 *cost
+= insns
* extra_cost
->alu
.logical
;
9901 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
9906 *cost
+= extra_cost
->alu
.logical
;
9907 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
9908 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9914 rtx op0
= XEXP (x
, 0);
9915 enum rtx_code subcode
= GET_CODE (op0
);
9917 *cost
+= COSTS_N_INSNS (1);
9921 || (code
== IOR
&& TARGET_THUMB2
)))
9922 op0
= XEXP (op0
, 0);
9924 if (GET_CODE (op0
) == ZERO_EXTEND
)
9927 *cost
+= 2 * extra_cost
->alu
.logical
;
9929 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
9931 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
9934 else if (GET_CODE (op0
) == SIGN_EXTEND
)
9937 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
9939 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
9941 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
9946 *cost
+= 2 * extra_cost
->alu
.logical
;
9952 *cost
= LIBCALL_COST (2);
9956 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9957 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9959 rtx op0
= XEXP (x
, 0);
9961 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
9962 op0
= XEXP (op0
, 0);
9965 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
9967 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
9968 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
9971 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9973 *cost
= LIBCALL_COST (2);
9979 if (TARGET_DSP_MULTIPLY
9980 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9981 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9982 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9983 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9984 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
9985 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
9986 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
9987 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
9988 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9989 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9990 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9991 && (INTVAL (XEXP (XEXP (x
, 1), 1))
9996 *cost
+= extra_cost
->mult
[0].extend
;
9997 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
9998 SIGN_EXTEND
, 0, speed_p
);
9999 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
10000 SIGN_EXTEND
, 1, speed_p
);
10004 *cost
+= extra_cost
->mult
[0].simple
;
10008 if (mode
== DImode
)
10011 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10012 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10013 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10014 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10017 *cost
+= extra_cost
->mult
[1].extend
;
10018 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
10019 ZERO_EXTEND
, 0, speed_p
)
10020 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10021 ZERO_EXTEND
, 0, speed_p
));
10025 *cost
= LIBCALL_COST (2);
10030 *cost
= LIBCALL_COST (2);
10034 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10035 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10037 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10040 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
10045 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10049 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10051 *cost
= LIBCALL_COST (1);
10055 if (mode
== SImode
)
10057 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10059 *cost
+= COSTS_N_INSNS (1);
10060 /* Assume the non-flag-changing variant. */
10062 *cost
+= (extra_cost
->alu
.log_shift
10063 + extra_cost
->alu
.arith_shift
);
10064 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
10068 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10069 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10071 *cost
+= COSTS_N_INSNS (1);
10072 /* No extra cost for MOV imm and MVN imm. */
10073 /* If the comparison op is using the flags, there's no further
10074 cost, otherwise we need to add the cost of the comparison. */
10075 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10076 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10077 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10079 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
10080 *cost
+= (COSTS_N_INSNS (1)
10081 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
10083 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
10086 *cost
+= extra_cost
->alu
.arith
;
10092 *cost
+= extra_cost
->alu
.arith
;
10096 if (GET_MODE_CLASS (mode
) == MODE_INT
10097 && GET_MODE_SIZE (mode
) < 4)
10099 /* Slightly disparage, as we might need an extend operation. */
10102 *cost
+= extra_cost
->alu
.arith
;
10106 if (mode
== DImode
)
10108 *cost
+= COSTS_N_INSNS (1);
10110 *cost
+= 2 * extra_cost
->alu
.arith
;
10115 *cost
= LIBCALL_COST (1);
10119 if (mode
== SImode
)
10122 rtx shift_reg
= NULL
;
10124 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10128 if (shift_reg
!= NULL
)
10131 *cost
+= extra_cost
->alu
.log_shift_reg
;
10132 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10135 *cost
+= extra_cost
->alu
.log_shift
;
10136 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
10141 *cost
+= extra_cost
->alu
.logical
;
10144 if (mode
== DImode
)
10146 *cost
+= COSTS_N_INSNS (1);
10152 *cost
+= LIBCALL_COST (1);
10157 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10159 *cost
+= COSTS_N_INSNS (3);
10162 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
10163 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
10165 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
10166 /* Assume that if one arm of the if_then_else is a register,
10167 that it will be tied with the result and eliminate the
10168 conditional insn. */
10169 if (REG_P (XEXP (x
, 1)))
10171 else if (REG_P (XEXP (x
, 2)))
10177 if (extra_cost
->alu
.non_exec_costs_exec
)
10178 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10180 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10183 *cost
+= op1cost
+ op2cost
;
10189 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10193 machine_mode op0mode
;
10194 /* We'll mostly assume that the cost of a compare is the cost of the
10195 LHS. However, there are some notable exceptions. */
10197 /* Floating point compares are never done as side-effects. */
10198 op0mode
= GET_MODE (XEXP (x
, 0));
10199 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10200 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10203 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10205 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10207 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
10213 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10215 *cost
= LIBCALL_COST (2);
10219 /* DImode compares normally take two insns. */
10220 if (op0mode
== DImode
)
10222 *cost
+= COSTS_N_INSNS (1);
10224 *cost
+= 2 * extra_cost
->alu
.arith
;
10228 if (op0mode
== SImode
)
10233 if (XEXP (x
, 1) == const0_rtx
10234 && !(REG_P (XEXP (x
, 0))
10235 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10236 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10238 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10240 /* Multiply operations that set the flags are often
10241 significantly more expensive. */
10243 && GET_CODE (XEXP (x
, 0)) == MULT
10244 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10245 *cost
+= extra_cost
->mult
[0].flag_setting
;
10248 && GET_CODE (XEXP (x
, 0)) == PLUS
10249 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10250 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10252 *cost
+= extra_cost
->mult
[0].flag_setting
;
10257 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10258 if (shift_op
!= NULL
)
10260 if (shift_reg
!= NULL
)
10262 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
10265 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10268 *cost
+= extra_cost
->alu
.arith_shift
;
10269 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
10270 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
10275 *cost
+= extra_cost
->alu
.arith
;
10276 if (CONST_INT_P (XEXP (x
, 1))
10277 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10279 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10287 *cost
= LIBCALL_COST (2);
10310 if (outer_code
== SET
)
10312 /* Is it a store-flag operation? */
10313 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10314 && XEXP (x
, 1) == const0_rtx
)
10316 /* Thumb also needs an IT insn. */
10317 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
10320 if (XEXP (x
, 1) == const0_rtx
)
10325 /* LSR Rd, Rn, #31. */
10327 *cost
+= extra_cost
->alu
.shift
;
10337 *cost
+= COSTS_N_INSNS (1);
10341 /* RSBS T1, Rn, Rn, LSR #31
10343 *cost
+= COSTS_N_INSNS (1);
10345 *cost
+= extra_cost
->alu
.arith_shift
;
10349 /* RSB Rd, Rn, Rn, ASR #1
10350 LSR Rd, Rd, #31. */
10351 *cost
+= COSTS_N_INSNS (1);
10353 *cost
+= (extra_cost
->alu
.arith_shift
10354 + extra_cost
->alu
.shift
);
10360 *cost
+= COSTS_N_INSNS (1);
10362 *cost
+= extra_cost
->alu
.shift
;
10366 /* Remaining cases are either meaningless or would take
10367 three insns anyway. */
10368 *cost
= COSTS_N_INSNS (3);
10371 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10376 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10377 if (CONST_INT_P (XEXP (x
, 1))
10378 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10380 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10387 /* Not directly inside a set. If it involves the condition code
10388 register it must be the condition for a branch, cond_exec or
10389 I_T_E operation. Since the comparison is performed elsewhere
10390 this is just the control part which has no additional
10392 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10393 && XEXP (x
, 1) == const0_rtx
)
10401 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10402 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10405 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10409 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10411 *cost
= LIBCALL_COST (1);
10415 if (mode
== SImode
)
10418 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10422 *cost
= LIBCALL_COST (1);
10426 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10427 && MEM_P (XEXP (x
, 0)))
10429 if (mode
== DImode
)
10430 *cost
+= COSTS_N_INSNS (1);
10435 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10436 *cost
+= extra_cost
->ldst
.load
;
10438 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10440 if (mode
== DImode
)
10441 *cost
+= extra_cost
->alu
.shift
;
10446 /* Widening from less than 32-bits requires an extend operation. */
10447 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10449 /* We have SXTB/SXTH. */
10450 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10452 *cost
+= extra_cost
->alu
.extend
;
10454 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10456 /* Needs two shifts. */
10457 *cost
+= COSTS_N_INSNS (1);
10458 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10460 *cost
+= 2 * extra_cost
->alu
.shift
;
10463 /* Widening beyond 32-bits requires one more insn. */
10464 if (mode
== DImode
)
10466 *cost
+= COSTS_N_INSNS (1);
10468 *cost
+= extra_cost
->alu
.shift
;
10475 || GET_MODE (XEXP (x
, 0)) == SImode
10476 || GET_MODE (XEXP (x
, 0)) == QImode
)
10477 && MEM_P (XEXP (x
, 0)))
10479 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10481 if (mode
== DImode
)
10482 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10487 /* Widening from less than 32-bits requires an extend operation. */
10488 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10490 /* UXTB can be a shorter instruction in Thumb2, but it might
10491 be slower than the AND Rd, Rn, #255 alternative. When
10492 optimizing for speed it should never be slower to use
10493 AND, and we don't really model 16-bit vs 32-bit insns
10496 *cost
+= extra_cost
->alu
.logical
;
10498 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10500 /* We have UXTB/UXTH. */
10501 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10503 *cost
+= extra_cost
->alu
.extend
;
10505 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10507 /* Needs two shifts. It's marginally preferable to use
10508 shifts rather than two BIC instructions as the second
10509 shift may merge with a subsequent insn as a shifter
10511 *cost
= COSTS_N_INSNS (2);
10512 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10514 *cost
+= 2 * extra_cost
->alu
.shift
;
10517 /* Widening beyond 32-bits requires one more insn. */
10518 if (mode
== DImode
)
10520 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10527 /* CONST_INT has no mode, so we cannot tell for sure how many
10528 insns are really going to be needed. The best we can do is
10529 look at the value passed. If it fits in SImode, then assume
10530 that's the mode it will be used for. Otherwise assume it
10531 will be used in DImode. */
10532 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10537 /* Avoid blowing up in arm_gen_constant (). */
10538 if (!(outer_code
== PLUS
10539 || outer_code
== AND
10540 || outer_code
== IOR
10541 || outer_code
== XOR
10542 || outer_code
== MINUS
))
10546 if (mode
== SImode
)
10548 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10549 INTVAL (x
), NULL
, NULL
,
10555 *cost
+= COSTS_N_INSNS (arm_gen_constant
10556 (outer_code
, SImode
, NULL
,
10557 trunc_int_for_mode (INTVAL (x
), SImode
),
10559 + arm_gen_constant (outer_code
, SImode
, NULL
,
10560 INTVAL (x
) >> 32, NULL
,
10572 if (arm_arch_thumb2
&& !flag_pic
)
10573 *cost
+= COSTS_N_INSNS (1);
10575 *cost
+= extra_cost
->ldst
.load
;
10578 *cost
+= COSTS_N_INSNS (1);
10582 *cost
+= COSTS_N_INSNS (1);
10584 *cost
+= extra_cost
->alu
.arith
;
10590 *cost
= COSTS_N_INSNS (4);
10595 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10596 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10598 if (vfp3_const_double_rtx (x
))
10601 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10607 if (mode
== DFmode
)
10608 *cost
+= extra_cost
->ldst
.loadd
;
10610 *cost
+= extra_cost
->ldst
.loadf
;
10613 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
10617 *cost
= COSTS_N_INSNS (4);
10623 && TARGET_HARD_FLOAT
10624 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10625 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10626 *cost
= COSTS_N_INSNS (1);
10628 *cost
= COSTS_N_INSNS (4);
10633 /* When optimizing for size, we prefer constant pool entries to
10634 MOVW/MOVT pairs, so bump the cost of these slightly. */
10641 *cost
+= extra_cost
->alu
.clz
;
10645 if (XEXP (x
, 1) == const0_rtx
)
10648 *cost
+= extra_cost
->alu
.log_shift
;
10649 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10652 /* Fall through. */
10656 *cost
+= COSTS_N_INSNS (1);
10660 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10661 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10662 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10663 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10664 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10665 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10666 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10667 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10671 *cost
+= extra_cost
->mult
[1].extend
;
10672 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
10673 ZERO_EXTEND
, 0, speed_p
)
10674 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
10675 ZERO_EXTEND
, 0, speed_p
));
10678 *cost
= LIBCALL_COST (1);
10681 case UNSPEC_VOLATILE
:
10683 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10686 /* Reading the PC is like reading any other register. Writing it
10687 is more expensive, but we take that into account elsewhere. */
10692 /* TODO: Simple zero_extract of bottom bits using AND. */
10693 /* Fall through. */
10697 && CONST_INT_P (XEXP (x
, 1))
10698 && CONST_INT_P (XEXP (x
, 2)))
10701 *cost
+= extra_cost
->alu
.bfx
;
10702 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10705 /* Without UBFX/SBFX, need to resort to shift operations. */
10706 *cost
+= COSTS_N_INSNS (1);
10708 *cost
+= 2 * extra_cost
->alu
.shift
;
10709 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
10713 if (TARGET_HARD_FLOAT
)
10716 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10717 if (!TARGET_FPU_ARMV8
10718 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10720 /* Pre v8, widening HF->DF is a two-step process, first
10721 widening to SFmode. */
10722 *cost
+= COSTS_N_INSNS (1);
10724 *cost
+= extra_cost
->fp
[0].widen
;
10726 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10730 *cost
= LIBCALL_COST (1);
10733 case FLOAT_TRUNCATE
:
10734 if (TARGET_HARD_FLOAT
)
10737 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10738 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10740 /* Vector modes? */
10742 *cost
= LIBCALL_COST (1);
10746 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
10748 rtx op0
= XEXP (x
, 0);
10749 rtx op1
= XEXP (x
, 1);
10750 rtx op2
= XEXP (x
, 2);
10753 /* vfms or vfnma. */
10754 if (GET_CODE (op0
) == NEG
)
10755 op0
= XEXP (op0
, 0);
10757 /* vfnms or vfnma. */
10758 if (GET_CODE (op2
) == NEG
)
10759 op2
= XEXP (op2
, 0);
10761 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
10762 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
10763 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
10766 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
10771 *cost
= LIBCALL_COST (3);
10776 if (TARGET_HARD_FLOAT
)
10778 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10779 a vcvt fixed-point conversion. */
10780 if (code
== FIX
&& mode
== SImode
10781 && GET_CODE (XEXP (x
, 0)) == FIX
10782 && GET_MODE (XEXP (x
, 0)) == SFmode
10783 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10784 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10788 *cost
+= extra_cost
->fp
[0].toint
;
10790 *cost
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10795 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10797 mode
= GET_MODE (XEXP (x
, 0));
10799 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
10800 /* Strip of the 'cost' of rounding towards zero. */
10801 if (GET_CODE (XEXP (x
, 0)) == FIX
)
10802 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
10805 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10806 /* ??? Increase the cost to deal with transferring from
10807 FP -> CORE registers? */
10810 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10811 && TARGET_FPU_ARMV8
)
10814 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
10817 /* Vector costs? */
10819 *cost
= LIBCALL_COST (1);
10823 case UNSIGNED_FLOAT
:
10824 if (TARGET_HARD_FLOAT
)
10826 /* ??? Increase the cost to deal with transferring from CORE
10827 -> FP registers? */
10829 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
10832 *cost
= LIBCALL_COST (1);
10840 /* Just a guess. Guess number of instructions in the asm
10841 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10842 though (see PR60663). */
10843 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
10844 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
10846 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
10850 if (mode
!= VOIDmode
)
10851 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
10853 *cost
= COSTS_N_INSNS (4); /* Who knows? */
10858 #undef HANDLE_NARROW_SHIFT_ARITH
10860 /* RTX costs entry point. */
10863 arm_rtx_costs (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
, int outer_code
,
10864 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
10867 int code
= GET_CODE (x
);
10868 gcc_assert (current_tune
->insn_extra_cost
);
10870 result
= arm_rtx_costs_internal (x
, (enum rtx_code
) code
,
10871 (enum rtx_code
) outer_code
,
10872 current_tune
->insn_extra_cost
,
10875 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
10877 print_rtl_single (dump_file
, x
);
10878 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
10879 *total
, result
? "final" : "partial");
10884 /* All address computations that can be done are free, but rtx cost returns
10885 the same for practically all of them. So we weight the different types
10886 of address here in the order (most pref first):
10887 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10889 arm_arm_address_cost (rtx x
)
10891 enum rtx_code c
= GET_CODE (x
);
10893 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
10895 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
10900 if (CONST_INT_P (XEXP (x
, 1)))
10903 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
10913 arm_thumb_address_cost (rtx x
)
10915 enum rtx_code c
= GET_CODE (x
);
10920 && REG_P (XEXP (x
, 0))
10921 && CONST_INT_P (XEXP (x
, 1)))
10928 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
10929 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
10931 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
10934 /* Adjust cost hook for XScale. */
10936 xscale_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
10939 /* Some true dependencies can have a higher cost depending
10940 on precisely how certain input operands are used. */
10942 && recog_memoized (insn
) >= 0
10943 && recog_memoized (dep
) >= 0)
10945 int shift_opnum
= get_attr_shift (insn
);
10946 enum attr_type attr_type
= get_attr_type (dep
);
10948 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10949 operand for INSN. If we have a shifted input operand and the
10950 instruction we depend on is another ALU instruction, then we may
10951 have to account for an additional stall. */
10952 if (shift_opnum
!= 0
10953 && (attr_type
== TYPE_ALU_SHIFT_IMM
10954 || attr_type
== TYPE_ALUS_SHIFT_IMM
10955 || attr_type
== TYPE_LOGIC_SHIFT_IMM
10956 || attr_type
== TYPE_LOGICS_SHIFT_IMM
10957 || attr_type
== TYPE_ALU_SHIFT_REG
10958 || attr_type
== TYPE_ALUS_SHIFT_REG
10959 || attr_type
== TYPE_LOGIC_SHIFT_REG
10960 || attr_type
== TYPE_LOGICS_SHIFT_REG
10961 || attr_type
== TYPE_MOV_SHIFT
10962 || attr_type
== TYPE_MVN_SHIFT
10963 || attr_type
== TYPE_MOV_SHIFT_REG
10964 || attr_type
== TYPE_MVN_SHIFT_REG
))
10966 rtx shifted_operand
;
10969 /* Get the shifted operand. */
10970 extract_insn (insn
);
10971 shifted_operand
= recog_data
.operand
[shift_opnum
];
10973 /* Iterate over all the operands in DEP. If we write an operand
10974 that overlaps with SHIFTED_OPERAND, then we have increase the
10975 cost of this dependency. */
10976 extract_insn (dep
);
10977 preprocess_constraints (dep
);
10978 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
10980 /* We can ignore strict inputs. */
10981 if (recog_data
.operand_type
[opno
] == OP_IN
)
10984 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
10996 /* Adjust cost hook for Cortex A9. */
10998 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11008 case REG_DEP_OUTPUT
:
11009 if (recog_memoized (insn
) >= 0
11010 && recog_memoized (dep
) >= 0)
11012 if (GET_CODE (PATTERN (insn
)) == SET
)
11015 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11017 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11019 enum attr_type attr_type_insn
= get_attr_type (insn
);
11020 enum attr_type attr_type_dep
= get_attr_type (dep
);
11022 /* By default all dependencies of the form
11025 have an extra latency of 1 cycle because
11026 of the input and output dependency in this
11027 case. However this gets modeled as an true
11028 dependency and hence all these checks. */
11029 if (REG_P (SET_DEST (PATTERN (insn
)))
11030 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
11032 /* FMACS is a special case where the dependent
11033 instruction can be issued 3 cycles before
11034 the normal latency in case of an output
11036 if ((attr_type_insn
== TYPE_FMACS
11037 || attr_type_insn
== TYPE_FMACD
)
11038 && (attr_type_dep
== TYPE_FMACS
11039 || attr_type_dep
== TYPE_FMACD
))
11041 if (dep_type
== REG_DEP_OUTPUT
)
11042 *cost
= insn_default_latency (dep
) - 3;
11044 *cost
= insn_default_latency (dep
);
11049 if (dep_type
== REG_DEP_OUTPUT
)
11050 *cost
= insn_default_latency (dep
) + 1;
11052 *cost
= insn_default_latency (dep
);
11062 gcc_unreachable ();
11068 /* Adjust cost hook for FA726TE. */
11070 fa726te_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11073 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11074 have penalty of 3. */
11075 if (dep_type
== REG_DEP_TRUE
11076 && recog_memoized (insn
) >= 0
11077 && recog_memoized (dep
) >= 0
11078 && get_attr_conds (dep
) == CONDS_SET
)
11080 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11081 if (get_attr_conds (insn
) == CONDS_USE
11082 && get_attr_type (insn
) != TYPE_BRANCH
)
11088 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11089 || get_attr_conds (insn
) == CONDS_USE
)
11099 /* Implement TARGET_REGISTER_MOVE_COST.
11101 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11102 it is typically more expensive than a single memory access. We set
11103 the cost to less than two memory accesses so that floating
11104 point to integer conversion does not go through memory. */
11107 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
11108 reg_class_t from
, reg_class_t to
)
11112 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11113 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11115 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11116 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11118 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11125 if (from
== HI_REGS
|| to
== HI_REGS
)
11132 /* Implement TARGET_MEMORY_MOVE_COST. */
11135 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
11136 bool in ATTRIBUTE_UNUSED
)
11142 if (GET_MODE_SIZE (mode
) < 4)
11145 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11149 /* Vectorizer cost model implementation. */
11151 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11153 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11155 int misalign ATTRIBUTE_UNUSED
)
11159 switch (type_of_cost
)
11162 return current_tune
->vec_costs
->scalar_stmt_cost
;
11165 return current_tune
->vec_costs
->scalar_load_cost
;
11168 return current_tune
->vec_costs
->scalar_store_cost
;
11171 return current_tune
->vec_costs
->vec_stmt_cost
;
11174 return current_tune
->vec_costs
->vec_align_load_cost
;
11177 return current_tune
->vec_costs
->vec_store_cost
;
11179 case vec_to_scalar
:
11180 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11182 case scalar_to_vec
:
11183 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11185 case unaligned_load
:
11186 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11188 case unaligned_store
:
11189 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11191 case cond_branch_taken
:
11192 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11194 case cond_branch_not_taken
:
11195 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11198 case vec_promote_demote
:
11199 return current_tune
->vec_costs
->vec_stmt_cost
;
11201 case vec_construct
:
11202 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11203 return elements
/ 2 + 1;
11206 gcc_unreachable ();
11210 /* Implement targetm.vectorize.add_stmt_cost. */
11213 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11214 struct _stmt_vec_info
*stmt_info
, int misalign
,
11215 enum vect_cost_model_location where
)
11217 unsigned *cost
= (unsigned *) data
;
11218 unsigned retval
= 0;
11220 if (flag_vect_cost_model
)
11222 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11223 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11225 /* Statements in an inner loop relative to the loop being
11226 vectorized are weighted more heavily. The value here is
11227 arbitrary and could potentially be improved with analysis. */
11228 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11229 count
*= 50; /* FIXME. */
11231 retval
= (unsigned) (count
* stmt_cost
);
11232 cost
[where
] += retval
;
11238 /* Return true if and only if this insn can dual-issue only as older. */
11240 cortexa7_older_only (rtx_insn
*insn
)
11242 if (recog_memoized (insn
) < 0)
11245 switch (get_attr_type (insn
))
11247 case TYPE_ALU_DSP_REG
:
11248 case TYPE_ALU_SREG
:
11249 case TYPE_ALUS_SREG
:
11250 case TYPE_LOGIC_REG
:
11251 case TYPE_LOGICS_REG
:
11253 case TYPE_ADCS_REG
:
11258 case TYPE_SHIFT_IMM
:
11259 case TYPE_SHIFT_REG
:
11260 case TYPE_LOAD_BYTE
:
11263 case TYPE_FFARITHS
:
11265 case TYPE_FFARITHD
:
11283 case TYPE_F_STORES
:
11290 /* Return true if and only if this insn can dual-issue as younger. */
11292 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
11294 if (recog_memoized (insn
) < 0)
11297 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11301 switch (get_attr_type (insn
))
11304 case TYPE_ALUS_IMM
:
11305 case TYPE_LOGIC_IMM
:
11306 case TYPE_LOGICS_IMM
:
11311 case TYPE_MOV_SHIFT
:
11312 case TYPE_MOV_SHIFT_REG
:
11322 /* Look for an instruction that can dual issue only as an older
11323 instruction, and move it in front of any instructions that can
11324 dual-issue as younger, while preserving the relative order of all
11325 other instructions in the ready list. This is a hueuristic to help
11326 dual-issue in later cycles, by postponing issue of more flexible
11327 instructions. This heuristic may affect dual issue opportunities
11328 in the current cycle. */
11330 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
11331 int *n_readyp
, int clock
)
11334 int first_older_only
= -1, first_younger
= -1;
11338 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11342 /* Traverse the ready list from the head (the instruction to issue
11343 first), and looking for the first instruction that can issue as
11344 younger and the first instruction that can dual-issue only as
11346 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11348 rtx_insn
*insn
= ready
[i
];
11349 if (cortexa7_older_only (insn
))
11351 first_older_only
= i
;
11353 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11356 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11360 /* Nothing to reorder because either no younger insn found or insn
11361 that can dual-issue only as older appears before any insn that
11362 can dual-issue as younger. */
11363 if (first_younger
== -1)
11366 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11370 /* Nothing to reorder because no older-only insn in the ready list. */
11371 if (first_older_only
== -1)
11374 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11378 /* Move first_older_only insn before first_younger. */
11380 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11381 INSN_UID(ready
[first_older_only
]),
11382 INSN_UID(ready
[first_younger
]));
11383 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
11384 for (i
= first_older_only
; i
< first_younger
; i
++)
11386 ready
[i
] = ready
[i
+1];
11389 ready
[i
] = first_older_only_insn
;
11393 /* Implement TARGET_SCHED_REORDER. */
11395 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
11400 case TARGET_CPU_cortexa7
:
11401 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11404 /* Do nothing for other cores. */
11408 return arm_issue_rate ();
11411 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11412 It corrects the value of COST based on the relationship between
11413 INSN and DEP through the dependence LINK. It returns the new
11414 value. There is a per-core adjust_cost hook to adjust scheduler costs
11415 and the per-core hook can choose to completely override the generic
11416 adjust_cost function. Only put bits of code into arm_adjust_cost that
11417 are common across all cores. */
11419 arm_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
, int cost
,
11424 /* When generating Thumb-1 code, we want to place flag-setting operations
11425 close to a conditional branch which depends on them, so that we can
11426 omit the comparison. */
11429 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11430 && recog_memoized (dep
) >= 0
11431 && get_attr_conds (dep
) == CONDS_SET
)
11434 if (current_tune
->sched_adjust_cost
!= NULL
)
11436 if (!current_tune
->sched_adjust_cost (insn
, dep_type
, dep
, &cost
))
11440 /* XXX Is this strictly true? */
11441 if (dep_type
== REG_DEP_ANTI
11442 || dep_type
== REG_DEP_OUTPUT
)
11445 /* Call insns don't incur a stall, even if they follow a load. */
11450 if ((i_pat
= single_set (insn
)) != NULL
11451 && MEM_P (SET_SRC (i_pat
))
11452 && (d_pat
= single_set (dep
)) != NULL
11453 && MEM_P (SET_DEST (d_pat
)))
11455 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11456 /* This is a load after a store, there is no conflict if the load reads
11457 from a cached area. Assume that loads from the stack, and from the
11458 constant pool are cached, and that others will miss. This is a
11461 if ((GET_CODE (src_mem
) == SYMBOL_REF
11462 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11463 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11464 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11465 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
11473 arm_max_conditional_execute (void)
11475 return max_insns_skipped
;
11479 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
11482 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
11484 return (optimize
> 0) ? 2 : 0;
11488 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
11490 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11493 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11494 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11495 sequences of non-executed instructions in IT blocks probably take the same
11496 amount of time as executed instructions (and the IT instruction itself takes
11497 space in icache). This function was experimentally determined to give good
11498 results on a popular embedded benchmark. */
11501 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
11503 return (TARGET_32BIT
&& speed_p
) ? 1
11504 : arm_default_branch_cost (speed_p
, predictable_p
);
11508 arm_cortex_m7_branch_cost (bool speed_p
, bool predictable_p
)
11510 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11513 static bool fp_consts_inited
= false;
11515 static REAL_VALUE_TYPE value_fp0
;
11518 init_fp_table (void)
11522 r
= REAL_VALUE_ATOF ("0", DFmode
);
11524 fp_consts_inited
= true;
11527 /* Return TRUE if rtx X is a valid immediate FP constant. */
11529 arm_const_double_rtx (rtx x
)
11531 const REAL_VALUE_TYPE
*r
;
11533 if (!fp_consts_inited
)
11536 r
= CONST_DOUBLE_REAL_VALUE (x
);
11537 if (REAL_VALUE_MINUS_ZERO (*r
))
11540 if (real_equal (r
, &value_fp0
))
11546 /* VFPv3 has a fairly wide range of representable immediates, formed from
11547 "quarter-precision" floating-point values. These can be evaluated using this
11548 formula (with ^ for exponentiation):
11552 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11553 16 <= n <= 31 and 0 <= r <= 7.
11555 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11557 - A (most-significant) is the sign bit.
11558 - BCD are the exponent (encoded as r XOR 3).
11559 - EFGH are the mantissa (encoded as n - 16).
11562 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11563 fconst[sd] instruction, or -1 if X isn't suitable. */
11565 vfp3_const_double_index (rtx x
)
11567 REAL_VALUE_TYPE r
, m
;
11568 int sign
, exponent
;
11569 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
11570 unsigned HOST_WIDE_INT mask
;
11571 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
11574 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
11577 r
= *CONST_DOUBLE_REAL_VALUE (x
);
11579 /* We can't represent these things, so detect them first. */
11580 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
11583 /* Extract sign, exponent and mantissa. */
11584 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
11585 r
= real_value_abs (&r
);
11586 exponent
= REAL_EXP (&r
);
11587 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11588 highest (sign) bit, with a fixed binary point at bit point_pos.
11589 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11590 bits for the mantissa, this may fail (low bits would be lost). */
11591 real_ldexp (&m
, &r
, point_pos
- exponent
);
11592 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
11593 mantissa
= w
.elt (0);
11594 mant_hi
= w
.elt (1);
11596 /* If there are bits set in the low part of the mantissa, we can't
11597 represent this value. */
11601 /* Now make it so that mantissa contains the most-significant bits, and move
11602 the point_pos to indicate that the least-significant bits have been
11604 point_pos
-= HOST_BITS_PER_WIDE_INT
;
11605 mantissa
= mant_hi
;
11607 /* We can permit four significant bits of mantissa only, plus a high bit
11608 which is always 1. */
11609 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
11610 if ((mantissa
& mask
) != 0)
11613 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11614 mantissa
>>= point_pos
- 5;
11616 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11617 floating-point immediate zero with Neon using an integer-zero load, but
11618 that case is handled elsewhere.) */
11622 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
11624 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11625 normalized significands are in the range [1, 2). (Our mantissa is shifted
11626 left 4 places at this point relative to normalized IEEE754 values). GCC
11627 internally uses [0.5, 1) (see real.c), so the exponent returned from
11628 REAL_EXP must be altered. */
11629 exponent
= 5 - exponent
;
11631 if (exponent
< 0 || exponent
> 7)
11634 /* Sign, mantissa and exponent are now in the correct form to plug into the
11635 formula described in the comment above. */
11636 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
11639 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11641 vfp3_const_double_rtx (rtx x
)
11646 return vfp3_const_double_index (x
) != -1;
11649 /* Recognize immediates which can be used in various Neon instructions. Legal
11650 immediates are described by the following table (for VMVN variants, the
11651 bitwise inverse of the constant shown is recognized. In either case, VMOV
11652 is output and the correct instruction to use for a given constant is chosen
11653 by the assembler). The constant shown is replicated across all elements of
11654 the destination vector.
11656 insn elems variant constant (binary)
11657 ---- ----- ------- -----------------
11658 vmov i32 0 00000000 00000000 00000000 abcdefgh
11659 vmov i32 1 00000000 00000000 abcdefgh 00000000
11660 vmov i32 2 00000000 abcdefgh 00000000 00000000
11661 vmov i32 3 abcdefgh 00000000 00000000 00000000
11662 vmov i16 4 00000000 abcdefgh
11663 vmov i16 5 abcdefgh 00000000
11664 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11665 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11666 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11667 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11668 vmvn i16 10 00000000 abcdefgh
11669 vmvn i16 11 abcdefgh 00000000
11670 vmov i32 12 00000000 00000000 abcdefgh 11111111
11671 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11672 vmov i32 14 00000000 abcdefgh 11111111 11111111
11673 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11674 vmov i8 16 abcdefgh
11675 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11676 eeeeeeee ffffffff gggggggg hhhhhhhh
11677 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11678 vmov f32 19 00000000 00000000 00000000 00000000
11680 For case 18, B = !b. Representable values are exactly those accepted by
11681 vfp3_const_double_index, but are output as floating-point numbers rather
11684 For case 19, we will change it to vmov.i32 when assembling.
11686 Variants 0-5 (inclusive) may also be used as immediates for the second
11687 operand of VORR/VBIC instructions.
11689 The INVERSE argument causes the bitwise inverse of the given operand to be
11690 recognized instead (used for recognizing legal immediates for the VAND/VORN
11691 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11692 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11693 output, rather than the real insns vbic/vorr).
11695 INVERSE makes no difference to the recognition of float vectors.
11697 The return value is the variant of immediate as shown in the above table, or
11698 -1 if the given value doesn't match any of the listed patterns.
11701 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
11702 rtx
*modconst
, int *elementwidth
)
11704 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11706 for (i = 0; i < idx; i += (STRIDE)) \
11711 immtype = (CLASS); \
11712 elsize = (ELSIZE); \
11716 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
11717 unsigned int innersize
;
11718 unsigned char bytes
[16];
11719 int immtype
= -1, matches
;
11720 unsigned int invmask
= inverse
? 0xff : 0;
11721 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
11724 n_elts
= CONST_VECTOR_NUNITS (op
);
11728 if (mode
== VOIDmode
)
11732 innersize
= GET_MODE_UNIT_SIZE (mode
);
11734 /* Vectors of float constants. */
11735 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
11737 rtx el0
= CONST_VECTOR_ELT (op
, 0);
11739 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
11742 /* FP16 vectors cannot be represented. */
11743 if (GET_MODE_INNER (mode
) == HFmode
)
11746 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11747 are distinct in this context. */
11748 if (!const_vec_duplicate_p (op
))
11752 *modconst
= CONST_VECTOR_ELT (op
, 0);
11757 if (el0
== CONST0_RTX (GET_MODE (el0
)))
11763 /* The tricks done in the code below apply for little-endian vector layout.
11764 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11765 FIXME: Implement logic for big-endian vectors. */
11766 if (BYTES_BIG_ENDIAN
&& vector
&& !const_vec_duplicate_p (op
))
11769 /* Splat vector constant out into a byte vector. */
11770 for (i
= 0; i
< n_elts
; i
++)
11772 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
11773 unsigned HOST_WIDE_INT elpart
;
11775 gcc_assert (CONST_INT_P (el
));
11776 elpart
= INTVAL (el
);
11778 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
11780 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
11781 elpart
>>= BITS_PER_UNIT
;
11785 /* Sanity check. */
11786 gcc_assert (idx
== GET_MODE_SIZE (mode
));
11790 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
11791 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11793 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11794 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11796 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11797 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11799 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11800 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
11802 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
11804 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
11806 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
11807 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11809 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11810 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11812 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11813 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11815 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11816 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
11818 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
11820 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
11822 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11823 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11825 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11826 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11828 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11829 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11831 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11832 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11834 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
11836 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
11837 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
11845 *elementwidth
= elsize
;
11849 unsigned HOST_WIDE_INT imm
= 0;
11851 /* Un-invert bytes of recognized vector, if necessary. */
11853 for (i
= 0; i
< idx
; i
++)
11854 bytes
[i
] ^= invmask
;
11858 /* FIXME: Broken on 32-bit H_W_I hosts. */
11859 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
11861 for (i
= 0; i
< 8; i
++)
11862 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
11863 << (i
* BITS_PER_UNIT
);
11865 *modconst
= GEN_INT (imm
);
11869 unsigned HOST_WIDE_INT imm
= 0;
11871 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
11872 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
11874 *modconst
= GEN_INT (imm
);
11882 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11883 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11884 float elements), and a modified constant (whatever should be output for a
11885 VMOV) in *MODCONST. */
11888 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
11889 rtx
*modconst
, int *elementwidth
)
11893 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
11899 *modconst
= tmpconst
;
11902 *elementwidth
= tmpwidth
;
11907 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11908 the immediate is valid, write a constant suitable for using as an operand
11909 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11910 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11913 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
11914 rtx
*modconst
, int *elementwidth
)
11918 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
11920 if (retval
< 0 || retval
> 5)
11924 *modconst
= tmpconst
;
11927 *elementwidth
= tmpwidth
;
11932 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11933 the immediate is valid, write a constant suitable for using as an operand
11934 to VSHR/VSHL to *MODCONST and the corresponding element width to
11935 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
11936 because they have different limitations. */
11939 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
11940 rtx
*modconst
, int *elementwidth
,
11943 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
11944 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
11945 unsigned HOST_WIDE_INT last_elt
= 0;
11946 unsigned HOST_WIDE_INT maxshift
;
11948 /* Split vector constant out into a byte vector. */
11949 for (i
= 0; i
< n_elts
; i
++)
11951 rtx el
= CONST_VECTOR_ELT (op
, i
);
11952 unsigned HOST_WIDE_INT elpart
;
11954 if (CONST_INT_P (el
))
11955 elpart
= INTVAL (el
);
11956 else if (CONST_DOUBLE_P (el
))
11959 gcc_unreachable ();
11961 if (i
!= 0 && elpart
!= last_elt
)
11967 /* Shift less than element size. */
11968 maxshift
= innersize
* 8;
11972 /* Left shift immediate value can be from 0 to <size>-1. */
11973 if (last_elt
>= maxshift
)
11978 /* Right shift immediate value can be from 1 to <size>. */
11979 if (last_elt
== 0 || last_elt
> maxshift
)
11984 *elementwidth
= innersize
* 8;
11987 *modconst
= CONST_VECTOR_ELT (op
, 0);
11992 /* Return a string suitable for output of Neon immediate logic operation
11996 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
11997 int inverse
, int quad
)
11999 int width
, is_valid
;
12000 static char templ
[40];
12002 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12004 gcc_assert (is_valid
!= 0);
12007 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12009 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12014 /* Return a string suitable for output of Neon immediate shift operation
12015 (VSHR or VSHL) MNEM. */
12018 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12019 machine_mode mode
, int quad
,
12022 int width
, is_valid
;
12023 static char templ
[40];
12025 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12026 gcc_assert (is_valid
!= 0);
12029 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12031 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12036 /* Output a sequence of pairwise operations to implement a reduction.
12037 NOTE: We do "too much work" here, because pairwise operations work on two
12038 registers-worth of operands in one go. Unfortunately we can't exploit those
12039 extra calculations to do the full operation in fewer steps, I don't think.
12040 Although all vector elements of the result but the first are ignored, we
12041 actually calculate the same result in each of the elements. An alternative
12042 such as initially loading a vector with zero to use as each of the second
12043 operands would use up an additional register and take an extra instruction,
12044 for no particular gain. */
12047 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
12048 rtx (*reduc
) (rtx
, rtx
, rtx
))
12050 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
12053 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12055 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12056 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12061 /* If VALS is a vector constant that can be loaded into a register
12062 using VDUP, generate instructions to do so and return an RTX to
12063 assign to the register. Otherwise return NULL_RTX. */
12066 neon_vdup_constant (rtx vals
)
12068 machine_mode mode
= GET_MODE (vals
);
12069 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12072 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12075 if (!const_vec_duplicate_p (vals
, &x
))
12076 /* The elements are not all the same. We could handle repeating
12077 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12078 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12082 /* We can load this constant by using VDUP and a constant in a
12083 single ARM register. This will be cheaper than a vector
12086 x
= copy_to_mode_reg (inner_mode
, x
);
12087 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12090 /* Generate code to load VALS, which is a PARALLEL containing only
12091 constants (for vec_init) or CONST_VECTOR, efficiently into a
12092 register. Returns an RTX to copy into the register, or NULL_RTX
12093 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12096 neon_make_constant (rtx vals
)
12098 machine_mode mode
= GET_MODE (vals
);
12100 rtx const_vec
= NULL_RTX
;
12101 int n_elts
= GET_MODE_NUNITS (mode
);
12105 if (GET_CODE (vals
) == CONST_VECTOR
)
12107 else if (GET_CODE (vals
) == PARALLEL
)
12109 /* A CONST_VECTOR must contain only CONST_INTs and
12110 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12111 Only store valid constants in a CONST_VECTOR. */
12112 for (i
= 0; i
< n_elts
; ++i
)
12114 rtx x
= XVECEXP (vals
, 0, i
);
12115 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12118 if (n_const
== n_elts
)
12119 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12122 gcc_unreachable ();
12124 if (const_vec
!= NULL
12125 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12126 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12128 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12129 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12130 pipeline cycle; creating the constant takes one or two ARM
12131 pipeline cycles. */
12133 else if (const_vec
!= NULL_RTX
)
12134 /* Load from constant pool. On Cortex-A8 this takes two cycles
12135 (for either double or quad vectors). We can not take advantage
12136 of single-cycle VLD1 because we need a PC-relative addressing
12140 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12141 We can not construct an initializer. */
12145 /* Initialize vector TARGET to VALS. */
12148 neon_expand_vector_init (rtx target
, rtx vals
)
12150 machine_mode mode
= GET_MODE (target
);
12151 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12152 int n_elts
= GET_MODE_NUNITS (mode
);
12153 int n_var
= 0, one_var
= -1;
12154 bool all_same
= true;
12158 for (i
= 0; i
< n_elts
; ++i
)
12160 x
= XVECEXP (vals
, 0, i
);
12161 if (!CONSTANT_P (x
))
12162 ++n_var
, one_var
= i
;
12164 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12170 rtx constant
= neon_make_constant (vals
);
12171 if (constant
!= NULL_RTX
)
12173 emit_move_insn (target
, constant
);
12178 /* Splat a single non-constant element if we can. */
12179 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12181 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12182 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
12186 /* One field is non-constant. Load constant then overwrite varying
12187 field. This is more efficient than using the stack. */
12190 rtx copy
= copy_rtx (vals
);
12191 rtx index
= GEN_INT (one_var
);
12193 /* Load constant part of vector, substitute neighboring value for
12194 varying element. */
12195 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12196 neon_expand_vector_init (target
, copy
);
12198 /* Insert variable. */
12199 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12203 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12206 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12209 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12212 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12215 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12218 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12221 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12224 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12227 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12230 gcc_unreachable ();
12235 /* Construct the vector in memory one field at a time
12236 and load the whole vector. */
12237 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12238 for (i
= 0; i
< n_elts
; i
++)
12239 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12240 i
* GET_MODE_SIZE (inner_mode
)),
12241 XVECEXP (vals
, 0, i
));
12242 emit_move_insn (target
, mem
);
12245 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12246 ERR if it doesn't. EXP indicates the source location, which includes the
12247 inlining history for intrinsics. */
12250 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12251 const_tree exp
, const char *desc
)
12253 HOST_WIDE_INT lane
;
12255 gcc_assert (CONST_INT_P (operand
));
12257 lane
= INTVAL (operand
);
12259 if (lane
< low
|| lane
>= high
)
12262 error ("%K%s %wd out of range %wd - %wd",
12263 exp
, desc
, lane
, low
, high
- 1);
12265 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
12269 /* Bounds-check lanes. */
12272 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12275 bounds_check (operand
, low
, high
, exp
, "lane");
12278 /* Bounds-check constants. */
12281 arm_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12283 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
12287 neon_element_bits (machine_mode mode
)
12289 return GET_MODE_UNIT_BITSIZE (mode
);
12293 /* Predicates for `match_operand' and `match_operator'. */
12295 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12296 WB is true if full writeback address modes are allowed and is false
12297 if limited writeback address modes (POST_INC and PRE_DEC) are
12301 arm_coproc_mem_operand (rtx op
, bool wb
)
12305 /* Reject eliminable registers. */
12306 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12307 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12308 || reg_mentioned_p (arg_pointer_rtx
, op
)
12309 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12310 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12311 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12312 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12315 /* Constants are converted into offsets from labels. */
12319 ind
= XEXP (op
, 0);
12321 if (reload_completed
12322 && (GET_CODE (ind
) == LABEL_REF
12323 || (GET_CODE (ind
) == CONST
12324 && GET_CODE (XEXP (ind
, 0)) == PLUS
12325 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12326 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12329 /* Match: (mem (reg)). */
12331 return arm_address_register_rtx_p (ind
, 0);
12333 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12334 acceptable in any case (subject to verification by
12335 arm_address_register_rtx_p). We need WB to be true to accept
12336 PRE_INC and POST_DEC. */
12337 if (GET_CODE (ind
) == POST_INC
12338 || GET_CODE (ind
) == PRE_DEC
12340 && (GET_CODE (ind
) == PRE_INC
12341 || GET_CODE (ind
) == POST_DEC
)))
12342 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12345 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12346 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12347 && GET_CODE (XEXP (ind
, 1)) == PLUS
12348 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12349 ind
= XEXP (ind
, 1);
12354 if (GET_CODE (ind
) == PLUS
12355 && REG_P (XEXP (ind
, 0))
12356 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12357 && CONST_INT_P (XEXP (ind
, 1))
12358 && INTVAL (XEXP (ind
, 1)) > -1024
12359 && INTVAL (XEXP (ind
, 1)) < 1024
12360 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12366 /* Return TRUE if OP is a memory operand which we can load or store a vector
12367 to/from. TYPE is one of the following values:
12368 0 - Vector load/stor (vldr)
12369 1 - Core registers (ldm)
12370 2 - Element/structure loads (vld1)
12373 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12377 /* Reject eliminable registers. */
12378 if (strict
&& ! (reload_in_progress
|| reload_completed
)
12379 && (reg_mentioned_p (frame_pointer_rtx
, op
)
12380 || reg_mentioned_p (arg_pointer_rtx
, op
)
12381 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12382 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12383 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12384 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12387 /* Constants are converted into offsets from labels. */
12391 ind
= XEXP (op
, 0);
12393 if (reload_completed
12394 && (GET_CODE (ind
) == LABEL_REF
12395 || (GET_CODE (ind
) == CONST
12396 && GET_CODE (XEXP (ind
, 0)) == PLUS
12397 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12398 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12401 /* Match: (mem (reg)). */
12403 return arm_address_register_rtx_p (ind
, 0);
12405 /* Allow post-increment with Neon registers. */
12406 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12407 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12408 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12410 /* Allow post-increment by register for VLDn */
12411 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
12412 && GET_CODE (XEXP (ind
, 1)) == PLUS
12413 && REG_P (XEXP (XEXP (ind
, 1), 1)))
12420 && GET_CODE (ind
) == PLUS
12421 && REG_P (XEXP (ind
, 0))
12422 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12423 && CONST_INT_P (XEXP (ind
, 1))
12424 && INTVAL (XEXP (ind
, 1)) > -1024
12425 /* For quad modes, we restrict the constant offset to be slightly less
12426 than what the instruction format permits. We have no such constraint
12427 on double mode offsets. (This must match arm_legitimate_index_p.) */
12428 && (INTVAL (XEXP (ind
, 1))
12429 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12430 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12436 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12439 neon_struct_mem_operand (rtx op
)
12443 /* Reject eliminable registers. */
12444 if (! (reload_in_progress
|| reload_completed
)
12445 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12446 || reg_mentioned_p (arg_pointer_rtx
, op
)
12447 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12448 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12449 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12450 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12453 /* Constants are converted into offsets from labels. */
12457 ind
= XEXP (op
, 0);
12459 if (reload_completed
12460 && (GET_CODE (ind
) == LABEL_REF
12461 || (GET_CODE (ind
) == CONST
12462 && GET_CODE (XEXP (ind
, 0)) == PLUS
12463 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12464 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12467 /* Match: (mem (reg)). */
12469 return arm_address_register_rtx_p (ind
, 0);
12471 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12472 if (GET_CODE (ind
) == POST_INC
12473 || GET_CODE (ind
) == PRE_DEC
)
12474 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12479 /* Return true if X is a register that will be eliminated later on. */
12481 arm_eliminable_register (rtx x
)
12483 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
12484 || REGNO (x
) == ARG_POINTER_REGNUM
12485 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
12486 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
12489 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12490 coprocessor registers. Otherwise return NO_REGS. */
12493 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
12495 if (mode
== HFmode
)
12497 if (!TARGET_NEON_FP16
&& !TARGET_VFP_FP16INST
)
12498 return GENERAL_REGS
;
12499 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
12501 return GENERAL_REGS
;
12504 /* The neon move patterns handle all legitimate vector and struct
12507 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
12508 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
12509 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
12510 || VALID_NEON_STRUCT_MODE (mode
)))
12513 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
12516 return GENERAL_REGS
;
12519 /* Values which must be returned in the most-significant end of the return
12523 arm_return_in_msb (const_tree valtype
)
12525 return (TARGET_AAPCS_BASED
12526 && BYTES_BIG_ENDIAN
12527 && (AGGREGATE_TYPE_P (valtype
)
12528 || TREE_CODE (valtype
) == COMPLEX_TYPE
12529 || FIXED_POINT_TYPE_P (valtype
)));
12532 /* Return TRUE if X references a SYMBOL_REF. */
12534 symbol_mentioned_p (rtx x
)
12539 if (GET_CODE (x
) == SYMBOL_REF
)
12542 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12543 are constant offsets, not symbols. */
12544 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12547 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12549 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12555 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12556 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
12559 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
12566 /* Return TRUE if X references a LABEL_REF. */
12568 label_mentioned_p (rtx x
)
12573 if (GET_CODE (x
) == LABEL_REF
)
12576 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12577 instruction, but they are constant offsets, not symbols. */
12578 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12581 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12582 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12588 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12589 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
12592 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
12600 tls_mentioned_p (rtx x
)
12602 switch (GET_CODE (x
))
12605 return tls_mentioned_p (XEXP (x
, 0));
12608 if (XINT (x
, 1) == UNSPEC_TLS
)
12611 /* Fall through. */
12617 /* Must not copy any rtx that uses a pc-relative address.
12618 Also, disallow copying of load-exclusive instructions that
12619 may appear after splitting of compare-and-swap-style operations
12620 so as to prevent those loops from being transformed away from their
12621 canonical forms (see PR 69904). */
12624 arm_cannot_copy_insn_p (rtx_insn
*insn
)
12626 /* The tls call insn cannot be copied, as it is paired with a data
12628 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
12631 subrtx_iterator::array_type array
;
12632 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
12634 const_rtx x
= *iter
;
12635 if (GET_CODE (x
) == UNSPEC
12636 && (XINT (x
, 1) == UNSPEC_PIC_BASE
12637 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
12641 rtx set
= single_set (insn
);
12644 rtx src
= SET_SRC (set
);
12645 if (GET_CODE (src
) == ZERO_EXTEND
)
12646 src
= XEXP (src
, 0);
12648 /* Catch the load-exclusive and load-acquire operations. */
12649 if (GET_CODE (src
) == UNSPEC_VOLATILE
12650 && (XINT (src
, 1) == VUNSPEC_LL
12651 || XINT (src
, 1) == VUNSPEC_LAX
))
12658 minmax_code (rtx x
)
12660 enum rtx_code code
= GET_CODE (x
);
12673 gcc_unreachable ();
12677 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12680 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
12681 int *mask
, bool *signed_sat
)
12683 /* The high bound must be a power of two minus one. */
12684 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
12688 /* The low bound is either zero (for usat) or one less than the
12689 negation of the high bound (for ssat). */
12690 if (INTVAL (lo_bound
) == 0)
12695 *signed_sat
= false;
12700 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
12705 *signed_sat
= true;
12713 /* Return 1 if memory locations are adjacent. */
12715 adjacent_mem_locations (rtx a
, rtx b
)
12717 /* We don't guarantee to preserve the order of these memory refs. */
12718 if (volatile_refs_p (a
) || volatile_refs_p (b
))
12721 if ((REG_P (XEXP (a
, 0))
12722 || (GET_CODE (XEXP (a
, 0)) == PLUS
12723 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
12724 && (REG_P (XEXP (b
, 0))
12725 || (GET_CODE (XEXP (b
, 0)) == PLUS
12726 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
12728 HOST_WIDE_INT val0
= 0, val1
= 0;
12732 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
12734 reg0
= XEXP (XEXP (a
, 0), 0);
12735 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
12738 reg0
= XEXP (a
, 0);
12740 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
12742 reg1
= XEXP (XEXP (b
, 0), 0);
12743 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
12746 reg1
= XEXP (b
, 0);
12748 /* Don't accept any offset that will require multiple
12749 instructions to handle, since this would cause the
12750 arith_adjacentmem pattern to output an overlong sequence. */
12751 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
12754 /* Don't allow an eliminable register: register elimination can make
12755 the offset too large. */
12756 if (arm_eliminable_register (reg0
))
12759 val_diff
= val1
- val0
;
12763 /* If the target has load delay slots, then there's no benefit
12764 to using an ldm instruction unless the offset is zero and
12765 we are optimizing for size. */
12766 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
12767 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
12768 && (val_diff
== 4 || val_diff
== -4));
12771 return ((REGNO (reg0
) == REGNO (reg1
))
12772 && (val_diff
== 4 || val_diff
== -4));
12778 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12779 for load operations, false for store operations. CONSECUTIVE is true
12780 if the register numbers in the operation must be consecutive in the register
12781 bank. RETURN_PC is true if value is to be loaded in PC.
12782 The pattern we are trying to match for load is:
12783 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12784 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12787 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12790 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12791 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12792 3. If consecutive is TRUE, then for kth register being loaded,
12793 REGNO (R_dk) = REGNO (R_d0) + k.
12794 The pattern for store is similar. */
12796 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
12797 bool consecutive
, bool return_pc
)
12799 HOST_WIDE_INT count
= XVECLEN (op
, 0);
12800 rtx reg
, mem
, addr
;
12802 unsigned first_regno
;
12803 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
12805 bool addr_reg_in_reglist
= false;
12806 bool update
= false;
12811 /* If not in SImode, then registers must be consecutive
12812 (e.g., VLDM instructions for DFmode). */
12813 gcc_assert ((mode
== SImode
) || consecutive
);
12814 /* Setting return_pc for stores is illegal. */
12815 gcc_assert (!return_pc
|| load
);
12817 /* Set up the increments and the regs per val based on the mode. */
12818 reg_increment
= GET_MODE_SIZE (mode
);
12819 regs_per_val
= reg_increment
/ 4;
12820 offset_adj
= return_pc
? 1 : 0;
12823 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
12824 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
12827 /* Check if this is a write-back. */
12828 elt
= XVECEXP (op
, 0, offset_adj
);
12829 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
12835 /* The offset adjustment must be the number of registers being
12836 popped times the size of a single register. */
12837 if (!REG_P (SET_DEST (elt
))
12838 || !REG_P (XEXP (SET_SRC (elt
), 0))
12839 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
12840 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
12841 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
12842 ((count
- 1 - offset_adj
) * reg_increment
))
12846 i
= i
+ offset_adj
;
12847 base
= base
+ offset_adj
;
12848 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12849 success depends on the type: VLDM can do just one reg,
12850 LDM must do at least two. */
12851 if ((count
<= i
) && (mode
== SImode
))
12854 elt
= XVECEXP (op
, 0, i
- 1);
12855 if (GET_CODE (elt
) != SET
)
12860 reg
= SET_DEST (elt
);
12861 mem
= SET_SRC (elt
);
12865 reg
= SET_SRC (elt
);
12866 mem
= SET_DEST (elt
);
12869 if (!REG_P (reg
) || !MEM_P (mem
))
12872 regno
= REGNO (reg
);
12873 first_regno
= regno
;
12874 addr
= XEXP (mem
, 0);
12875 if (GET_CODE (addr
) == PLUS
)
12877 if (!CONST_INT_P (XEXP (addr
, 1)))
12880 offset
= INTVAL (XEXP (addr
, 1));
12881 addr
= XEXP (addr
, 0);
12887 /* Don't allow SP to be loaded unless it is also the base register. It
12888 guarantees that SP is reset correctly when an LDM instruction
12889 is interrupted. Otherwise, we might end up with a corrupt stack. */
12890 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12893 for (; i
< count
; i
++)
12895 elt
= XVECEXP (op
, 0, i
);
12896 if (GET_CODE (elt
) != SET
)
12901 reg
= SET_DEST (elt
);
12902 mem
= SET_SRC (elt
);
12906 reg
= SET_SRC (elt
);
12907 mem
= SET_DEST (elt
);
12911 || GET_MODE (reg
) != mode
12912 || REGNO (reg
) <= regno
12915 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
12916 /* Don't allow SP to be loaded unless it is also the base register. It
12917 guarantees that SP is reset correctly when an LDM instruction
12918 is interrupted. Otherwise, we might end up with a corrupt stack. */
12919 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12921 || GET_MODE (mem
) != mode
12922 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
12923 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
12924 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
12925 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
12926 offset
+ (i
- base
) * reg_increment
))
12927 && (!REG_P (XEXP (mem
, 0))
12928 || offset
+ (i
- base
) * reg_increment
!= 0)))
12931 regno
= REGNO (reg
);
12932 if (regno
== REGNO (addr
))
12933 addr_reg_in_reglist
= true;
12938 if (update
&& addr_reg_in_reglist
)
12941 /* For Thumb-1, address register is always modified - either by write-back
12942 or by explicit load. If the pattern does not describe an update,
12943 then the address register must be in the list of loaded registers. */
12945 return update
|| addr_reg_in_reglist
;
12951 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12952 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12953 instruction. ADD_OFFSET is nonzero if the base address register needs
12954 to be modified with an add instruction before we can use it. */
12957 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
12958 int nops
, HOST_WIDE_INT add_offset
)
12960 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12961 if the offset isn't small enough. The reason 2 ldrs are faster
12962 is because these ARMs are able to do more than one cache access
12963 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12964 whilst the ARM8 has a double bandwidth cache. This means that
12965 these cores can do both an instruction fetch and a data fetch in
12966 a single cycle, so the trick of calculating the address into a
12967 scratch register (one of the result regs) and then doing a load
12968 multiple actually becomes slower (and no smaller in code size).
12969 That is the transformation
12971 ldr rd1, [rbase + offset]
12972 ldr rd2, [rbase + offset + 4]
12976 add rd1, rbase, offset
12977 ldmia rd1, {rd1, rd2}
12979 produces worse code -- '3 cycles + any stalls on rd2' instead of
12980 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12981 access per cycle, the first sequence could never complete in less
12982 than 6 cycles, whereas the ldm sequence would only take 5 and
12983 would make better use of sequential accesses if not hitting the
12986 We cheat here and test 'arm_ld_sched' which we currently know to
12987 only be true for the ARM8, ARM9 and StrongARM. If this ever
12988 changes, then the test below needs to be reworked. */
12989 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
12992 /* XScale has load-store double instructions, but they have stricter
12993 alignment requirements than load-store multiple, so we cannot
12996 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12997 the pipeline until completion.
13005 An ldr instruction takes 1-3 cycles, but does not block the
13014 Best case ldr will always win. However, the more ldr instructions
13015 we issue, the less likely we are to be able to schedule them well.
13016 Using ldr instructions also increases code size.
13018 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13019 for counts of 3 or 4 regs. */
13020 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13025 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13026 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13027 an array ORDER which describes the sequence to use when accessing the
13028 offsets that produces an ascending order. In this sequence, each
13029 offset must be larger by exactly 4 than the previous one. ORDER[0]
13030 must have been filled in with the lowest offset by the caller.
13031 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13032 we use to verify that ORDER produces an ascending order of registers.
13033 Return true if it was possible to construct such an order, false if
13037 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13038 int *unsorted_regs
)
13041 for (i
= 1; i
< nops
; i
++)
13045 order
[i
] = order
[i
- 1];
13046 for (j
= 0; j
< nops
; j
++)
13047 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13049 /* We must find exactly one offset that is higher than the
13050 previous one by 4. */
13051 if (order
[i
] != order
[i
- 1])
13055 if (order
[i
] == order
[i
- 1])
13057 /* The register numbers must be ascending. */
13058 if (unsorted_regs
!= NULL
13059 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13065 /* Used to determine in a peephole whether a sequence of load
13066 instructions can be changed into a load-multiple instruction.
13067 NOPS is the number of separate load instructions we are examining. The
13068 first NOPS entries in OPERANDS are the destination registers, the
13069 next NOPS entries are memory operands. If this function is
13070 successful, *BASE is set to the common base register of the memory
13071 accesses; *LOAD_OFFSET is set to the first memory location's offset
13072 from that base register.
13073 REGS is an array filled in with the destination register numbers.
13074 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13075 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13076 the sequence of registers in REGS matches the loads from ascending memory
13077 locations, and the function verifies that the register numbers are
13078 themselves ascending. If CHECK_REGS is false, the register numbers
13079 are stored in the order they are found in the operands. */
13081 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13082 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13084 int unsorted_regs
[MAX_LDM_STM_OPS
];
13085 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13086 int order
[MAX_LDM_STM_OPS
];
13087 rtx base_reg_rtx
= NULL
;
13091 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13092 easily extended if required. */
13093 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13095 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13097 /* Loop over the operands and check that the memory references are
13098 suitable (i.e. immediate offsets from the same base register). At
13099 the same time, extract the target register, and the memory
13101 for (i
= 0; i
< nops
; i
++)
13106 /* Convert a subreg of a mem into the mem itself. */
13107 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13108 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13110 gcc_assert (MEM_P (operands
[nops
+ i
]));
13112 /* Don't reorder volatile memory references; it doesn't seem worth
13113 looking for the case where the order is ok anyway. */
13114 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13117 offset
= const0_rtx
;
13119 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13120 || (GET_CODE (reg
) == SUBREG
13121 && REG_P (reg
= SUBREG_REG (reg
))))
13122 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13123 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13124 || (GET_CODE (reg
) == SUBREG
13125 && REG_P (reg
= SUBREG_REG (reg
))))
13126 && (CONST_INT_P (offset
13127 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13131 base_reg
= REGNO (reg
);
13132 base_reg_rtx
= reg
;
13133 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13136 else if (base_reg
!= (int) REGNO (reg
))
13137 /* Not addressed from the same base register. */
13140 unsorted_regs
[i
] = (REG_P (operands
[i
])
13141 ? REGNO (operands
[i
])
13142 : REGNO (SUBREG_REG (operands
[i
])));
13144 /* If it isn't an integer register, or if it overwrites the
13145 base register but isn't the last insn in the list, then
13146 we can't do this. */
13147 if (unsorted_regs
[i
] < 0
13148 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13149 || unsorted_regs
[i
] > 14
13150 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13153 /* Don't allow SP to be loaded unless it is also the base
13154 register. It guarantees that SP is reset correctly when
13155 an LDM instruction is interrupted. Otherwise, we might
13156 end up with a corrupt stack. */
13157 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13160 unsorted_offsets
[i
] = INTVAL (offset
);
13161 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13165 /* Not a suitable memory address. */
13169 /* All the useful information has now been extracted from the
13170 operands into unsorted_regs and unsorted_offsets; additionally,
13171 order[0] has been set to the lowest offset in the list. Sort
13172 the offsets into order, verifying that they are adjacent, and
13173 check that the register numbers are ascending. */
13174 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13175 check_regs
? unsorted_regs
: NULL
))
13179 memcpy (saved_order
, order
, sizeof order
);
13185 for (i
= 0; i
< nops
; i
++)
13186 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13188 *load_offset
= unsorted_offsets
[order
[0]];
13192 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13195 if (unsorted_offsets
[order
[0]] == 0)
13196 ldm_case
= 1; /* ldmia */
13197 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13198 ldm_case
= 2; /* ldmib */
13199 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13200 ldm_case
= 3; /* ldmda */
13201 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13202 ldm_case
= 4; /* ldmdb */
13203 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13204 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13209 if (!multiple_operation_profitable_p (false, nops
,
13211 ? unsorted_offsets
[order
[0]] : 0))
13217 /* Used to determine in a peephole whether a sequence of store instructions can
13218 be changed into a store-multiple instruction.
13219 NOPS is the number of separate store instructions we are examining.
13220 NOPS_TOTAL is the total number of instructions recognized by the peephole
13222 The first NOPS entries in OPERANDS are the source registers, the next
13223 NOPS entries are memory operands. If this function is successful, *BASE is
13224 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13225 to the first memory location's offset from that base register. REGS is an
13226 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13227 likewise filled with the corresponding rtx's.
13228 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13229 numbers to an ascending order of stores.
13230 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13231 from ascending memory locations, and the function verifies that the register
13232 numbers are themselves ascending. If CHECK_REGS is false, the register
13233 numbers are stored in the order they are found in the operands. */
13235 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13236 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13237 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13239 int unsorted_regs
[MAX_LDM_STM_OPS
];
13240 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13241 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13242 int order
[MAX_LDM_STM_OPS
];
13244 rtx base_reg_rtx
= NULL
;
13247 /* Write back of base register is currently only supported for Thumb 1. */
13248 int base_writeback
= TARGET_THUMB1
;
13250 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13251 easily extended if required. */
13252 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13254 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13256 /* Loop over the operands and check that the memory references are
13257 suitable (i.e. immediate offsets from the same base register). At
13258 the same time, extract the target register, and the memory
13260 for (i
= 0; i
< nops
; i
++)
13265 /* Convert a subreg of a mem into the mem itself. */
13266 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13267 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13269 gcc_assert (MEM_P (operands
[nops
+ i
]));
13271 /* Don't reorder volatile memory references; it doesn't seem worth
13272 looking for the case where the order is ok anyway. */
13273 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13276 offset
= const0_rtx
;
13278 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13279 || (GET_CODE (reg
) == SUBREG
13280 && REG_P (reg
= SUBREG_REG (reg
))))
13281 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13282 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13283 || (GET_CODE (reg
) == SUBREG
13284 && REG_P (reg
= SUBREG_REG (reg
))))
13285 && (CONST_INT_P (offset
13286 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13288 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13289 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13290 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13294 base_reg
= REGNO (reg
);
13295 base_reg_rtx
= reg
;
13296 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13299 else if (base_reg
!= (int) REGNO (reg
))
13300 /* Not addressed from the same base register. */
13303 /* If it isn't an integer register, then we can't do this. */
13304 if (unsorted_regs
[i
] < 0
13305 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13306 /* The effects are unpredictable if the base register is
13307 both updated and stored. */
13308 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13309 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13310 || unsorted_regs
[i
] > 14)
13313 unsorted_offsets
[i
] = INTVAL (offset
);
13314 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13318 /* Not a suitable memory address. */
13322 /* All the useful information has now been extracted from the
13323 operands into unsorted_regs and unsorted_offsets; additionally,
13324 order[0] has been set to the lowest offset in the list. Sort
13325 the offsets into order, verifying that they are adjacent, and
13326 check that the register numbers are ascending. */
13327 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13328 check_regs
? unsorted_regs
: NULL
))
13332 memcpy (saved_order
, order
, sizeof order
);
13338 for (i
= 0; i
< nops
; i
++)
13340 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13342 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13345 *load_offset
= unsorted_offsets
[order
[0]];
13349 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13352 if (unsorted_offsets
[order
[0]] == 0)
13353 stm_case
= 1; /* stmia */
13354 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13355 stm_case
= 2; /* stmib */
13356 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13357 stm_case
= 3; /* stmda */
13358 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13359 stm_case
= 4; /* stmdb */
13363 if (!multiple_operation_profitable_p (false, nops
, 0))
13369 /* Routines for use in generating RTL. */
13371 /* Generate a load-multiple instruction. COUNT is the number of loads in
13372 the instruction; REGS and MEMS are arrays containing the operands.
13373 BASEREG is the base register to be used in addressing the memory operands.
13374 WBACK_OFFSET is nonzero if the instruction should update the base
13378 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13379 HOST_WIDE_INT wback_offset
)
13384 if (!multiple_operation_profitable_p (false, count
, 0))
13390 for (i
= 0; i
< count
; i
++)
13391 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13393 if (wback_offset
!= 0)
13394 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13396 seq
= get_insns ();
13402 result
= gen_rtx_PARALLEL (VOIDmode
,
13403 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13404 if (wback_offset
!= 0)
13406 XVECEXP (result
, 0, 0)
13407 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13412 for (j
= 0; i
< count
; i
++, j
++)
13413 XVECEXP (result
, 0, i
)
13414 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13419 /* Generate a store-multiple instruction. COUNT is the number of stores in
13420 the instruction; REGS and MEMS are arrays containing the operands.
13421 BASEREG is the base register to be used in addressing the memory operands.
13422 WBACK_OFFSET is nonzero if the instruction should update the base
13426 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13427 HOST_WIDE_INT wback_offset
)
13432 if (GET_CODE (basereg
) == PLUS
)
13433 basereg
= XEXP (basereg
, 0);
13435 if (!multiple_operation_profitable_p (false, count
, 0))
13441 for (i
= 0; i
< count
; i
++)
13442 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
13444 if (wback_offset
!= 0)
13445 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13447 seq
= get_insns ();
13453 result
= gen_rtx_PARALLEL (VOIDmode
,
13454 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13455 if (wback_offset
!= 0)
13457 XVECEXP (result
, 0, 0)
13458 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13463 for (j
= 0; i
< count
; i
++, j
++)
13464 XVECEXP (result
, 0, i
)
13465 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
13470 /* Generate either a load-multiple or a store-multiple instruction. This
13471 function can be used in situations where we can start with a single MEM
13472 rtx and adjust its address upwards.
13473 COUNT is the number of operations in the instruction, not counting a
13474 possible update of the base register. REGS is an array containing the
13476 BASEREG is the base register to be used in addressing the memory operands,
13477 which are constructed from BASEMEM.
13478 WRITE_BACK specifies whether the generated instruction should include an
13479 update of the base register.
13480 OFFSETP is used to pass an offset to and from this function; this offset
13481 is not used when constructing the address (instead BASEMEM should have an
13482 appropriate offset in its address), it is used only for setting
13483 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13486 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
13487 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
13489 rtx mems
[MAX_LDM_STM_OPS
];
13490 HOST_WIDE_INT offset
= *offsetp
;
13493 gcc_assert (count
<= MAX_LDM_STM_OPS
);
13495 if (GET_CODE (basereg
) == PLUS
)
13496 basereg
= XEXP (basereg
, 0);
13498 for (i
= 0; i
< count
; i
++)
13500 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
13501 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
13509 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
13510 write_back
? 4 * count
: 0);
13512 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
13513 write_back
? 4 * count
: 0);
13517 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13518 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13520 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
13525 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13526 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13528 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
13532 /* Called from a peephole2 expander to turn a sequence of loads into an
13533 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13534 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13535 is true if we can reorder the registers because they are used commutatively
13537 Returns true iff we could generate a new instruction. */
13540 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
13542 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13543 rtx mems
[MAX_LDM_STM_OPS
];
13544 int i
, j
, base_reg
;
13546 HOST_WIDE_INT offset
;
13547 int write_back
= FALSE
;
13551 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
13552 &base_reg
, &offset
, !sort_regs
);
13558 for (i
= 0; i
< nops
- 1; i
++)
13559 for (j
= i
+ 1; j
< nops
; j
++)
13560 if (regs
[i
] > regs
[j
])
13566 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13570 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
13571 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
13577 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
13578 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
13580 if (!TARGET_THUMB1
)
13582 base_reg
= regs
[0];
13583 base_reg_rtx
= newbase
;
13587 for (i
= 0; i
< nops
; i
++)
13589 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13590 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13593 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13594 write_back
? offset
+ i
* 4 : 0));
13598 /* Called from a peephole2 expander to turn a sequence of stores into an
13599 STM instruction. OPERANDS are the operands found by the peephole matcher;
13600 NOPS indicates how many separate stores we are trying to combine.
13601 Returns true iff we could generate a new instruction. */
13604 gen_stm_seq (rtx
*operands
, int nops
)
13607 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13608 rtx mems
[MAX_LDM_STM_OPS
];
13611 HOST_WIDE_INT offset
;
13612 int write_back
= FALSE
;
13615 bool base_reg_dies
;
13617 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
13618 mem_order
, &base_reg
, &offset
, true);
13623 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13625 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
13628 gcc_assert (base_reg_dies
);
13634 gcc_assert (base_reg_dies
);
13635 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13639 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13641 for (i
= 0; i
< nops
; i
++)
13643 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13644 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13647 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13648 write_back
? offset
+ i
* 4 : 0));
13652 /* Called from a peephole2 expander to turn a sequence of stores that are
13653 preceded by constant loads into an STM instruction. OPERANDS are the
13654 operands found by the peephole matcher; NOPS indicates how many
13655 separate stores we are trying to combine; there are 2 * NOPS
13656 instructions in the peephole.
13657 Returns true iff we could generate a new instruction. */
13660 gen_const_stm_seq (rtx
*operands
, int nops
)
13662 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
13663 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13664 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
13665 rtx mems
[MAX_LDM_STM_OPS
];
13668 HOST_WIDE_INT offset
;
13669 int write_back
= FALSE
;
13672 bool base_reg_dies
;
13674 HARD_REG_SET allocated
;
13676 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
13677 mem_order
, &base_reg
, &offset
, false);
13682 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
13684 /* If the same register is used more than once, try to find a free
13686 CLEAR_HARD_REG_SET (allocated
);
13687 for (i
= 0; i
< nops
; i
++)
13689 for (j
= i
+ 1; j
< nops
; j
++)
13690 if (regs
[i
] == regs
[j
])
13692 rtx t
= peep2_find_free_register (0, nops
* 2,
13693 TARGET_THUMB1
? "l" : "r",
13694 SImode
, &allocated
);
13698 regs
[i
] = REGNO (t
);
13702 /* Compute an ordering that maps the register numbers to an ascending
13705 for (i
= 0; i
< nops
; i
++)
13706 if (regs
[i
] < regs
[reg_order
[0]])
13709 for (i
= 1; i
< nops
; i
++)
13711 int this_order
= reg_order
[i
- 1];
13712 for (j
= 0; j
< nops
; j
++)
13713 if (regs
[j
] > regs
[reg_order
[i
- 1]]
13714 && (this_order
== reg_order
[i
- 1]
13715 || regs
[j
] < regs
[this_order
]))
13717 reg_order
[i
] = this_order
;
13720 /* Ensure that registers that must be live after the instruction end
13721 up with the correct value. */
13722 for (i
= 0; i
< nops
; i
++)
13724 int this_order
= reg_order
[i
];
13725 if ((this_order
!= mem_order
[i
]
13726 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
13727 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
13731 /* Load the constants. */
13732 for (i
= 0; i
< nops
; i
++)
13734 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
13735 sorted_regs
[i
] = regs
[reg_order
[i
]];
13736 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
13739 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13741 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
13744 gcc_assert (base_reg_dies
);
13750 gcc_assert (base_reg_dies
);
13751 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13755 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13757 for (i
= 0; i
< nops
; i
++)
13759 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13760 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13763 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
13764 write_back
? offset
+ i
* 4 : 0));
13768 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13769 unaligned copies on processors which support unaligned semantics for those
13770 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13771 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13772 An interleave factor of 1 (the minimum) will perform no interleaving.
13773 Load/store multiple are used for aligned addresses where possible. */
13776 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
13777 HOST_WIDE_INT length
,
13778 unsigned int interleave_factor
)
13780 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
13781 int *regnos
= XALLOCAVEC (int, interleave_factor
);
13782 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
13783 HOST_WIDE_INT i
, j
;
13784 HOST_WIDE_INT remaining
= length
, words
;
13785 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
13787 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
13788 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
13789 HOST_WIDE_INT srcoffset
, dstoffset
;
13790 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
13793 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
13795 /* Use hard registers if we have aligned source or destination so we can use
13796 load/store multiple with contiguous registers. */
13797 if (dst_aligned
|| src_aligned
)
13798 for (i
= 0; i
< interleave_factor
; i
++)
13799 regs
[i
] = gen_rtx_REG (SImode
, i
);
13801 for (i
= 0; i
< interleave_factor
; i
++)
13802 regs
[i
] = gen_reg_rtx (SImode
);
13804 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
13805 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
13807 srcoffset
= dstoffset
= 0;
13809 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13810 For copying the last bytes we want to subtract this offset again. */
13811 src_autoinc
= dst_autoinc
= 0;
13813 for (i
= 0; i
< interleave_factor
; i
++)
13816 /* Copy BLOCK_SIZE_BYTES chunks. */
13818 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
13821 if (src_aligned
&& interleave_factor
> 1)
13823 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
13824 TRUE
, srcbase
, &srcoffset
));
13825 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13829 for (j
= 0; j
< interleave_factor
; j
++)
13831 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
13833 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13834 srcoffset
+ j
* UNITS_PER_WORD
);
13835 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13837 srcoffset
+= block_size_bytes
;
13841 if (dst_aligned
&& interleave_factor
> 1)
13843 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
13844 TRUE
, dstbase
, &dstoffset
));
13845 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13849 for (j
= 0; j
< interleave_factor
; j
++)
13851 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
13853 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13854 dstoffset
+ j
* UNITS_PER_WORD
);
13855 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13857 dstoffset
+= block_size_bytes
;
13860 remaining
-= block_size_bytes
;
13863 /* Copy any whole words left (note these aren't interleaved with any
13864 subsequent halfword/byte load/stores in the interests of simplicity). */
13866 words
= remaining
/ UNITS_PER_WORD
;
13868 gcc_assert (words
< interleave_factor
);
13870 if (src_aligned
&& words
> 1)
13872 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
13874 src_autoinc
+= UNITS_PER_WORD
* words
;
13878 for (j
= 0; j
< words
; j
++)
13880 addr
= plus_constant (Pmode
, src
,
13881 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
13882 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13883 srcoffset
+ j
* UNITS_PER_WORD
);
13885 emit_move_insn (regs
[j
], mem
);
13887 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13889 srcoffset
+= words
* UNITS_PER_WORD
;
13892 if (dst_aligned
&& words
> 1)
13894 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
13896 dst_autoinc
+= words
* UNITS_PER_WORD
;
13900 for (j
= 0; j
< words
; j
++)
13902 addr
= plus_constant (Pmode
, dst
,
13903 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
13904 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13905 dstoffset
+ j
* UNITS_PER_WORD
);
13907 emit_move_insn (mem
, regs
[j
]);
13909 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13911 dstoffset
+= words
* UNITS_PER_WORD
;
13914 remaining
-= words
* UNITS_PER_WORD
;
13916 gcc_assert (remaining
< 4);
13918 /* Copy a halfword if necessary. */
13920 if (remaining
>= 2)
13922 halfword_tmp
= gen_reg_rtx (SImode
);
13924 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
13925 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
13926 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
13928 /* Either write out immediately, or delay until we've loaded the last
13929 byte, depending on interleave factor. */
13930 if (interleave_factor
== 1)
13932 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13933 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
13934 emit_insn (gen_unaligned_storehi (mem
,
13935 gen_lowpart (HImode
, halfword_tmp
)));
13936 halfword_tmp
= NULL
;
13944 gcc_assert (remaining
< 2);
13946 /* Copy last byte. */
13948 if ((remaining
& 1) != 0)
13950 byte_tmp
= gen_reg_rtx (SImode
);
13952 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
13953 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
13954 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
13956 if (interleave_factor
== 1)
13958 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13959 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
13960 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
13969 /* Store last halfword if we haven't done so already. */
13973 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13974 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
13975 emit_insn (gen_unaligned_storehi (mem
,
13976 gen_lowpart (HImode
, halfword_tmp
)));
13980 /* Likewise for last byte. */
13984 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13985 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
13986 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
13990 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
13993 /* From mips_adjust_block_mem:
13995 Helper function for doing a loop-based block operation on memory
13996 reference MEM. Each iteration of the loop will operate on LENGTH
13999 Create a new base register for use within the loop and point it to
14000 the start of MEM. Create a new memory reference that uses this
14001 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14004 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
14007 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14009 /* Although the new mem does not refer to a known location,
14010 it does keep up to LENGTH bytes of alignment. */
14011 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14012 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
14015 /* From mips_block_move_loop:
14017 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14018 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14019 the memory regions do not overlap. */
14022 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14023 unsigned int interleave_factor
,
14024 HOST_WIDE_INT bytes_per_iter
)
14026 rtx src_reg
, dest_reg
, final_src
, test
;
14027 HOST_WIDE_INT leftover
;
14029 leftover
= length
% bytes_per_iter
;
14030 length
-= leftover
;
14032 /* Create registers and memory references for use within the loop. */
14033 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14034 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14036 /* Calculate the value that SRC_REG should have after the last iteration of
14038 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14039 0, 0, OPTAB_WIDEN
);
14041 /* Emit the start of the loop. */
14042 rtx_code_label
*label
= gen_label_rtx ();
14043 emit_label (label
);
14045 /* Emit the loop body. */
14046 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14047 interleave_factor
);
14049 /* Move on to the next block. */
14050 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14051 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14053 /* Emit the loop condition. */
14054 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14055 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14057 /* Mop up any left-over bytes. */
14059 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14062 /* Emit a block move when either the source or destination is unaligned (not
14063 aligned to a four-byte boundary). This may need further tuning depending on
14064 core type, optimize_size setting, etc. */
14067 arm_movmemqi_unaligned (rtx
*operands
)
14069 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14073 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14074 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14075 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14076 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14077 or dst_aligned though: allow more interleaving in those cases since the
14078 resulting code can be smaller. */
14079 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14080 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14083 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14084 interleave_factor
, bytes_per_iter
);
14086 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14087 interleave_factor
);
14091 /* Note that the loop created by arm_block_move_unaligned_loop may be
14092 subject to loop unrolling, which makes tuning this condition a little
14095 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14097 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
/* NOTE(review): this extract is mangled by the exporter -- tokens are split
   across lines, the original file's line numbers are embedded in the text,
   and some source lines were dropped.  Code text is preserved byte-for-byte
   below; only comments have been added.  */
/* Expand a small constant-length block copy (movmemqi).  operands[0]/[1]
   are dst/src MEMs, operands[2] the length, operands[3] the alignment.
   Bails out when length/alignment are not CONST_INT or length > 64
   (the return statement for that bail-out was dropped from this extract).
   Copies whole words with load/store-multiple, then finishes the 1-3
   trailing bytes with halfword/byte moves.  */
14104 arm_gen_movmemqi (rtx
*operands
)
14106 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14107 HOST_WIDE_INT srcoffset
, dstoffset
;
14109 rtx src
, dst
, srcbase
, dstbase
;
14110 rtx part_bytes_reg
= NULL
;
14113 if (!CONST_INT_P (operands
[2])
14114 || !CONST_INT_P (operands
[3])
14115 || INTVAL (operands
[2]) > 64)
/* Unaligned buffers with unaligned-access support use the dedicated
   expansion instead.  */
14118 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14119 return arm_movmemqi_unaligned (operands
);
14121 if (INTVAL (operands
[3]) & 3)
14124 dstbase
= operands
[0];
14125 srcbase
= operands
[1];
/* Force both addresses into registers so we can step through them.  */
14127 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14128 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14130 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14131 out_words_to_go
= INTVAL (operands
[2]) / 4;
14132 last_bytes
= INTVAL (operands
[2]) & 3;
14133 dstoffset
= srcoffset
= 0;
14135 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14136 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
/* Main loop: move up to four words per iteration with ldm/stm.  */
14138 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14140 if (in_words_to_go
> 4)
14141 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14142 TRUE
, srcbase
, &srcoffset
));
14144 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14145 src
, FALSE
, srcbase
,
14148 if (out_words_to_go
)
14150 if (out_words_to_go
> 4)
14151 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14152 TRUE
, dstbase
, &dstoffset
))
;
14153 else if (out_words_to_go
!= 1)
14154 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14155 out_words_to_go
, dst
,
14158 dstbase
, &dstoffset
));
14161 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14162 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
14163 if (last_bytes
!= 0)
14165 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14171 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14172 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14175 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14176 if (out_words_to_go
)
14180 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14181 sreg
= copy_to_reg (mem
);
14183 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14184 emit_move_insn (mem
, sreg
);
14187 gcc_assert (!in_words_to_go
); /* Sanity check */
14190 if (in_words_to_go
)
14192 gcc_assert (in_words_to_go
> 0);
14194 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14195 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14198 gcc_assert (!last_bytes
|| part_bytes_reg
);
14200 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14202 rtx tmp
= gen_reg_rtx (SImode
);
14204 /* The bytes we want are in the top end of the word. */
14205 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14206 GEN_INT (8 * (4 - last_bytes
))));
14207 part_bytes_reg
= tmp
;
/* Big-endian: store the trailing bytes one at a time, highest address
   first, shifting the word right by 8 between stores.  */
14211 mem
= adjust_automodify_address (dstbase
, QImode
,
14212 plus_constant (Pmode
, dst
,
14214 dstoffset
+ last_bytes
- 1);
14215 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14219 tmp
= gen_reg_rtx (SImode
);
14220 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14221 part_bytes_reg
= tmp
;
/* Little-endian: a halfword store first (if >= 2 trailing bytes),
   then a final byte store if a byte remains.  */
14228 if (last_bytes
> 1)
14230 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14231 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14235 rtx tmp
= gen_reg_rtx (SImode
);
14236 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14237 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14238 part_bytes_reg
= tmp
;
14245 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14246 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14253 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14256 next_consecutive_mem (rtx mem
)
14258 machine_mode mode
= GET_MODE (mem
);
14259 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14260 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14262 return adjust_automodify_address (mem
, mode
, addr
, offset
);
/* NOTE(review): this extract is mangled by the exporter -- tokens are split
   across lines, original line numbers are embedded, and some source lines
   (loop headers, branch alternatives, returns) were dropped.  Code text is
   preserved byte-for-byte; only comments have been added.  */
14265 /* Copy using LDRD/STRD instructions whenever possible.
14266 Returns true upon success. */
14268 gen_movmem_ldrd_strd (rtx
*operands
)
14270 unsigned HOST_WIDE_INT len
;
14271 HOST_WIDE_INT align
;
14272 rtx src
, dst
, base
;
14274 bool src_aligned
, dst_aligned
;
14275 bool src_volatile
, dst_volatile
;
14277 gcc_assert (CONST_INT_P (operands
[2]));
14278 gcc_assert (CONST_INT_P (operands
[3]));
14280 len
= UINTVAL (operands
[2]);
14284 /* Maximum alignment we can assume for both src and dst buffers. */
14285 align
= INTVAL (operands
[3]);
14287 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14290 /* Place src and dst addresses in registers
14291 and update the corresponding mem rtx. */
14293 dst_volatile
= MEM_VOLATILE_P (dst
);
14294 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14295 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14296 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14299 src_volatile
= MEM_VOLATILE_P (src
);
14300 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14301 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14302 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14304 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14307 if (src_volatile
|| dst_volatile
)
14310 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14311 if (!(dst_aligned
|| src_aligned
))
14312 return arm_gen_movmemqi (operands
);
14314 /* If the either src or dst is unaligned we'll be accessing it as pairs
14315 of unaligned SImode accesses. Otherwise we can generate DImode
14316 ldrd/strd instructions. */
14317 src
= adjust_address (src
, src_aligned
? DImode
: SImode
, 0);
14318 dst
= adjust_address (dst
, dst_aligned
? DImode
: SImode
, 0);
/* Double-word copy loop (the loop header itself was dropped from this
   extract): each iteration moves 8 bytes, via one DImode move when the
   side is aligned, otherwise via a pair of unaligned SImode accesses.  */
14323 reg0
= gen_reg_rtx (DImode
);
14324 rtx low_reg
= NULL_RTX
;
14325 rtx hi_reg
= NULL_RTX
;
14327 if (!src_aligned
|| !dst_aligned
)
14329 low_reg
= gen_lowpart (SImode
, reg0
);
14330 hi_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
14333 emit_move_insn (reg0
, src
);
14336 emit_insn (gen_unaligned_loadsi (low_reg
, src
));
14337 src
= next_consecutive_mem (src
);
14338 emit_insn (gen_unaligned_loadsi (hi_reg
, src
));
14342 emit_move_insn (dst
, reg0
);
14345 emit_insn (gen_unaligned_storesi (dst
, low_reg
));
14346 dst
= next_consecutive_mem (dst
);
14347 emit_insn (gen_unaligned_storesi (dst
, hi_reg
));
14350 src
= next_consecutive_mem (src
);
14351 dst
= next_consecutive_mem (dst
);
14354 gcc_assert (len
< 8);
14357 /* More than a word but less than a double-word to copy. Copy a word. */
14358 reg0
= gen_reg_rtx (SImode
);
14359 src
= adjust_address (src
, SImode
, 0);
14360 dst
= adjust_address (dst
, SImode
, 0);
14362 emit_move_insn (reg0
, src
);
14364 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14367 emit_move_insn (dst
, reg0
);
14369 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14371 src
= next_consecutive_mem (src
);
14372 dst
= next_consecutive_mem (dst
);
14379 /* Copy the remaining bytes. */
/* Remaining halfword, via zero-extend load when aligned, otherwise the
   unaligned halfword patterns.  */
14382 dst
= adjust_address (dst
, HImode
, 0);
14383 src
= adjust_address (src
, HImode
, 0);
14384 reg0
= gen_reg_rtx (SImode
);
14386 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14388 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14391 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14393 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14395 src
= next_consecutive_mem (src
);
14396 dst
= next_consecutive_mem (dst
);
/* Final single byte, if any.  */
14401 dst
= adjust_address (dst
, QImode
, 0);
14402 src
= adjust_address (src
, QImode
, 0);
14403 reg0
= gen_reg_rtx (QImode
);
14404 emit_move_insn (reg0
, src
);
14405 emit_move_insn (dst
, reg0
);
/* NOTE(review): this extract is mangled -- tokens split across lines,
   original line numbers embedded, and many switch labels/branches were
   dropped (hence the apparently stray `case'/`return' lines).  Code text
   is preserved byte-for-byte; only comments have been added.  */
14409 /* Select a dominance comparison mode if possible for a test of the general
14410 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14411 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14412 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14413 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14414 In all cases OP will be either EQ or NE, but we don't need to know which
14415 here. If we are unable to support a dominance comparison we return
14416 CC mode. This will then fail to match for the RTL expressions that
14417 generate this call. */
14419 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14421 enum rtx_code cond1
, cond2
;
14424 /* Currently we will probably get the wrong result if the individual
14425 comparisons are not simple. This also ensures that it is safe to
14426 reverse a comparison if necessary. */
14427 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14429 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14433 /* The if_then_else variant of this tests the second condition if the
14434 first passes, but is true if the first fails. Reverse the first
14435 condition to get a true "inclusive-or" expression. */
14436 if (cond_or
== DOM_CC_NX_OR_Y
)
14437 cond1
= reverse_condition (cond1
);
14439 /* If the comparisons are not equal, and one doesn't dominate the other,
14440 then we can't do this. */
14442 && !comparison_dominates_p (cond1
, cond2
)
14443 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14447 std::swap (cond1
, cond2
);
/* Dispatch on the dominating condition; each arm picks a CC_D* mode.
   The enclosing switch statements were dropped from this extract.  */
14452 if (cond_or
== DOM_CC_X_AND_Y
)
14457 case EQ
: return CC_DEQmode
;
14458 case LE
: return CC_DLEmode
;
14459 case LEU
: return CC_DLEUmode
;
14460 case GE
: return CC_DGEmode
;
14461 case GEU
: return CC_DGEUmode
;
14462 default: gcc_unreachable ();
14466 if (cond_or
== DOM_CC_X_AND_Y
)
14478 gcc_unreachable ();
14482 if (cond_or
== DOM_CC_X_AND_Y
)
14494 gcc_unreachable ();
14498 if (cond_or
== DOM_CC_X_AND_Y
)
14499 return CC_DLTUmode
;
14504 return CC_DLTUmode
;
14506 return CC_DLEUmode
;
14510 gcc_unreachable ();
14514 if (cond_or
== DOM_CC_X_AND_Y
)
14515 return CC_DGTUmode
;
14520 return CC_DGTUmode
;
14522 return CC_DGEUmode
;
14526 gcc_unreachable ();
14529 /* The remaining cases only occur when both comparisons are the
14532 gcc_assert (cond1
== cond2
);
14536 gcc_assert (cond1
== cond2
);
14540 gcc_assert (cond1
== cond2
);
14544 gcc_assert (cond1
== cond2
);
14545 return CC_DLEUmode
;
14548 gcc_assert (cond1
== cond2
);
14549 return CC_DGEUmode
;
14552 gcc_unreachable ();
/* NOTE(review): mangled extract -- tokens split across lines, original line
   numbers embedded, several branches and return statements dropped.  Code
   text preserved byte-for-byte; only comments added.  */
/* Choose the condition-code mode for comparing X against Y with operator
   OP.  Special CC modes encode which flags a comparison pattern actually
   sets/needs (see the CC_*mode returns below).  */
14557 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
14559 /* All floating point compares return CCFP if it is an equality
14560 comparison, and CCFPE otherwise. */
14561 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14584 gcc_unreachable ();
14588 /* A compare with a shifted operand. Because of canonicalization, the
14589 comparison will have to be swapped when we emit the assembler. */
14590 if (GET_MODE (y
) == SImode
14591 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14592 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14593 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
14594 || GET_CODE (x
) == ROTATERT
))
14597 /* This operation is performed swapped, but since we only rely on the Z
14598 flag we don't need an additional mode. */
14599 if (GET_MODE (y
) == SImode
14600 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14601 && GET_CODE (x
) == NEG
14602 && (op
== EQ
|| op
== NE
))
14605 /* This is a special case that is used by combine to allow a
14606 comparison of a shifted byte load to be split into a zero-extend
14607 followed by a comparison of the shifted integer (only valid for
14608 equalities and unsigned inequalities). */
14609 if (GET_MODE (x
) == SImode
14610 && GET_CODE (x
) == ASHIFT
14611 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
14612 && GET_CODE (XEXP (x
, 0)) == SUBREG
14613 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
14614 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
14615 && (op
== EQ
|| op
== NE
14616 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
14617 && CONST_INT_P (y
))
14620 /* A construct for a conditional compare, if the false arm contains
14621 0, then both conditions must be true, otherwise either condition
14622 must be true. Not all conditions are possible, so CCmode is
14623 returned if it can't be done. */
14624 if (GET_CODE (x
) == IF_THEN_ELSE
14625 && (XEXP (x
, 2) == const0_rtx
14626 || XEXP (x
, 2) == const1_rtx
)
14627 && COMPARISON_P (XEXP (x
, 0))
14628 && COMPARISON_P (XEXP (x
, 1)))
14629 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14630 INTVAL (XEXP (x
, 2)));
14632 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14633 if (GET_CODE (x
) == AND
14634 && (op
== EQ
|| op
== NE
)
14635 && COMPARISON_P (XEXP (x
, 0))
14636 && COMPARISON_P (XEXP (x
, 1)))
14637 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14640 if (GET_CODE (x
) == IOR
14641 && (op
== EQ
|| op
== NE
)
14642 && COMPARISON_P (XEXP (x
, 0))
14643 && COMPARISON_P (XEXP (x
, 1)))
14644 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14647 /* An operation (on Thumb) where we want to test for a single bit.
14648 This is done by shifting that bit up into the top bit of a
14649 scratch register; we can then branch on the sign bit. */
14651 && GET_MODE (x
) == SImode
14652 && (op
== EQ
|| op
== NE
)
14653 && GET_CODE (x
) == ZERO_EXTRACT
14654 && XEXP (x
, 1) == const1_rtx
)
14657 /* An operation that sets the condition codes as a side-effect, the
14658 V flag is not set correctly, so we can only use comparisons where
14659 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14661 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14662 if (GET_MODE (x
) == SImode
14664 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
14665 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
14666 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
14667 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
14668 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
14669 || GET_CODE (x
) == LSHIFTRT
14670 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14671 || GET_CODE (x
) == ROTATERT
14672 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
14673 return CC_NOOVmode
;
14675 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
14678 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
14679 && GET_CODE (x
) == PLUS
14680 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
/* DImode comparisons: handled by splitting into word operations; the
   mode chosen depends on OP and on Y (branches dropped from extract).  */
14683 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
14689 /* A DImode comparison against zero can be implemented by
14690 or'ing the two halves together. */
14691 if (y
== const0_rtx
)
14694 /* We can do an equality test in three Thumb instructions. */
14704 /* DImode unsigned comparisons can be implemented by cmp +
14705 cmpeq without a scratch register. Not worth doing in
14716 /* DImode signed and unsigned comparisons can be implemented
14717 by cmp + sbcs with a scratch register, but that does not
14718 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14719 gcc_assert (op
!= EQ
&& op
!= NE
);
14723 gcc_unreachable ();
14727 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
14728 return GET_MODE (x
);
/* NOTE(review): mangled extract -- tokens split across lines, original line
   numbers embedded, local declarations and the final `return cc_reg;' were
   dropped.  Code text preserved byte-for-byte; only comments added.  */
14733 /* X and Y are two things to compare using CODE. Emit the compare insn and
14734 return the rtx for register 0 in the proper mode. FP means this is a
14735 floating point compare: I don't think that it is needed on the arm. */
14737 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
14741 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
14743 /* We might have X as a constant, Y as a register because of the predicates
14744 used for cmpdi. If so, force X to a register here. */
14745 if (dimode_comparison
&& !REG_P (x
))
14746 x
= force_reg (DImode
, x
);
14748 mode
= SELECT_CC_MODE (code
, x
, y
);
14749 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
/* DImode compares other than CC_CZmode need a clobbered scratch.  */
14751 if (dimode_comparison
14752 && mode
!= CC_CZmode
)
14756 /* To compare two non-zero values for equality, XOR them and
14757 then compare against zero. Not used for ARM mode; there
14758 CC_CZmode is cheaper. */
14759 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
14761 gcc_assert (!reload_completed
);
14762 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
14766 /* A scratch register is required. */
14767 if (reload_completed
)
14768 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
14770 scratch
= gen_rtx_SCRATCH (SImode
);
14772 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
14773 set
= gen_rtx_SET (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14774 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
/* Simple case: plain compare setting the CC register.  */
14777 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14782 /* Generate a sequence of insns that will generate the correct return
14783 address mask depending on the physical architecture that the program
14786 arm_gen_return_addr_mask (void)
14788 rtx reg
= gen_reg_rtx (Pmode
);
14790 emit_insn (gen_return_addr_mask (reg
));
/* NOTE(review): mangled extract -- tokens split across lines, original line
   numbers embedded, several lines (braces, `lo' computation, offsets in the
   byte loads, shift amounts) were dropped.  Code text preserved
   byte-for-byte; only comments added.  */
/* Reload helper: load an HImode value from a possibly unaligned or
   out-of-range address by synthesizing two byte loads plus shift/or,
   using the DImode scratch in operands[2].  */
14795 arm_reload_in_hi (rtx
*operands
)
14797 rtx ref
= operands
[1];
14799 HOST_WIDE_INT offset
= 0;
14801 if (GET_CODE (ref
) == SUBREG
)
14803 offset
= SUBREG_BYTE (ref
);
14804 ref
= SUBREG_REG (ref
);
14809 /* We have a pseudo which has been spilt onto the stack; there
14810 are two cases here: the first where there is a simple
14811 stack-slot replacement and a second where the stack-slot is
14812 out of range, or is used as a subreg. */
14813 if (reg_equiv_mem (REGNO (ref
)))
14815 ref
= reg_equiv_mem (REGNO (ref
));
14816 base
= find_replacement (&XEXP (ref
, 0));
14819 /* The slot is out of range, or was dressed up in a SUBREG. */
14820 base
= reg_equiv_address (REGNO (ref
));
14822 /* PR 62554: If there is no equivalent memory location then just move
14823 the value as an SImode register move. This happens when the target
14824 architecture variant does not have an HImode register move. */
14827 gcc_assert (REG_P (operands
[0]));
14828 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14829 gen_rtx_SUBREG (SImode
, ref
, 0)));
14834 base
= find_replacement (&XEXP (ref
, 0));
14836 /* Handle the case where the address is too complex to be offset by 1. */
14837 if (GET_CODE (base
) == MINUS
14838 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14840 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14842 emit_set_insn (base_plus
, base
);
14845 else if (GET_CODE (base
) == PLUS
)
14847 /* The addend must be CONST_INT, or we would have dealt with it above. */
14848 HOST_WIDE_INT hi
, lo
;
14850 offset
+= INTVAL (XEXP (base
, 1));
14851 base
= XEXP (base
, 0);
14853 /* Rework the address into a legal sequence of insns. */
14854 /* Valid range for lo is -4095 -> 4095 */
14857 : -((-offset
) & 0xfff));
14859 /* Corner case, if lo is the max offset then we would be out of range
14860 once we have added the additional 1 below, so bump the msb into the
14861 pre-loading insn(s). */
/* Sign-extend (offset - lo) to the low 32 bits so hi + lo == offset.  */
14865 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
14866 ^ (HOST_WIDE_INT
) 0x80000000)
14867 - (HOST_WIDE_INT
) 0x80000000);
14869 gcc_assert (hi
+ lo
== offset
);
14873 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14875 /* Get the base address; addsi3 knows how to handle constants
14876 that require more than one insn. */
14877 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
14883 /* Operands[2] may overlap operands[0] (though it won't overlap
14884 operands[1]), that's why we asked for a DImode reg -- so we can
14885 use the bit that does not overlap. */
14886 if (REGNO (operands
[2]) == REGNO (operands
[0]))
14887 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14889 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
/* Load the two bytes and combine them with shift + IOR, endian-aware.  */
14891 emit_insn (gen_zero_extendqisi2 (scratch
,
14892 gen_rtx_MEM (QImode
,
14893 plus_constant (Pmode
, base
,
14895 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14896 gen_rtx_MEM (QImode
,
14897 plus_constant (Pmode
, base
,
14899 if (!BYTES_BIG_ENDIAN
)
14900 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14901 gen_rtx_IOR (SImode
,
14904 gen_rtx_SUBREG (SImode
, operands
[0], 0),
14908 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14909 gen_rtx_IOR (SImode
,
14910 gen_rtx_ASHIFT (SImode
, scratch
,
14912 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
/* NOTE(review): mangled extract -- tokens split across lines, original line
   numbers embedded, some lines (braces, `lo' computation, store offsets,
   shift amounts) dropped.  Code text preserved byte-for-byte; only
   comments added.  */
14915 /* Handle storing a half-word to memory during reload by synthesizing as two
14916 byte stores. Take care not to clobber the input values until after we
14917 have moved them somewhere safe. This code assumes that if the DImode
14918 scratch in operands[2] overlaps either the input value or output address
14919 in some way, then that value must die in this insn (we absolutely need
14920 two scratch registers for some corner cases). */
14922 arm_reload_out_hi (rtx
*operands
)
14924 rtx ref
= operands
[0];
14925 rtx outval
= operands
[1];
14927 HOST_WIDE_INT offset
= 0;
14929 if (GET_CODE (ref
) == SUBREG
)
14931 offset
= SUBREG_BYTE (ref
);
14932 ref
= SUBREG_REG (ref
);
14937 /* We have a pseudo which has been spilt onto the stack; there
14938 are two cases here: the first where there is a simple
14939 stack-slot replacement and a second where the stack-slot is
14940 out of range, or is used as a subreg. */
14941 if (reg_equiv_mem (REGNO (ref
)))
14943 ref
= reg_equiv_mem (REGNO (ref
));
14944 base
= find_replacement (&XEXP (ref
, 0));
14947 /* The slot is out of range, or was dressed up in a SUBREG. */
14948 base
= reg_equiv_address (REGNO (ref
));
14950 /* PR 62254: If there is no equivalent memory location then just move
14951 the value as an SImode register move. This happens when the target
14952 architecture variant does not have an HImode register move. */
14955 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
14957 if (REG_P (outval
))
14959 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
14960 gen_rtx_SUBREG (SImode
, outval
, 0)));
14962 else /* SUBREG_P (outval) */
14964 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
14965 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
14966 SUBREG_REG (outval
)));
14968 /* FIXME: Handle other cases ? */
14969 gcc_unreachable ();
14975 base
= find_replacement (&XEXP (ref
, 0));
14977 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
14979 /* Handle the case where the address is too complex to be offset by 1. */
14980 if (GET_CODE (base
) == MINUS
14981 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14983 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14985 /* Be careful not to destroy OUTVAL. */
14986 if (reg_overlap_mentioned_p (base_plus
, outval
))
14988 /* Updating base_plus might destroy outval, see if we can
14989 swap the scratch and base_plus. */
14990 if (!reg_overlap_mentioned_p (scratch
, outval
))
14991 std::swap (scratch
, base_plus
);
14994 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
14996 /* Be conservative and copy OUTVAL into the scratch now,
14997 this should only be necessary if outval is a subreg
14998 of something larger than a word. */
14999 /* XXX Might this clobber base? I can't see how it can,
15000 since scratch is known to overlap with OUTVAL, and
15001 must be wider than a word. */
15002 emit_insn (gen_movhi (scratch_hi
, outval
));
15003 outval
= scratch_hi
;
15007 emit_set_insn (base_plus
, base
);
15010 else if (GET_CODE (base
) == PLUS
)
15012 /* The addend must be CONST_INT, or we would have dealt with it above. */
15013 HOST_WIDE_INT hi
, lo
;
15015 offset
+= INTVAL (XEXP (base
, 1));
15016 base
= XEXP (base
, 0);
15018 /* Rework the address into a legal sequence of insns. */
15019 /* Valid range for lo is -4095 -> 4095 */
15022 : -((-offset
) & 0xfff));
15024 /* Corner case, if lo is the max offset then we would be out of range
15025 once we have added the additional 1 below, so bump the msb into the
15026 pre-loading insn(s). */
/* Sign-extend (offset - lo) to the low 32 bits so hi + lo == offset.  */
15030 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15031 ^ (HOST_WIDE_INT
) 0x80000000)
15032 - (HOST_WIDE_INT
) 0x80000000);
15034 gcc_assert (hi
+ lo
== offset
);
15038 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15040 /* Be careful not to destroy OUTVAL. */
15041 if (reg_overlap_mentioned_p (base_plus
, outval
))
15043 /* Updating base_plus might destroy outval, see if we
15044 can swap the scratch and base_plus. */
15045 if (!reg_overlap_mentioned_p (scratch
, outval
))
15046 std::swap (scratch
, base_plus
);
15049 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15051 /* Be conservative and copy outval into scratch now,
15052 this should only be necessary if outval is a
15053 subreg of something larger than a word. */
15054 /* XXX Might this clobber base? I can't see how it
15055 can, since scratch is known to overlap with
15057 emit_insn (gen_movhi (scratch_hi
, outval
));
15058 outval
= scratch_hi
;
15062 /* Get the base address; addsi3 knows how to handle constants
15063 that require more than one insn. */
15064 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
/* Emit the two byte stores: low byte and shifted high byte, order
   depending on endianness.  */
15070 if (BYTES_BIG_ENDIAN
)
15072 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15073 plus_constant (Pmode
, base
,
15075 gen_lowpart (QImode
, outval
)));
15076 emit_insn (gen_lshrsi3 (scratch
,
15077 gen_rtx_SUBREG (SImode
, outval
, 0),
15079 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15081 gen_lowpart (QImode
, scratch
)));
15085 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15087 gen_lowpart (QImode
, outval
)));
15088 emit_insn (gen_lshrsi3 (scratch
,
15089 gen_rtx_SUBREG (SImode
, outval
, 0),
15091 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15092 plus_constant (Pmode
, base
,
15094 gen_lowpart (QImode
, scratch
)));
15098 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15099 (padded to the size of a word) should be passed in a register. */
15102 arm_must_pass_in_stack (machine_mode mode
, const_tree type
)
15104 if (TARGET_AAPCS_BASED
)
15105 return must_pass_in_stack_var_size (mode
, type
);
15107 return must_pass_in_stack_var_size_or_pad (mode
, type
);
/* NOTE(review): mangled extract -- the function's trailing lines (the
   return for the big-endian integral case and the final default return)
   were dropped.  Code text preserved byte-for-byte; only comments added.  */
15111 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15112 Return true if an argument passed on the stack should be padded upwards,
15113 i.e. if the least-significant byte has useful data.
15114 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15115 aggregate types are placed in the lowest memory address. */
15118 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
15120 if (!TARGET_AAPCS_BASED
)
15121 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
/* AAPCS big-endian: integral types get special treatment; the body of
   this branch was dropped from the extract.  */
15123 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
/* NOTE(review): mangled extract -- the return statements for the two
   small-type checks were dropped.  Code text preserved byte-for-byte;
   only comments added.  */
15130 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15131 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15132 register has useful data, and return the opposite if the most
15133 significant byte does. */
15136 arm_pad_reg_upward (machine_mode mode
,
15137 tree type
, int first ATTRIBUTE_UNUSED
)
15139 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15141 /* For AAPCS, small aggregates, small fixed-point types,
15142 and small complex types are always padded upwards. */
15145 if ((AGGREGATE_TYPE_P (type
)
15146 || TREE_CODE (type
) == COMPLEX_TYPE
15147 || FIXED_POINT_TYPE_P (type
))
15148 && int_size_in_bytes (type
) <= 4)
/* Untyped case: small complex/fixed-point modes, by mode size alone.  */
15153 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15154 && GET_MODE_SIZE (mode
) <= 4)
15159 /* Otherwise, use default padding. */
15160 return !BYTES_BIG_ENDIAN
;
/* NOTE(review): mangled extract -- the assignments to max_offset for the
   Thumb-2/ARM branches were dropped, along with an early return.  Code
   text preserved byte-for-byte; only comments added.  */
15163 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15164 assuming that the address in the base register is word aligned. */
15166 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15168 HOST_WIDE_INT max_offset
;
15170 /* Offset must be a multiple of 4 in Thumb mode. */
15171 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
/* max_offset depends on the instruction-set state (assignment lines
   dropped from this extract).  */
15176 else if (TARGET_ARM
)
15181 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
/* NOTE(review): mangled extract -- the REGNO extraction of T/T2/N, several
   returns, and the TARGET_ARM branch header were dropped.  Code text
   preserved byte-for-byte; only comments added.  */
15184 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15185 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15186 Assumes that the address in the base register RN is word aligned. Pattern
15187 guarantees that both memory accesses use the same base register,
15188 the offsets are constants within the range, and the gap between the offsets is 4.
15189 If preload complete then check that registers are legal. WBACK indicates whether
15190 address is updated. LOAD indicates whether memory access is load or store. */
15192 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15193 bool wback
, bool load
)
15195 unsigned int t
, t2
, n
;
15197 if (!reload_completed
)
15200 if (!offset_ok_for_ldrd_strd (offset
))
/* Thumb-2 register-combination restrictions for LDRD/STRD.  */
15207 if ((TARGET_THUMB2
)
15208 && ((wback
&& (n
== t
|| n
== t2
))
15209 || (t
== SP_REGNUM
)
15210 || (t
== PC_REGNUM
)
15211 || (t2
== SP_REGNUM
)
15212 || (t2
== PC_REGNUM
)
15213 || (!load
&& (n
== PC_REGNUM
))
15214 || (load
&& (t
== t2
))
15215 /* Triggers Cortex-M3 LDRD errata. */
15216 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
/* ARM-mode restrictions (branch header dropped from this extract):
   first register must be even, among other checks.  */
15220 && ((wback
&& (n
== t
|| n
== t2
))
15221 || (t2
== PC_REGNUM
)
15222 || (t
% 2 != 0) /* First destination register is not even. */
15224 /* PC can be used as base register (for offset addressing only),
15225 but it is depricated. */
15226 || (n
== PC_REGNUM
)))
/* NOTE(review): mangled extract -- the plain-REG address branch and the
   final `return false' were dropped.  Code text preserved byte-for-byte;
   only comments added.  */
15232 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15233 operand MEM's address contains an immediate offset from the base
15234 register and has no side effects, in which case it sets BASE and
15235 OFFSET accordingly. */
15237 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
15241 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15243 /* TODO: Handle more general memory operand patterns, such as
15244 PRE_DEC and PRE_INC. */
15246 if (side_effects_p (mem
))
15249 /* Can't deal with subregs. */
15250 if (GET_CODE (mem
) == SUBREG
)
15253 gcc_assert (MEM_P (mem
));
15255 *offset
= const0_rtx
;
15257 addr
= XEXP (mem
, 0);
15259 /* If addr isn't valid for DImode, then we can't handle it. */
15260 if (!arm_legitimate_address_p (DImode
, addr
,
15261 reload_in_progress
|| reload_completed
))
/* reg + const (or reg - const) form: decompose into base and offset.  */
15269 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15271 *base
= XEXP (addr
, 0);
15272 *offset
= XEXP (addr
, 1);
15273 return (REG_P (*base
) && CONST_INT_P (*offset
));
15279 /* Called from a peephole2 to replace two word-size accesses with a
15280 single LDRD/STRD instruction. Returns true iff we can generate a
15281 new instruction sequence. That is, both accesses use the same base
15282 register and the gap between constant offsets is 4. This function
15283 may reorder its operands to match ldrd/strd RTL templates.
15284 OPERANDS are the operands found by the peephole matcher;
15285 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15286 corresponding memory operands. LOAD indicaates whether the access
15287 is load or store. CONST_STORE indicates a store of constant
15288 integer values held in OPERANDS[4,5] and assumes that the pattern
15289 is of length 4 insn, for the purpose of checking dead registers.
15290 COMMUTE indicates that register operands may be reordered. */
15292 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15293 bool const_store
, bool commute
)
15296 HOST_WIDE_INT offsets
[2], offset
;
15297 rtx base
= NULL_RTX
;
15298 rtx cur_base
, cur_offset
, tmp
;
15300 HARD_REG_SET regset
;
15302 gcc_assert (!const_store
|| !load
);
15303 /* Check that the memory references are immediate offsets from the
15304 same base register. Extract the base register, the destination
15305 registers, and the corresponding memory offsets. */
15306 for (i
= 0; i
< nops
; i
++)
15308 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
15313 else if (REGNO (base
) != REGNO (cur_base
))
15316 offsets
[i
] = INTVAL (cur_offset
);
15317 if (GET_CODE (operands
[i
]) == SUBREG
)
15319 tmp
= SUBREG_REG (operands
[i
]);
15320 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15325 /* Make sure there is no dependency between the individual loads. */
15326 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15327 return false; /* RAW */
15329 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15330 return false; /* WAW */
15332 /* If the same input register is used in both stores
15333 when storing different constants, try to find a free register.
15334 For example, the code
15339 can be transformed into
15343 in Thumb mode assuming that r1 is free.
15344 For ARM mode do the same but only if the starting register
15345 can be made to be even. */
15347 && REGNO (operands
[0]) == REGNO (operands
[1])
15348 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15352 CLEAR_HARD_REG_SET (regset
);
15353 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15354 if (tmp
== NULL_RTX
)
15357 /* Use the new register in the first load to ensure that
15358 if the original input register is not dead after peephole,
15359 then it will have the correct constant value. */
15362 else if (TARGET_ARM
)
15364 int regno
= REGNO (operands
[0]);
15365 if (!peep2_reg_dead_p (4, operands
[0]))
15367 /* When the input register is even and is not dead after the
15368 pattern, it has to hold the second constant but we cannot
15369 form a legal STRD in ARM mode with this register as the second
15371 if (regno
% 2 == 0)
15374 /* Is regno-1 free? */
15375 SET_HARD_REG_SET (regset
);
15376 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15377 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15378 if (tmp
== NULL_RTX
)
15385 /* Find a DImode register. */
15386 CLEAR_HARD_REG_SET (regset
);
15387 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15388 if (tmp
!= NULL_RTX
)
15390 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15391 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15395 /* Can we use the input register to form a DI register? */
15396 SET_HARD_REG_SET (regset
);
15397 CLEAR_HARD_REG_BIT(regset
,
15398 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15399 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15400 if (tmp
== NULL_RTX
)
15402 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
15406 gcc_assert (operands
[0] != NULL_RTX
);
15407 gcc_assert (operands
[1] != NULL_RTX
);
15408 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15409 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15413 /* Make sure the instructions are ordered with lower memory access first. */
15414 if (offsets
[0] > offsets
[1])
15416 gap
= offsets
[0] - offsets
[1];
15417 offset
= offsets
[1];
15419 /* Swap the instructions such that lower memory is accessed first. */
15420 std::swap (operands
[0], operands
[1]);
15421 std::swap (operands
[2], operands
[3]);
15423 std::swap (operands
[4], operands
[5]);
15427 gap
= offsets
[1] - offsets
[0];
15428 offset
= offsets
[0];
15431 /* Make sure accesses are to consecutive memory locations. */
15435 /* Make sure we generate legal instructions. */
15436 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15440 /* In Thumb state, where registers are almost unconstrained, there
15441 is little hope to fix it. */
15445 if (load
&& commute
)
15447 /* Try reordering registers. */
15448 std::swap (operands
[0], operands
[1]);
15449 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15456 /* If input registers are dead after this pattern, they can be
15457 reordered or replaced by other registers that are free in the
15458 current pattern. */
15459 if (!peep2_reg_dead_p (4, operands
[0])
15460 || !peep2_reg_dead_p (4, operands
[1]))
15463 /* Try to reorder the input registers. */
15464 /* For example, the code
15469 can be transformed into
15474 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15477 std::swap (operands
[0], operands
[1]);
15481 /* Try to find a free DI register. */
15482 CLEAR_HARD_REG_SET (regset
);
15483 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15484 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15487 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15488 if (tmp
== NULL_RTX
)
15491 /* DREG must be an even-numbered register in DImode.
15492 Split it into SI registers. */
15493 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15494 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15495 gcc_assert (operands
[0] != NULL_RTX
);
15496 gcc_assert (operands
[1] != NULL_RTX
);
15497 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15498 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15500 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15512 /* Print a symbolic form of X to the debug file, F. */
15514 arm_print_value (FILE *f
, rtx x
)
15516 switch (GET_CODE (x
))
15519 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15523 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
15531 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
15533 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
15534 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
15542 fprintf (f
, "\"%s\"", XSTR (x
, 0));
15546 fprintf (f
, "`%s'", XSTR (x
, 0));
15550 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
15554 arm_print_value (f
, XEXP (x
, 0));
15558 arm_print_value (f
, XEXP (x
, 0));
15560 arm_print_value (f
, XEXP (x
, 1));
15568 fprintf (f
, "????");
15573 /* Routines for manipulation of the constant pool. */
15575 /* Arm instructions cannot load a large constant directly into a
15576 register; they have to come from a pc relative load. The constant
15577 must therefore be placed in the addressable range of the pc
15578 relative load. Depending on the precise pc relative load
15579 instruction the range is somewhere between 256 bytes and 4k. This
15580 means that we often have to dump a constant inside a function, and
15581 generate code to branch around it.
15583 It is important to minimize this, since the branches will slow
15584 things down and make the code larger.
15586 Normally we can hide the table after an existing unconditional
15587 branch so that there is no interruption of the flow, but in the
15588 worst case the code looks like this:
15606 We fix this by performing a scan after scheduling, which notices
15607 which instructions need to have their operands fetched from the
15608 constant table and builds the table.
15610 The algorithm starts by building a table of all the constants that
15611 need fixing up and all the natural barriers in the function (places
15612 where a constant table can be dropped without breaking the flow).
15613 For each fixup we note how far the pc-relative replacement will be
15614 able to reach and the offset of the instruction into the function.
15616 Having built the table we then group the fixes together to form
15617 tables that are as large as possible (subject to addressing
15618 constraints) and emit each table of constants after the last
15619 barrier that is within range of all the instructions in the group.
15620 If a group does not contain a barrier, then we forcibly create one
15621 by inserting a jump instruction into the flow. Once the table has
15622 been inserted, the insns are then modified to reference the
15623 relevant entry in the pool.
15625 Possible enhancements to the algorithm (not implemented) are:
15627 1) For some processors and object formats, there may be benefit in
15628 aligning the pools to the start of cache lines; this alignment
15629 would need to be taken into account when calculating addressability
15632 /* These typedefs are located at the start of this file, so that
15633 they can be used in the prototypes there. This comment is to
15634 remind readers of that fact so that the following structures
15635 can be understood more easily.
15637 typedef struct minipool_node Mnode;
15638 typedef struct minipool_fixup Mfix; */
15640 struct minipool_node
15642 /* Doubly linked chain of entries. */
15645 /* The maximum offset into the code that this entry can be placed. While
15646 pushing fixes for forward references, all entries are sorted in order
15647 of increasing max_address. */
15648 HOST_WIDE_INT max_address
;
15649 /* Similarly for an entry inserted for a backwards ref. */
15650 HOST_WIDE_INT min_address
;
15651 /* The number of fixes referencing this entry. This can become zero
15652 if we "unpush" an entry. In this case we ignore the entry when we
15653 come to emit the code. */
15655 /* The offset from the start of the minipool. */
15656 HOST_WIDE_INT offset
;
15657 /* The value in table. */
15659 /* The mode of value. */
15661 /* The size of the value. With iWMMXt enabled
15662 sizes > 4 also imply an alignment of 8-bytes. */
15666 struct minipool_fixup
15670 HOST_WIDE_INT address
;
15676 HOST_WIDE_INT forwards
;
15677 HOST_WIDE_INT backwards
;
15680 /* Fixes less than a word need padding out to a word boundary. */
15681 #define MINIPOOL_FIX_SIZE(mode) \
15682 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15684 static Mnode
* minipool_vector_head
;
15685 static Mnode
* minipool_vector_tail
;
15686 static rtx_code_label
*minipool_vector_label
;
15687 static int minipool_pad
;
15689 /* The linked list of all minipool fixes required for this function. */
15690 Mfix
* minipool_fix_head
;
15691 Mfix
* minipool_fix_tail
;
15692 /* The fix entry for the current minipool, once it has been placed. */
15693 Mfix
* minipool_barrier
;
15695 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15696 #define JUMP_TABLES_IN_TEXT_SECTION 0
15699 static HOST_WIDE_INT
15700 get_jump_table_size (rtx_jump_table_data
*insn
)
15702 /* ADDR_VECs only take room if read-only data does into the text
15704 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
15706 rtx body
= PATTERN (insn
);
15707 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
15708 HOST_WIDE_INT size
;
15709 HOST_WIDE_INT modesize
;
15711 modesize
= GET_MODE_SIZE (GET_MODE (body
));
15712 size
= modesize
* XVECLEN (body
, elt
);
15716 /* Round up size of TBB table to a halfword boundary. */
15717 size
= (size
+ 1) & ~HOST_WIDE_INT_1
;
15720 /* No padding necessary for TBH. */
15723 /* Add two bytes for alignment on Thumb. */
15728 gcc_unreachable ();
15736 /* Return the maximum amount of padding that will be inserted before
15739 static HOST_WIDE_INT
15740 get_label_padding (rtx label
)
15742 HOST_WIDE_INT align
, min_insn_size
;
15744 align
= 1 << label_to_alignment (label
);
15745 min_insn_size
= TARGET_THUMB
? 2 : 4;
15746 return align
> min_insn_size
? align
- min_insn_size
: 0;
15749 /* Move a minipool fix MP from its current location to before MAX_MP.
15750 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15751 constraints may need updating. */
15753 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
15754 HOST_WIDE_INT max_address
)
15756 /* The code below assumes these are different. */
15757 gcc_assert (mp
!= max_mp
);
15759 if (max_mp
== NULL
)
15761 if (max_address
< mp
->max_address
)
15762 mp
->max_address
= max_address
;
15766 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15767 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15769 mp
->max_address
= max_address
;
15771 /* Unlink MP from its current position. Since max_mp is non-null,
15772 mp->prev must be non-null. */
15773 mp
->prev
->next
= mp
->next
;
15774 if (mp
->next
!= NULL
)
15775 mp
->next
->prev
= mp
->prev
;
15777 minipool_vector_tail
= mp
->prev
;
15779 /* Re-insert it before MAX_MP. */
15781 mp
->prev
= max_mp
->prev
;
15784 if (mp
->prev
!= NULL
)
15785 mp
->prev
->next
= mp
;
15787 minipool_vector_head
= mp
;
15790 /* Save the new entry. */
15793 /* Scan over the preceding entries and adjust their addresses as
15795 while (mp
->prev
!= NULL
15796 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15798 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15805 /* Add a constant to the minipool for a forward reference. Returns the
15806 node added or NULL if the constant will not fit in this pool. */
15808 add_minipool_forward_ref (Mfix
*fix
)
15810 /* If set, max_mp is the first pool_entry that has a lower
15811 constraint than the one we are trying to add. */
15812 Mnode
* max_mp
= NULL
;
15813 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
15816 /* If the minipool starts before the end of FIX->INSN then this FIX
15817 can not be placed into the current pool. Furthermore, adding the
15818 new constant pool entry may cause the pool to start FIX_SIZE bytes
15820 if (minipool_vector_head
&&
15821 (fix
->address
+ get_attr_length (fix
->insn
)
15822 >= minipool_vector_head
->max_address
- fix
->fix_size
))
15825 /* Scan the pool to see if a constant with the same value has
15826 already been added. While we are doing this, also note the
15827 location where we must insert the constant if it doesn't already
15829 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15831 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
15832 && fix
->mode
== mp
->mode
15833 && (!LABEL_P (fix
->value
)
15834 || (CODE_LABEL_NUMBER (fix
->value
)
15835 == CODE_LABEL_NUMBER (mp
->value
)))
15836 && rtx_equal_p (fix
->value
, mp
->value
))
15838 /* More than one fix references this entry. */
15840 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
15843 /* Note the insertion point if necessary. */
15845 && mp
->max_address
> max_address
)
15848 /* If we are inserting an 8-bytes aligned quantity and
15849 we have not already found an insertion point, then
15850 make sure that all such 8-byte aligned quantities are
15851 placed at the start of the pool. */
15852 if (ARM_DOUBLEWORD_ALIGN
15854 && fix
->fix_size
>= 8
15855 && mp
->fix_size
< 8)
15858 max_address
= mp
->max_address
;
15862 /* The value is not currently in the minipool, so we need to create
15863 a new entry for it. If MAX_MP is NULL, the entry will be put on
15864 the end of the list since the placement is less constrained than
15865 any existing entry. Otherwise, we insert the new fix before
15866 MAX_MP and, if necessary, adjust the constraints on the other
15869 mp
->fix_size
= fix
->fix_size
;
15870 mp
->mode
= fix
->mode
;
15871 mp
->value
= fix
->value
;
15873 /* Not yet required for a backwards ref. */
15874 mp
->min_address
= -65536;
15876 if (max_mp
== NULL
)
15878 mp
->max_address
= max_address
;
15880 mp
->prev
= minipool_vector_tail
;
15882 if (mp
->prev
== NULL
)
15884 minipool_vector_head
= mp
;
15885 minipool_vector_label
= gen_label_rtx ();
15888 mp
->prev
->next
= mp
;
15890 minipool_vector_tail
= mp
;
15894 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15895 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15897 mp
->max_address
= max_address
;
15900 mp
->prev
= max_mp
->prev
;
15902 if (mp
->prev
!= NULL
)
15903 mp
->prev
->next
= mp
;
15905 minipool_vector_head
= mp
;
15908 /* Save the new entry. */
15911 /* Scan over the preceding entries and adjust their addresses as
15913 while (mp
->prev
!= NULL
15914 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15916 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15924 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
15925 HOST_WIDE_INT min_address
)
15927 HOST_WIDE_INT offset
;
15929 /* The code below assumes these are different. */
15930 gcc_assert (mp
!= min_mp
);
15932 if (min_mp
== NULL
)
15934 if (min_address
> mp
->min_address
)
15935 mp
->min_address
= min_address
;
15939 /* We will adjust this below if it is too loose. */
15940 mp
->min_address
= min_address
;
15942 /* Unlink MP from its current position. Since min_mp is non-null,
15943 mp->next must be non-null. */
15944 mp
->next
->prev
= mp
->prev
;
15945 if (mp
->prev
!= NULL
)
15946 mp
->prev
->next
= mp
->next
;
15948 minipool_vector_head
= mp
->next
;
15950 /* Reinsert it after MIN_MP. */
15952 mp
->next
= min_mp
->next
;
15954 if (mp
->next
!= NULL
)
15955 mp
->next
->prev
= mp
;
15957 minipool_vector_tail
= mp
;
15963 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15965 mp
->offset
= offset
;
15966 if (mp
->refcount
> 0)
15967 offset
+= mp
->fix_size
;
15969 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
15970 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
15976 /* Add a constant to the minipool for a backward reference. Returns the
15977 node added or NULL if the constant will not fit in this pool.
15979 Note that the code for insertion for a backwards reference can be
15980 somewhat confusing because the calculated offsets for each fix do
15981 not take into account the size of the pool (which is still under
15984 add_minipool_backward_ref (Mfix
*fix
)
15986 /* If set, min_mp is the last pool_entry that has a lower constraint
15987 than the one we are trying to add. */
15988 Mnode
*min_mp
= NULL
;
15989 /* This can be negative, since it is only a constraint. */
15990 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
15993 /* If we can't reach the current pool from this insn, or if we can't
15994 insert this entry at the end of the pool without pushing other
15995 fixes out of range, then we don't try. This ensures that we
15996 can't fail later on. */
15997 if (min_address
>= minipool_barrier
->address
15998 || (minipool_vector_tail
->min_address
+ fix
->fix_size
15999 >= minipool_barrier
->address
))
16002 /* Scan the pool to see if a constant with the same value has
16003 already been added. While we are doing this, also note the
16004 location where we must insert the constant if it doesn't already
16006 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16008 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16009 && fix
->mode
== mp
->mode
16010 && (!LABEL_P (fix
->value
)
16011 || (CODE_LABEL_NUMBER (fix
->value
)
16012 == CODE_LABEL_NUMBER (mp
->value
)))
16013 && rtx_equal_p (fix
->value
, mp
->value
)
16014 /* Check that there is enough slack to move this entry to the
16015 end of the table (this is conservative). */
16016 && (mp
->max_address
16017 > (minipool_barrier
->address
16018 + minipool_vector_tail
->offset
16019 + minipool_vector_tail
->fix_size
)))
16022 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16025 if (min_mp
!= NULL
)
16026 mp
->min_address
+= fix
->fix_size
;
16029 /* Note the insertion point if necessary. */
16030 if (mp
->min_address
< min_address
)
16032 /* For now, we do not allow the insertion of 8-byte alignment
16033 requiring nodes anywhere but at the start of the pool. */
16034 if (ARM_DOUBLEWORD_ALIGN
16035 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16040 else if (mp
->max_address
16041 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16043 /* Inserting before this entry would push the fix beyond
16044 its maximum address (which can happen if we have
16045 re-located a forwards fix); force the new fix to come
16047 if (ARM_DOUBLEWORD_ALIGN
16048 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16053 min_address
= mp
->min_address
+ fix
->fix_size
;
16056 /* Do not insert a non-8-byte aligned quantity before 8-byte
16057 aligned quantities. */
16058 else if (ARM_DOUBLEWORD_ALIGN
16059 && fix
->fix_size
< 8
16060 && mp
->fix_size
>= 8)
16063 min_address
= mp
->min_address
+ fix
->fix_size
;
16068 /* We need to create a new entry. */
16070 mp
->fix_size
= fix
->fix_size
;
16071 mp
->mode
= fix
->mode
;
16072 mp
->value
= fix
->value
;
16074 mp
->max_address
= minipool_barrier
->address
+ 65536;
16076 mp
->min_address
= min_address
;
16078 if (min_mp
== NULL
)
16081 mp
->next
= minipool_vector_head
;
16083 if (mp
->next
== NULL
)
16085 minipool_vector_tail
= mp
;
16086 minipool_vector_label
= gen_label_rtx ();
16089 mp
->next
->prev
= mp
;
16091 minipool_vector_head
= mp
;
16095 mp
->next
= min_mp
->next
;
16099 if (mp
->next
!= NULL
)
16100 mp
->next
->prev
= mp
;
16102 minipool_vector_tail
= mp
;
16105 /* Save the new entry. */
16113 /* Scan over the following entries and adjust their offsets. */
16114 while (mp
->next
!= NULL
)
16116 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16117 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16120 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16122 mp
->next
->offset
= mp
->offset
;
16131 assign_minipool_offsets (Mfix
*barrier
)
16133 HOST_WIDE_INT offset
= 0;
16136 minipool_barrier
= barrier
;
16138 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16140 mp
->offset
= offset
;
16142 if (mp
->refcount
> 0)
16143 offset
+= mp
->fix_size
;
16147 /* Output the literal table */
16149 dump_minipool (rtx_insn
*scan
)
16155 if (ARM_DOUBLEWORD_ALIGN
)
16156 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16157 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16164 fprintf (dump_file
,
16165 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16166 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16168 scan
= emit_label_after (gen_label_rtx (), scan
);
16169 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16170 scan
= emit_label_after (minipool_vector_label
, scan
);
16172 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16174 if (mp
->refcount
> 0)
16178 fprintf (dump_file
,
16179 ";; Offset %u, min %ld, max %ld ",
16180 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16181 (unsigned long) mp
->max_address
);
16182 arm_print_value (dump_file
, mp
->value
);
16183 fputc ('\n', dump_file
);
16186 rtx val
= copy_rtx (mp
->value
);
16188 switch (GET_MODE_SIZE (mp
->mode
))
16190 #ifdef HAVE_consttable_1
16192 scan
= emit_insn_after (gen_consttable_1 (val
), scan
);
16196 #ifdef HAVE_consttable_2
16198 scan
= emit_insn_after (gen_consttable_2 (val
), scan
);
16202 #ifdef HAVE_consttable_4
16204 scan
= emit_insn_after (gen_consttable_4 (val
), scan
);
16208 #ifdef HAVE_consttable_8
16210 scan
= emit_insn_after (gen_consttable_8 (val
), scan
);
16214 #ifdef HAVE_consttable_16
16216 scan
= emit_insn_after (gen_consttable_16 (val
), scan
);
16221 gcc_unreachable ();
16229 minipool_vector_head
= minipool_vector_tail
= NULL
;
16230 scan
= emit_insn_after (gen_consttable_end (), scan
);
16231 scan
= emit_barrier_after (scan
);
16234 /* Return the cost of forcibly inserting a barrier after INSN. */
16236 arm_barrier_cost (rtx_insn
*insn
)
16238 /* Basing the location of the pool on the loop depth is preferable,
16239 but at the moment, the basic block information seems to be
16240 corrupt by this stage of the compilation. */
16241 int base_cost
= 50;
16242 rtx_insn
*next
= next_nonnote_insn (insn
);
16244 if (next
!= NULL
&& LABEL_P (next
))
16247 switch (GET_CODE (insn
))
16250 /* It will always be better to place the table before the label, rather
16259 return base_cost
- 10;
16262 return base_cost
+ 10;
16266 /* Find the best place in the insn stream in the range
16267 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16268 Create the barrier by inserting a jump and add a new fix entry for
16271 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16273 HOST_WIDE_INT count
= 0;
16274 rtx_barrier
*barrier
;
16275 rtx_insn
*from
= fix
->insn
;
16276 /* The instruction after which we will insert the jump. */
16277 rtx_insn
*selected
= NULL
;
16279 /* The address at which the jump instruction will be placed. */
16280 HOST_WIDE_INT selected_address
;
16282 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16283 rtx_code_label
*label
= gen_label_rtx ();
16285 selected_cost
= arm_barrier_cost (from
);
16286 selected_address
= fix
->address
;
16288 while (from
&& count
< max_count
)
16290 rtx_jump_table_data
*tmp
;
16293 /* This code shouldn't have been called if there was a natural barrier
16295 gcc_assert (!BARRIER_P (from
));
16297 /* Count the length of this insn. This must stay in sync with the
16298 code that pushes minipool fixes. */
16299 if (LABEL_P (from
))
16300 count
+= get_label_padding (from
);
16302 count
+= get_attr_length (from
);
16304 /* If there is a jump table, add its length. */
16305 if (tablejump_p (from
, NULL
, &tmp
))
16307 count
+= get_jump_table_size (tmp
);
16309 /* Jump tables aren't in a basic block, so base the cost on
16310 the dispatch insn. If we select this location, we will
16311 still put the pool after the table. */
16312 new_cost
= arm_barrier_cost (from
);
16314 if (count
< max_count
16315 && (!selected
|| new_cost
<= selected_cost
))
16318 selected_cost
= new_cost
;
16319 selected_address
= fix
->address
+ count
;
16322 /* Continue after the dispatch table. */
16323 from
= NEXT_INSN (tmp
);
16327 new_cost
= arm_barrier_cost (from
);
16329 if (count
< max_count
16330 && (!selected
|| new_cost
<= selected_cost
))
16333 selected_cost
= new_cost
;
16334 selected_address
= fix
->address
+ count
;
16337 from
= NEXT_INSN (from
);
16340 /* Make sure that we found a place to insert the jump. */
16341 gcc_assert (selected
);
16343 /* Make sure we do not split a call and its corresponding
16344 CALL_ARG_LOCATION note. */
16345 if (CALL_P (selected
))
16347 rtx_insn
*next
= NEXT_INSN (selected
);
16348 if (next
&& NOTE_P (next
)
16349 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
16353 /* Create a new JUMP_INSN that branches around a barrier. */
16354 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16355 JUMP_LABEL (from
) = label
;
16356 barrier
= emit_barrier_after (from
);
16357 emit_label_after (label
, barrier
);
16359 /* Create a minipool barrier entry for the new barrier. */
16360 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16361 new_fix
->insn
= barrier
;
16362 new_fix
->address
= selected_address
;
16363 new_fix
->next
= fix
->next
;
16364 fix
->next
= new_fix
;
16369 /* Record that there is a natural barrier in the insn stream at
16372 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
16374 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16377 fix
->address
= address
;
16380 if (minipool_fix_head
!= NULL
)
16381 minipool_fix_tail
->next
= fix
;
16383 minipool_fix_head
= fix
;
16385 minipool_fix_tail
= fix
;
16388 /* Record INSN, which will need fixing up to load a value from the
16389 minipool. ADDRESS is the offset of the insn since the start of the
16390 function; LOC is a pointer to the part of the insn which requires
16391 fixing; VALUE is the constant that must be loaded, which is of type
16394 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
16395 machine_mode mode
, rtx value
)
16397 gcc_assert (!arm_disable_literal_pool
);
16398 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16401 fix
->address
= address
;
16404 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16405 fix
->value
= value
;
16406 fix
->forwards
= get_attr_pool_range (insn
);
16407 fix
->backwards
= get_attr_neg_pool_range (insn
);
16408 fix
->minipool
= NULL
;
16410 /* If an insn doesn't have a range defined for it, then it isn't
16411 expecting to be reworked by this code. Better to stop now than
16412 to generate duff assembly code. */
16413 gcc_assert (fix
->forwards
|| fix
->backwards
);
16415 /* If an entry requires 8-byte alignment then assume all constant pools
16416 require 4 bytes of padding. Trying to do this later on a per-pool
16417 basis is awkward because existing pool entries have to be modified. */
16418 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
16423 fprintf (dump_file
,
16424 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16425 GET_MODE_NAME (mode
),
16426 INSN_UID (insn
), (unsigned long) address
,
16427 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16428 arm_print_value (dump_file
, fix
->value
);
16429 fprintf (dump_file
, "\n");
16432 /* Add it to the chain of fixes. */
16435 if (minipool_fix_head
!= NULL
)
16436 minipool_fix_tail
->next
= fix
;
16438 minipool_fix_head
= fix
;
16440 minipool_fix_tail
= fix
;
16443 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16444 Returns the number of insns needed, or 99 if we always want to synthesize
16447 arm_max_const_double_inline_cost ()
16449 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
16452 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16453 Returns the number of insns needed, or 99 if we don't know how to
16456 arm_const_double_inline_cost (rtx val
)
16458 rtx lowpart
, highpart
;
16461 mode
= GET_MODE (val
);
16463 if (mode
== VOIDmode
)
16466 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16468 lowpart
= gen_lowpart (SImode
, val
);
16469 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16471 gcc_assert (CONST_INT_P (lowpart
));
16472 gcc_assert (CONST_INT_P (highpart
));
16474 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16475 NULL_RTX
, NULL_RTX
, 0, 0)
16476 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16477 NULL_RTX
, NULL_RTX
, 0, 0));
16480 /* Cost of loading a SImode constant. */
16482 arm_const_inline_cost (enum rtx_code code
, rtx val
)
16484 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
16485 NULL_RTX
, NULL_RTX
, 1, 0);
16488 /* Return true if it is worthwhile to split a 64-bit constant into two
16489 32-bit operations. This is the case if optimizing for size, or
16490 if we have load delay slots, or if one 32-bit part can be done with
16491 a single data operation. */
16493 arm_const_double_by_parts (rtx val
)
16495 machine_mode mode
= GET_MODE (val
);
16498 if (optimize_size
|| arm_ld_sched
)
16501 if (mode
== VOIDmode
)
16504 part
= gen_highpart_mode (SImode
, mode
, val
);
16506 gcc_assert (CONST_INT_P (part
));
16508 if (const_ok_for_arm (INTVAL (part
))
16509 || const_ok_for_arm (~INTVAL (part
)))
16512 part
= gen_lowpart (SImode
, val
);
16514 gcc_assert (CONST_INT_P (part
));
16516 if (const_ok_for_arm (INTVAL (part
))
16517 || const_ok_for_arm (~INTVAL (part
)))
16523 /* Return true if it is possible to inline both the high and low parts
16524 of a 64-bit constant into 32-bit data processing instructions. */
16526 arm_const_double_by_immediates (rtx val
)
16528 machine_mode mode
= GET_MODE (val
);
16531 if (mode
== VOIDmode
)
16534 part
= gen_highpart_mode (SImode
, mode
, val
);
16536 gcc_assert (CONST_INT_P (part
));
16538 if (!const_ok_for_arm (INTVAL (part
)))
16541 part
= gen_lowpart (SImode
, val
);
16543 gcc_assert (CONST_INT_P (part
));
16545 if (!const_ok_for_arm (INTVAL (part
)))
16551 /* Scan INSN and note any of its operands that need fixing.
16552 If DO_PUSHES is false we do not actually push any of the fixups
16555 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
16559 extract_constrain_insn (insn
);
16561 if (recog_data
.n_alternatives
== 0)
16564 /* Fill in recog_op_alt with information about the constraints of
16566 preprocess_constraints (insn
);
16568 const operand_alternative
*op_alt
= which_op_alt ();
16569 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16571 /* Things we need to fix can only occur in inputs. */
16572 if (recog_data
.operand_type
[opno
] != OP_IN
)
16575 /* If this alternative is a memory reference, then any mention
16576 of constants in this alternative is really to fool reload
16577 into allowing us to accept one there. We need to fix them up
16578 now so that we output the right code. */
16579 if (op_alt
[opno
].memory_ok
)
16581 rtx op
= recog_data
.operand
[opno
];
16583 if (CONSTANT_P (op
))
16586 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
16587 recog_data
.operand_mode
[opno
], op
);
16589 else if (MEM_P (op
)
16590 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
16591 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
16595 rtx cop
= avoid_constant_pool_reference (op
);
16597 /* Casting the address of something to a mode narrower
16598 than a word can cause avoid_constant_pool_reference()
16599 to return the pool reference itself. That's no good to
16600 us here. Lets just hope that we can use the
16601 constant pool value directly. */
16603 cop
= get_pool_constant (XEXP (op
, 0));
16605 push_minipool_fix (insn
, address
,
16606 recog_data
.operand_loc
[opno
],
16607 recog_data
.operand_mode
[opno
], cop
);
16617 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16618 and unions in the context of ARMv8-M Security Extensions. It is used as a
16619 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16620 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16621 or four masks, depending on whether it is being computed for a
16622 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16623 respectively. The tree for the type of the argument or a field within an
16624 argument is passed in ARG_TYPE, the current register this argument or field
16625 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16626 argument or field starts at is passed in STARTING_BIT and the last used bit
16627 is kept in LAST_USED_BIT which is also updated accordingly. */
/* NOTE(review): this chunk is a mangled extraction — the numeric prefixes are
   the original file's line numbers, and gaps in them (e.g. 16640, 16645-16646,
   16668-16669, 16672) show that source lines (declarations such as the local
   'field'/'mask', braces, the '(*regno)++' and loop framing) were dropped.
   Do not compile or hand-restore from this text; diff against a pristine
   gcc/config/arm/arm.c.  '®no_t' below is a mojibake of '&regno_t'. */
16629 static unsigned HOST_WIDE_INT
16630 comp_not_to_clear_mask_str_un (tree arg_type
, int * regno
,
16631 uint32_t * padding_bits_to_clear
,
16632 unsigned starting_bit
, int * last_used_bit
)
16635 unsigned HOST_WIDE_INT not_to_clear_reg_mask
= 0;
/* Record case: walk the fields in declaration order, accumulating padding
   bits per 32-bit argument register as gaps appear between fields.  */
16637 if (TREE_CODE (arg_type
) == RECORD_TYPE
)
16639 unsigned current_bit
= starting_bit
;
16641 long int offset
, size
;
16644 field
= TYPE_FIELDS (arg_type
);
16647 /* The offset within a structure is always an offset from
16648 the start of that structure. Make sure we take that into the
16649 calculation of the register based offset that we use here. */
16650 offset
= starting_bit
;
16651 offset
+= TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field
), 0);
16654 /* This is the actual size of the field, for bitfields this is the
16655 bitfield width and not the container size. */
16656 size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
16658 if (*last_used_bit
!= offset
)
16660 if (offset
< *last_used_bit
)
16662 /* This field's offset is before the 'last_used_bit', that
16663 means this field goes on the next register. So we need to
16664 pad the rest of the current register and increase the
16665 register number. */
16667 mask
= ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit
);
/* NOTE(review): a 'mask++;' and the '(*regno)++;' advancing to the next
   register are among the lines dropped by the extraction here — confirm
   against the pristine source. */
16670 padding_bits_to_clear
[*regno
] |= mask
;
16671 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16676 /* Otherwise we pad the bits between the last field's end and
16677 the start of the new field. */
16680 mask
= ((uint32_t)-1) >> (32 - offset
);
16681 mask
-= ((uint32_t) 1 << *last_used_bit
) - 1;
16682 padding_bits_to_clear
[*regno
] |= mask
;
16684 current_bit
= offset
;
16687 /* Calculate further padding bits for inner structs/unions too. */
16688 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field
)))
16690 *last_used_bit
= current_bit
;
/* Recursion passes REGNO and LAST_USED_BIT by address, so the inner
   aggregate advances this walk's register/bit cursor in place.  */
16691 not_to_clear_reg_mask
16692 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field
), regno
,
16693 padding_bits_to_clear
, offset
,
16698 /* Update 'current_bit' with this field's size. If the
16699 'current_bit' lies in a subsequent register, update 'regno' and
16700 reset 'current_bit' to point to the current bit in that new
16702 current_bit
+= size
;
16703 while (current_bit
>= 32)
16706 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16709 *last_used_bit
= current_bit
;
16712 field
= TREE_CHAIN (field
);
16714 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
/* Union case: a bit counts as padding only if it is padding (or out of
   range) for EVERY member, so intersect the per-member masks.  */
16716 else if (TREE_CODE (arg_type
) == UNION_TYPE
)
16718 tree field
, field_t
;
16719 int i
, regno_t
, field_size
;
/* NOTE(review): initializing a uint32_t array from -1 relies on the usual
   modular conversion to all-ones; the {-1,-1,-1,-1} initializer also
   hard-assumes NUM_ARG_REGS == 4 — TODO confirm both hold upstream.  */
16723 uint32_t padding_bits_to_clear_res
[NUM_ARG_REGS
]
16724 = {-1, -1, -1, -1};
16726 /* To compute the padding bits in a union we only consider bits as
16727 padding bits if they are always either a padding bit or fall outside a
16728 fields size for all fields in the union. */
16729 field
= TYPE_FIELDS (arg_type
);
16732 uint32_t padding_bits_to_clear_t
[NUM_ARG_REGS
]
16733 = {0U, 0U, 0U, 0U};
16734 int last_used_bit_t
= *last_used_bit
;
16736 field_t
= TREE_TYPE (field
);
16738 /* If the field's type is either a record or a union make sure to
16739 compute their padding bits too. */
16740 if (RECORD_OR_UNION_TYPE_P (field_t
))
16741 not_to_clear_reg_mask
16742 |= comp_not_to_clear_mask_str_un (field_t
, ®no_t
,
16743 &padding_bits_to_clear_t
[0],
16744 starting_bit
, &last_used_bit_t
);
16747 field_size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
16748 regno_t
= (field_size
/ 32) + *regno
;
16749 last_used_bit_t
= (starting_bit
+ field_size
) % 32;
16752 for (i
= *regno
; i
< regno_t
; i
++)
16754 /* For all but the last register used by this field only keep the
16755 padding bits that were padding bits in this field. */
16756 padding_bits_to_clear_res
[i
] &= padding_bits_to_clear_t
[i
];
16759 /* For the last register, keep all padding bits that were padding
16760 bits in this field and any padding bits that are still valid
16761 as padding bits but fall outside of this field's size. */
16762 mask
= (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t
)) + 1;
16763 padding_bits_to_clear_res
[regno_t
]
16764 &= padding_bits_to_clear_t
[regno_t
] | mask
;
16766 /* Update the maximum size of the fields in terms of registers used
16767 ('max_reg') and the 'last_used_bit' in said register. */
16768 if (max_reg
< regno_t
)
16771 max_bit
= last_used_bit_t
;
16773 else if (max_reg
== regno_t
&& max_bit
< last_used_bit_t
)
16774 max_bit
= last_used_bit_t
;
16776 field
= TREE_CHAIN (field
);
16779 /* Update the current padding_bits_to_clear using the intersection of the
16780 padding bits of all the fields. */
16781 for (i
=*regno
; i
< max_reg
; i
++)
16782 padding_bits_to_clear
[i
] |= padding_bits_to_clear_res
[i
];
16784 /* Do not keep trailing padding bits, we do not know yet whether this
16785 is the end of the argument. */
16786 mask
= ((uint32_t) 1 << max_bit
) - 1;
16787 padding_bits_to_clear
[max_reg
]
16788 |= padding_bits_to_clear_res
[max_reg
] & mask
;
16791 *last_used_bit
= max_bit
;
16794 /* This function should only be used for structs and unions. */
16795 gcc_unreachable ();
16797 return not_to_clear_reg_mask
;
16800 /* In the context of ARMv8-M Security Extensions, this function is used for both
16801 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16802 registers are used when returning or passing arguments, which is then
16803 returned as a mask. It will also compute a mask to indicate padding/unused
16804 bits for each of these registers, and passes this through the
16805 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16806 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16807 the starting register used to pass this argument or return value is passed
16808 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16809 for struct and union types. */
/* NOTE(review): mangled extraction — embedded numbers are original line
   numbers; gaps (e.g. 16814-16815, 16820-16821, 16836-16838) mark dropped
   lines (braces, the lvalue of the 16822 assignment, 'else').  '®no' is a
   mojibake of '&regno'.  Verify against pristine arm.c.  */
16811 static unsigned HOST_WIDE_INT
16812 compute_not_to_clear_mask (tree arg_type
, rtx arg_rtx
, int regno
,
16813 uint32_t * padding_bits_to_clear
)
16816 int last_used_bit
= 0;
16817 unsigned HOST_WIDE_INT not_to_clear_mask
;
16819 if (RECORD_OR_UNION_TYPE_P (arg_type
))
/* Aggregates: delegate to the struct/union walker; 'regno' is passed by
   address, so on return it names the register after the aggregate.  */
16822 = comp_not_to_clear_mask_str_un (arg_type
, ®no
,
16823 padding_bits_to_clear
, 0,
16827 /* If the 'last_used_bit' is not zero, that means we are still using a
16828 part of the last 'regno'. In such cases we must clear the trailing
16829 bits. Otherwise we are not using regno and we should mark it as to
16831 if (last_used_bit
!= 0)
16832 padding_bits_to_clear
[regno
]
16833 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit
) + 1;
16835 not_to_clear_mask
&= ~(HOST_WIDE_INT_1U
<< regno
);
16839 not_to_clear_mask
= 0;
16840 /* We are not dealing with structs nor unions. So these arguments may be
16841 passed in floating point registers too. In some cases a BLKmode is
16842 used when returning or passing arguments in multiple VFP registers. */
16843 if (GET_MODE (arg_rtx
) == BLKmode
)
16848 /* This should really only occur when dealing with the hard-float
16850 gcc_assert (TARGET_HARD_FLOAT_ABI
);
/* BLKmode arg_rtx is a PARALLEL of (reg) pieces; keep every hard
   register each piece occupies (ARM_NUM_REGS per mode).  */
16852 for (i
= 0; i
< XVECLEN (arg_rtx
, 0); i
++)
16854 reg
= XEXP (XVECEXP (arg_rtx
, 0, i
), 0);
16855 gcc_assert (REG_P (reg
));
16857 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (reg
);
16859 /* If we are dealing with DF mode, make sure we don't
16860 clear either of the registers it addresses. */
16861 arg_regs
= ARM_NUM_REGS (GET_MODE (reg
));
16864 unsigned HOST_WIDE_INT mask
;
16865 mask
= HOST_WIDE_INT_1U
<< (REGNO (reg
) + arg_regs
);
16866 mask
-= HOST_WIDE_INT_1U
<< REGNO (reg
);
16867 not_to_clear_mask
|= mask
;
16873 /* Otherwise we can rely on the MODE to determine how many registers
16874 are being used by this argument. */
16875 int arg_regs
= ARM_NUM_REGS (GET_MODE (arg_rtx
));
16876 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
16879 unsigned HOST_WIDE_INT
16880 mask
= HOST_WIDE_INT_1U
<< (REGNO (arg_rtx
) + arg_regs
);
16881 mask
-= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
16882 not_to_clear_mask
|= mask
;
16887 return not_to_clear_mask
;
16890 /* Clears caller saved registers not used to pass arguments before a
16891 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
16892 registers is done in __gnu_cmse_nonsecure_call libcall.
16893 See libgcc/config/arm/cmse_nonsecure_call.S. */
/* NOTE(review): mangled extraction — numeric prefixes are original file line
   numbers; gaps (e.g. 16897-16899, 16920-16921 'continue;', 16965 start_sequence,
   17075 end_sequence, 17077+ function close) mark dropped lines, including the
   return type line of this function.  Verify against pristine arm.c.  */
16896 cmse_nonsecure_call_clear_caller_saved (void)
16900 FOR_EACH_BB_FN (bb
, cfun
)
16904 FOR_BB_INSNS (bb
, insn
)
16906 uint64_t to_clear_mask
, float_mask
;
16908 rtx pat
, call
, unspec
, reg
, cleared_reg
, tmp
;
16909 unsigned int regno
, maxregno
;
16911 CUMULATIVE_ARGS args_so_far_v
;
16912 cumulative_args_t args_so_far
;
16913 tree arg_type
, fntype
;
16914 bool using_r4
, first_param
= true;
16915 function_args_iterator args_iter
;
16916 uint32_t padding_bits_to_clear
[4] = {0U, 0U, 0U, 0U};
16917 uint32_t * padding_bits_to_clear_ptr
= &padding_bits_to_clear
[0];
16919 if (!NONDEBUG_INSN_P (insn
))
16922 if (!CALL_P (insn
))
16925 pat
= PATTERN (insn
);
16926 gcc_assert (GET_CODE (pat
) == PARALLEL
&& XVECLEN (pat
, 0) > 0);
16927 call
= XVECEXP (pat
, 0, 0);
16929 /* Get the real call RTX if the insn sets a value, ie. returns. */
16930 if (GET_CODE (call
) == SET
)
16931 call
= SET_SRC (call
);
16933 /* Check if it is a cmse_nonsecure_call. */
16934 unspec
= XEXP (call
, 0);
16935 if (GET_CODE (unspec
) != UNSPEC
16936 || XINT (unspec
, 1) != UNSPEC_NONSECURE_MEM
)
16939 /* Determine the caller-saved registers we need to clear. */
/* Bit N of to_clear_mask corresponds to hard register number N; start
   with all core argument registers (r0..r3) marked for clearing.  */
16940 to_clear_mask
= (1LL << (NUM_ARG_REGS
)) - 1;
16941 maxregno
= NUM_ARG_REGS
- 1;
16942 /* Only look at the caller-saved floating point registers in case of
16943 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16944 lazy store and loads which clear both caller- and callee-saved
16946 if (TARGET_HARD_FLOAT_ABI
)
16948 float_mask
= (1LL << (D7_VFP_REGNUM
+ 1)) - 1;
16949 float_mask
&= ~((1LL << FIRST_VFP_REGNUM
) - 1);
16950 to_clear_mask
|= float_mask
;
16951 maxregno
= D7_VFP_REGNUM
;
16954 /* Make sure the register used to hold the function address is not
16956 address
= RTVEC_ELT (XVEC (unspec
, 0), 0);
16957 gcc_assert (MEM_P (address
));
16958 gcc_assert (REG_P (XEXP (address
, 0)));
16959 to_clear_mask
&= ~(1LL << REGNO (XEXP (address
, 0)));
16961 /* Set basic block of call insn so that df rescan is performed on
16962 insns inserted here. */
16963 set_block_for_insn (insn
, bb
);
16964 df_set_flags (DF_DEFER_INSN_RESCAN
);
16967 /* Make sure the scheduler doesn't schedule other insns beyond
16969 emit_insn (gen_blockage ());
16971 /* Walk through all arguments and clear registers appropriately.
16973 fntype
= TREE_TYPE (MEM_EXPR (address
));
16974 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
,
16976 args_so_far
= pack_cumulative_args (&args_so_far_v
);
/* Remove from to_clear_mask every register that actually carries an
   argument, and collect per-register padding masks as we go.  */
16977 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
16980 machine_mode arg_mode
= TYPE_MODE (arg_type
);
16982 if (VOID_TYPE_P (arg_type
))
16986 arm_function_arg_advance (args_so_far
, arg_mode
, arg_type
,
16989 arg_rtx
= arm_function_arg (args_so_far
, arg_mode
, arg_type
,
16991 gcc_assert (REG_P (arg_rtx
));
16993 &= ~compute_not_to_clear_mask (arg_type
, arg_rtx
,
16995 padding_bits_to_clear_ptr
);
16997 first_param
= false;
17000 /* Clear padding bits where needed. */
17001 cleared_reg
= XEXP (address
, 0);
17002 reg
= gen_rtx_REG (SImode
, IP_REGNUM
);
17004 for (regno
= R0_REGNUM
; regno
< NUM_ARG_REGS
; regno
++)
17006 if (padding_bits_to_clear
[regno
] == 0)
17009 /* If this is a Thumb-1 target copy the address of the function
17010 we are calling from 'r4' into 'ip' such that we can use r4 to
17011 clear the unused bits in the arguments. */
17012 if (TARGET_THUMB1
&& !using_r4
)
17016 emit_move_insn (gen_rtx_REG (SImode
, IP_REGNUM
),
/* Materialize ~padding_bits_to_clear[regno] as two 16-bit halves
   (movw/movt style: low half first, then ZERO_EXTRACT the top).  */
17020 tmp
= GEN_INT ((((~padding_bits_to_clear
[regno
]) << 16u) >> 16u));
17021 emit_move_insn (reg
, tmp
);
17022 /* Also fill the top half of the negated
17023 padding_bits_to_clear. */
17024 if (((~padding_bits_to_clear
[regno
]) >> 16) > 0)
17026 tmp
= GEN_INT ((~padding_bits_to_clear
[regno
]) >> 16);
17027 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode
, reg
,
17033 emit_insn (gen_andsi3 (gen_rtx_REG (SImode
, regno
),
17034 gen_rtx_REG (SImode
, regno
),
17039 emit_move_insn (cleared_reg
,
17040 gen_rtx_REG (SImode
, IP_REGNUM
));
17042 /* We use right shift and left shift to clear the LSB of the address
17043 we jump to instead of using bic, to avoid having to use an extra
17044 register on Thumb-1. */
17045 tmp
= gen_rtx_LSHIFTRT (SImode
, cleared_reg
, const1_rtx
);
17046 emit_insn (gen_rtx_SET (cleared_reg
, tmp
));
17047 tmp
= gen_rtx_ASHIFT (SImode
, cleared_reg
, const1_rtx
);
17048 emit_insn (gen_rtx_SET (cleared_reg
, tmp
));
17050 /* Clearing all registers that leak before doing a non-secure
17052 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
17054 if (!(to_clear_mask
& (1LL << regno
)))
17057 /* If regno is an even vfp register and its successor is also to
17058 be cleared, use vmov. */
17059 if (IS_VFP_REGNUM (regno
))
17061 if (TARGET_VFP_DOUBLE
17062 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
17063 && to_clear_mask
& (1LL << (regno
+ 1)))
17064 emit_move_insn (gen_rtx_REG (DFmode
, regno
++),
17065 CONST0_RTX (DFmode
));
17067 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
17068 CONST0_RTX (SFmode
));
/* Core registers are cleared by copying the (LSB-cleared) call
   address into them rather than 0 — it leaks nothing new.  */
17071 emit_move_insn (gen_rtx_REG (SImode
, regno
), cleared_reg
);
17074 seq
= get_insns ();
17076 emit_insn_before (seq
, insn
);
17082 /* Rewrite move insn into subtract of 0 if the condition codes will
17083 be useful in next conditional jump insn. */
/* NOTE(review): mangled extraction — numeric prefixes are original line
   numbers; gaps (return type at 17085, braces, 'continue;'s at 17103/17112/
   17131, the dest/src decls near 17134) mark dropped lines.  On Thumb-1,
   'subs rd, rn, #0' sets flags whereas 'mov' does not, which is what this
   pass exploits.  Verify against pristine arm.c.  */
17086 thumb1_reorg (void)
17090 FOR_EACH_BB_FN (bb
, cfun
)
17093 rtx cmp
, op0
, op1
, set
= NULL
;
17094 rtx_insn
*prev
, *insn
= BB_END (bb
);
17095 bool insn_clobbered
= false;
/* Walk back from the block end to the last real (non-debug) insn.  */
17097 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
17098 insn
= PREV_INSN (insn
);
17100 /* Find the last cbranchsi4_insn in basic block BB. */
17101 if (insn
== BB_HEAD (bb
)
17102 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
17105 /* Get the register with which we are comparing. */
17106 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
17107 op0
= XEXP (cmp
, 0);
17108 op1
= XEXP (cmp
, 1);
17110 /* Check that comparison is against ZERO. */
17111 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
17114 /* Find the first flag setting insn before INSN in basic block BB. */
17115 gcc_assert (insn
!= BB_HEAD (bb
));
17116 for (prev
= PREV_INSN (insn
);
17118 && prev
!= BB_HEAD (bb
)
17120 || DEBUG_INSN_P (prev
)
17121 || ((set
= single_set (prev
)) != NULL
17122 && get_attr_conds (prev
) == CONDS_NOCOND
)));
17123 prev
= PREV_INSN (prev
))
17125 if (reg_set_p (op0
, prev
))
17126 insn_clobbered
= true;
17129 /* Skip if op0 is clobbered by insn other than prev. */
17130 if (insn_clobbered
)
17136 dest
= SET_DEST (set
);
17137 src
= SET_SRC (set
);
17138 if (!low_register_operand (dest
, SImode
)
17139 || !low_register_operand (src
, SImode
))
17142 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17143 in INSN. Both src and dest of the move insn are checked. */
17144 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
17146 dest
= copy_rtx (dest
);
17147 src
= copy_rtx (src
);
17148 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
17149 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
/* Force re-recognition: the pattern changed under the insn.  */
17150 INSN_CODE (prev
) = -1;
17151 /* Set test register in INSN to dest. */
17152 XEXP (cmp
, 0) = copy_rtx (dest
);
17153 INSN_CODE (insn
) = -1;
17158 /* Convert instructions to their cc-clobbering variant if possible, since
17159 that allows us to use smaller encodings. */
/* NOTE(review): mangled extraction — numeric prefixes are original line
   numbers; gaps (return type at 17161, 'df_analyze'-style setup near 17165,
   case labels e.g. 'case PLUS:'/'case MINUS:', 'break;'s, 17181 'continue;',
   the ?: arms near 17189-17190, 'rtvec vec;' near 17345) mark dropped lines.
   Verify against pristine arm.c before relying on control flow here.  */
17162 thumb2_reorg (void)
17167 INIT_REG_SET (&live
);
17169 /* We are freeing block_for_insn in the toplev to keep compatibility
17170 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17171 compute_bb_for_insn ();
17174 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17176 FOR_EACH_BB_FN (bb
, cfun
)
17178 if ((current_tune
->disparage_flag_setting_t16_encodings
17179 == tune_params::DISPARAGE_FLAGS_ALL
)
17180 && optimize_bb_for_speed_p (bb
))
17184 Convert_Action action
= SKIP
;
17185 Convert_Action action_for_partial_flag_setting
17186 = ((current_tune
->disparage_flag_setting_t16_encodings
17187 != tune_params::DISPARAGE_FLAGS_NEITHER
)
17188 && optimize_bb_for_speed_p (bb
))
/* Backwards df simulation: an insn may take the flag-setting form only
   where CC_REGNUM is dead (not in 'live') at that point.  */
17191 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17192 df_simulate_initialize_backwards (bb
, &live
);
17193 FOR_BB_INSNS_REVERSE (bb
, insn
)
17195 if (NONJUMP_INSN_P (insn
)
17196 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17197 && GET_CODE (PATTERN (insn
)) == SET
)
17200 rtx pat
= PATTERN (insn
);
17201 rtx dst
= XEXP (pat
, 0);
17202 rtx src
= XEXP (pat
, 1);
17203 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17205 if (UNARY_P (src
) || BINARY_P (src
))
17206 op0
= XEXP (src
, 0);
17208 if (BINARY_P (src
))
17209 op1
= XEXP (src
, 1);
17211 if (low_register_operand (dst
, SImode
))
17213 switch (GET_CODE (src
))
17216 /* Adding two registers and storing the result
17217 in the first source is already a 16-bit
17219 if (rtx_equal_p (dst
, op0
)
17220 && register_operand (op1
, SImode
))
17223 if (low_register_operand (op0
, SImode
))
17225 /* ADDS <Rd>,<Rn>,<Rm> */
17226 if (low_register_operand (op1
, SImode
))
17228 /* ADDS <Rdn>,#<imm8> */
17229 /* SUBS <Rdn>,#<imm8> */
17230 else if (rtx_equal_p (dst
, op0
)
17231 && CONST_INT_P (op1
)
17232 && IN_RANGE (INTVAL (op1
), -255, 255))
17234 /* ADDS <Rd>,<Rn>,#<imm3> */
17235 /* SUBS <Rd>,<Rn>,#<imm3> */
17236 else if (CONST_INT_P (op1
)
17237 && IN_RANGE (INTVAL (op1
), -7, 7))
17240 /* ADCS <Rd>, <Rn> */
17241 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17242 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17243 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17245 && COMPARISON_P (op1
)
17246 && cc_register (XEXP (op1
, 0), VOIDmode
)
17247 && maybe_get_arm_condition_code (op1
) == ARM_CS
17248 && XEXP (op1
, 1) == const0_rtx
)
17253 /* RSBS <Rd>,<Rn>,#0
17254 Not handled here: see NEG below. */
17255 /* SUBS <Rd>,<Rn>,#<imm3>
17257 Not handled here: see PLUS above. */
17258 /* SUBS <Rd>,<Rn>,<Rm> */
17259 if (low_register_operand (op0
, SImode
)
17260 && low_register_operand (op1
, SImode
))
17265 /* MULS <Rdm>,<Rn>,<Rdm>
17266 As an exception to the rule, this is only used
17267 when optimizing for size since MULS is slow on all
17268 known implementations. We do not even want to use
17269 MULS in cold code, if optimizing for speed, so we
17270 test the global flag here. */
17271 if (!optimize_size
)
17273 /* Fall through. */
17277 /* ANDS <Rdn>,<Rm> */
17278 if (rtx_equal_p (dst
, op0
)
17279 && low_register_operand (op1
, SImode
))
17280 action
= action_for_partial_flag_setting
;
17281 else if (rtx_equal_p (dst
, op1
)
17282 && low_register_operand (op0
, SImode
))
/* AND is commutative: swap operands so dst matches op0, but only
   if partial-flag-setting forms are allowed at all here.  */
17283 action
= action_for_partial_flag_setting
== SKIP
17284 ? SKIP
: SWAP_CONV
;
17290 /* ASRS <Rdn>,<Rm> */
17291 /* LSRS <Rdn>,<Rm> */
17292 /* LSLS <Rdn>,<Rm> */
17293 if (rtx_equal_p (dst
, op0
)
17294 && low_register_operand (op1
, SImode
))
17295 action
= action_for_partial_flag_setting
;
17296 /* ASRS <Rd>,<Rm>,#<imm5> */
17297 /* LSRS <Rd>,<Rm>,#<imm5> */
17298 /* LSLS <Rd>,<Rm>,#<imm5> */
17299 else if (low_register_operand (op0
, SImode
)
17300 && CONST_INT_P (op1
)
17301 && IN_RANGE (INTVAL (op1
), 0, 31))
17302 action
= action_for_partial_flag_setting
;
17306 /* RORS <Rdn>,<Rm> */
17307 if (rtx_equal_p (dst
, op0
)
17308 && low_register_operand (op1
, SImode
))
17309 action
= action_for_partial_flag_setting
;
17313 /* MVNS <Rd>,<Rm> */
17314 if (low_register_operand (op0
, SImode
))
17315 action
= action_for_partial_flag_setting
;
17319 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17320 if (low_register_operand (op0
, SImode
))
17325 /* MOVS <Rd>,#<imm8> */
17326 if (CONST_INT_P (src
)
17327 && IN_RANGE (INTVAL (src
), 0, 255))
17328 action
= action_for_partial_flag_setting
;
17332 /* MOVS and MOV<c> with registers have different
17333 encodings, so are not relevant here. */
17341 if (action
!= SKIP
)
17343 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17344 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17347 if (action
== SWAP_CONV
)
17349 src
= copy_rtx (src
);
17350 XEXP (src
, 0) = op1
;
17351 XEXP (src
, 1) = op0
;
17352 pat
= gen_rtx_SET (dst
, src
);
17353 vec
= gen_rtvec (2, pat
, clobber
);
17355 else /* action == CONV */
17356 vec
= gen_rtvec (2, pat
, clobber
);
/* Wrap the SET in a PARALLEL with a CC clobber and force
   re-recognition so the 16-bit flag-setting pattern matches.  */
17358 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17359 INSN_CODE (insn
) = -1;
17363 if (NONDEBUG_INSN_P (insn
))
17364 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17368 CLEAR_REG_SET (&live
);
17371 /* Gcc puts the pool in the wrong place for ARM, since we can only
17372 load addresses a limited distance around the pc. We do some
17373 special munging to move the constant pool values to the correct
17374 point in the code. */
/* NOTE(review): the function header itself (lines 17375-17378 of the
   original, presumably 'static void arm_reorg (void)' — TODO confirm) was
   dropped by the extraction, along with many body lines (locals 'insn'/'fix',
   the use_cmse guard before 17383, thumb1_reorg/thumb2_reorg calls,
   'return;' at ~17399, loop braces, goto labels).  Numeric prefixes are
   original line numbers; gaps mark dropped lines.  */
17379 HOST_WIDE_INT address
= 0;
17383 cmse_nonsecure_call_clear_caller_saved ();
17386 else if (TARGET_THUMB2
)
17389 /* Ensure all insns that must be split have been split at this point.
17390 Otherwise, the pool placement code below may compute incorrect
17391 insn lengths. Note that when optimizing, all insns have already
17392 been split at this point. */
17394 split_all_insns_noflow ();
17396 /* Make sure we do not attempt to create a literal pool even though it should
17397 no longer be necessary to create any. */
17398 if (arm_disable_literal_pool
)
17401 minipool_fix_head
= minipool_fix_tail
= NULL
;
17403 /* The first insn must always be a note, or the code below won't
17404 scan it properly. */
17405 insn
= get_insns ();
17406 gcc_assert (NOTE_P (insn
));
17409 /* Scan all the insns and record the operands that will need fixing. */
/* Pass 1: compute a running byte 'address' for every insn and record
   constant-pool references (fixes) and existing barriers.  */
17410 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17412 if (BARRIER_P (insn
))
17413 push_minipool_barrier (insn
, address
);
17414 else if (INSN_P (insn
))
17416 rtx_jump_table_data
*table
;
17418 note_invalid_constants (insn
, address
, true);
17419 address
+= get_attr_length (insn
);
17421 /* If the insn is a vector jump, add the size of the table
17422 and skip the table. */
17423 if (tablejump_p (insn
, NULL
, &table
))
17425 address
+= get_jump_table_size (table
);
17429 else if (LABEL_P (insn
))
17430 /* Add the worst-case padding due to alignment. We don't add
17431 the _current_ padding because the minipool insertions
17432 themselves might change it. */
17433 address
+= get_label_padding (insn
);
17436 fix
= minipool_fix_head
;
17438 /* Now scan the fixups and perform the required changes. */
17443 Mfix
* last_added_fix
;
17444 Mfix
* last_barrier
= NULL
;
17447 /* Skip any further barriers before the next fix. */
17448 while (fix
&& BARRIER_P (fix
->insn
))
17451 /* No more fixes. */
17455 last_added_fix
= NULL
;
/* Greedily add forward references to the current pool until one no
   longer fits within its max_address reach.  */
17457 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17459 if (BARRIER_P (ftmp
->insn
))
17461 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17464 last_barrier
= ftmp
;
17466 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17469 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17472 /* If we found a barrier, drop back to that; any fixes that we
17473 could have reached but come after the barrier will now go in
17474 the next mini-pool. */
17475 if (last_barrier
!= NULL
)
17477 /* Reduce the refcount for those fixes that won't go into this
17479 for (fdel
= last_barrier
->next
;
17480 fdel
&& fdel
!= ftmp
;
17483 fdel
->minipool
->refcount
--;
17484 fdel
->minipool
= NULL
;
17487 ftmp
= last_barrier
;
17491 /* ftmp is first fix that we can't fit into this pool and
17492 there no natural barriers that we could use. Insert a
17493 new barrier in the code somewhere between the previous
17494 fix and this one, and arrange to jump around it. */
17495 HOST_WIDE_INT max_address
;
17497 /* The last item on the list of fixes must be a barrier, so
17498 we can never run off the end of the list of fixes without
17499 last_barrier being set. */
17502 max_address
= minipool_vector_head
->max_address
;
17503 /* Check that there isn't another fix that is in range that
17504 we couldn't fit into this pool because the pool was
17505 already too large: we need to put the pool before such an
17506 instruction. The pool itself may come just after the
17507 fix because create_fix_barrier also allows space for a
17508 jump instruction. */
17509 if (ftmp
->address
< max_address
)
17510 max_address
= ftmp
->address
+ 1;
17512 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17515 assign_minipool_offsets (last_barrier
);
17519 if (!BARRIER_P (ftmp
->insn
)
17520 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17527 /* Scan over the fixes we have identified for this pool, fixing them
17528 up and adding the constants to the pool itself. */
/* Rewrite each fixed operand as a pc-relative MEM of the pool label
   plus the entry's offset within the pool.  */
17529 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17530 this_fix
= this_fix
->next
)
17531 if (!BARRIER_P (this_fix
->insn
))
17534 = plus_constant (Pmode
,
17535 gen_rtx_LABEL_REF (VOIDmode
,
17536 minipool_vector_label
),
17537 this_fix
->minipool
->offset
);
17538 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17541 dump_minipool (last_barrier
->insn
);
17545 /* From now on we must synthesize any constants that we can't handle
17546 directly. This can happen if the RTL gets split during final
17547 instruction generation. */
17548 cfun
->machine
->after_arm_reorg
= 1;
17550 /* Free the minipool memory. */
17551 obstack_free (&minipool_obstack
, minipool_startobj
);
17554 /* Routines to output assembly language. */
17556 /* Return string representation of passed in real value. */
/* NOTE(review): the function tail (init call at ~17561 and the returned
   string literal at ~17565) was dropped by the extraction; only the lazy-init
   guard and the assertion that R equals value_fp0 are visible here.  */
17557 static const char *
17558 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17560 if (!fp_consts_inited
)
17563 gcc_assert (real_equal (r
, &value_fp0
));
17567 /* OPERANDS[0] is the entire list of insns that constitute pop,
17568 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17569 is in the list, UPDATE is true iff the list contains explicit
17570 update of base register. */
/* NOTE(review): mangled extraction — gaps (return type at 17571, the 'update'
   parameter and locals 'pattern'/'i'/'offset' near 17573-17584, 'else' arms,
   the sprintf at 17622/17627) mark dropped lines.  Builds the ldm/pop
   mnemonic into a local 'pattern' buffer via sprintf/strcat, then emits it.  */
17572 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17578 const char *conditional
;
17579 int num_saves
= XVECLEN (operands
[0], 0);
17580 unsigned int regno
;
17581 unsigned int regno_base
= REGNO (operands
[1]);
17582 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
/* 'offset' skips the leading PARALLEL elements that are not register
   loads (the base-register update and/or the return).  */
17585 offset
+= update
? 1 : 0;
17586 offset
+= return_pc
? 1 : 0;
17588 /* Is the base register in the list? */
17589 for (i
= offset
; i
< num_saves
; i
++)
17591 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17592 /* If SP is in the list, then the base register must be SP. */
17593 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17594 /* If base register is in the list, there must be no explicit update. */
17595 if (regno
== regno_base
)
17596 gcc_assert (!update
);
/* "%?%D0"/"%?%d0" print the (possibly reversed) condition from operand 0.  */
17599 conditional
= reverse
? "%?%D0" : "%?%d0";
17600 /* Can't use POP if returning from an interrupt. */
17601 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
17602 sprintf (pattern
, "pop%s\t{", conditional
);
17605 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17606 It's just a convention, their semantics are identical. */
17607 if (regno_base
== SP_REGNUM
)
17608 sprintf (pattern
, "ldmfd%s\t", conditional
);
17610 sprintf (pattern
, "ldmia%s\t", conditional
);
17612 sprintf (pattern
, "ldm%s\t", conditional
);
17614 strcat (pattern
, reg_names
[regno_base
]);
17616 strcat (pattern
, "!, {");
17618 strcat (pattern
, ", {");
17621 /* Output the first destination register. */
17623 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17625 /* Output the rest of the destination registers. */
17626 for (i
= offset
+ 1; i
< num_saves
; i
++)
17628 strcat (pattern
, ", ");
17630 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17633 strcat (pattern
, "}");
/* '^' suffix: exception-return form used when returning from an interrupt.  */
17635 if (interrupt_p
&& return_pc
)
17636 strcat (pattern
, "^");
17638 output_asm_insn (pattern
, &cond
);
17642 /* Output the assembly for a store multiple. */
/* NOTE(review): mangled extraction — the return type (~17644), locals
   ('pattern' buffer, 'p', 'base', 'i' near 17647-17650), the if/else around
   17656-17659 and the trailing return (~17674) were dropped.  Emits either
   vpush (SP base, push) or vstmdb for a block of D registers.  */
17645 vfp_output_vstmd (rtx
* operands
)
/* The address may be a bare REG or wrapped (e.g. in a pre-modify); take
   the inner register either way.  */
17651 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
17652 ? XEXP (operands
[0], 0)
17653 : XEXP (XEXP (operands
[0], 0), 0);
17654 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
17657 strcpy (pattern
, "vpush%?.64\t{%P1");
17659 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
17661 p
= strlen (pattern
);
17663 gcc_assert (REG_P (operands
[1]));
/* VFP register numbers are internal; dividing the offset from
   FIRST_VFP_REGNUM by 2 yields the D-register index.  */
17665 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17666 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17668 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17670 strcpy (&pattern
[p
], "}");
17672 output_asm_insn (pattern
, operands
);
17677 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17678 number of bytes pushed. */
/* NOTE(review): mangled extraction — the return type (~17680), locals
   ('par', 'dwarf', 'reg', 'tmp', 'i', 'saved'), the count adjustment inside
   the ARM10 workaround, the >16 split guard, the base_reg += 2 steps in the
   loop, and the final 'return count * 8;' were dropped.  Builds one
   UNSPEC_PUSH_MULT store-multiple insn plus a parallel 'dwarf' SEQUENCE
   attached as REG_FRAME_RELATED_EXPR for CFI.  */
17681 vfp_emit_fstmd (int base_reg
, int count
)
17688 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17689 register pairs are stored by a store multiple insn. We avoid this
17690 by pushing an extra pair. */
17691 if (count
== 2 && !arm_arch6
)
17693 if (base_reg
== LAST_VFP_REGNUM
- 3)
17698 /* FSTMD may not store more than 16 doubleword registers at once. Split
17699 larger stores into multiple parts (up to a maximum of two, in
17704 /* NOTE: base_reg is an internal register number, so each D register
17706 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17707 saved
+= vfp_emit_fstmd (base_reg
, 16);
17711 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17712 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17714 reg
= gen_rtx_REG (DFmode
, base_reg
);
/* Element 0 of the PARALLEL is the store-multiple itself: a PRE_MODIFY
   of SP feeding a BLKmode UNSPEC_PUSH_MULT of the first register.  */
17717 XVECEXP (par
, 0, 0)
17718 = gen_rtx_SET (gen_frame_mem
17720 gen_rtx_PRE_MODIFY (Pmode
,
17723 (Pmode
, stack_pointer_rtx
,
17726 gen_rtx_UNSPEC (BLKmode
,
17727 gen_rtvec (1, reg
),
17728 UNSPEC_PUSH_MULT
));
/* The dwarf SEQUENCE describes the same effect for unwind info: the SP
   adjustment first, then one store per D register.  */
17730 tmp
= gen_rtx_SET (stack_pointer_rtx
,
17731 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17732 RTX_FRAME_RELATED_P (tmp
) = 1;
17733 XVECEXP (dwarf
, 0, 0) = tmp
;
17735 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
17736 RTX_FRAME_RELATED_P (tmp
) = 1;
17737 XVECEXP (dwarf
, 0, 1) = tmp
;
17739 for (i
= 1; i
< count
; i
++)
17741 reg
= gen_rtx_REG (DFmode
, base_reg
);
17743 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17745 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
17746 plus_constant (Pmode
,
17750 RTX_FRAME_RELATED_P (tmp
) = 1;
17751 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17754 par
= emit_insn (par
);
17755 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17756 RTX_FRAME_RELATED_P (par
) = 1;
17761 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
17762 has the cmse_nonsecure_call attribute and returns false otherwise. */
/* NOTE(review): mangled extraction — the 'bool' return type (~17764), the
   early 'if (!addr) return false;' guard, and the 'return true;'/'return
   false;' statements (~17773-17775) were dropped; only the attribute lookup
   guarded by the global 'use_cmse' flag is visible.  */
17765 detect_cmse_nonsecure_call (tree addr
)
17770 tree fntype
= TREE_TYPE (addr
);
17771 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_call",
17772 TYPE_ATTRIBUTES (fntype
)))
17778 /* Emit a call instruction with pattern PAT. ADDR is the address of
17779 the call target. */
/* NOTE(review): mangled extraction — the return type (~17781), the local
   'insn' declaration, the '&& !sibcall' style conditions near 17792-17793,
   and closing braces were dropped.  'sibcall' is otherwise unused in the
   visible lines — presumably part of the dropped VxWorks condition; confirm
   against pristine arm.c.  */
17782 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
17786 insn
= emit_call_insn (pat
);
17788 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17789 If the call might use such an entry, add a use of the PIC register
17790 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17791 if (TARGET_VXWORKS_RTP
17794 && GET_CODE (addr
) == SYMBOL_REF
17795 && (SYMBOL_REF_DECL (addr
)
17796 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
17797 : !SYMBOL_REF_LOCAL_P (addr
)))
17799 require_pic_register ();
17800 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
17803 if (TARGET_AAPCS_BASED
)
17805 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17806 linker. We need to add an IP clobber to allow setting
17807 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17808 is not needed since it's a fixed register. */
17809 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
17810 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
17814 /* Output a 'call' insn. */
17816 output_call (rtx
*operands
)
17818 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17820 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17821 if (REGNO (operands
[0]) == LR_REGNUM
)
17823 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17824 output_asm_insn ("mov%?\t%0, %|lr", operands
);
17827 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17829 if (TARGET_INTERWORK
|| arm_arch4t
)
17830 output_asm_insn ("bx%?\t%0", operands
);
17832 output_asm_insn ("mov%?\t%|pc, %0", operands
);
17837 /* Output a move from arm registers to arm registers of a long double
17838 OPERANDS[0] is the destination.
17839 OPERANDS[1] is the source. */
17841 output_mov_long_double_arm_from_arm (rtx
*operands
)
17843 /* We have to be careful here because the two might overlap. */
17844 int dest_start
= REGNO (operands
[0]);
17845 int src_start
= REGNO (operands
[1]);
17849 if (dest_start
< src_start
)
17851 for (i
= 0; i
< 3; i
++)
17853 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17854 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17855 output_asm_insn ("mov%?\t%0, %1", ops
);
17860 for (i
= 2; i
>= 0; i
--)
17862 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17863 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17864 output_asm_insn ("mov%?\t%0, %1", ops
);
17872 arm_emit_movpair (rtx dest
, rtx src
)
17874 /* If the src is an immediate, simplify it. */
17875 if (CONST_INT_P (src
))
17877 HOST_WIDE_INT val
= INTVAL (src
);
17878 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
17879 if ((val
>> 16) & 0x0000ffff)
17881 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
17883 GEN_INT ((val
>> 16) & 0x0000ffff));
17884 rtx_insn
*insn
= get_last_insn ();
17885 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
17889 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
17890 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
17891 rtx_insn
*insn
= get_last_insn ();
17892 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
17895 /* Output a move between double words. It must be REG<-MEM
17898 output_move_double (rtx
*operands
, bool emit
, int *count
)
17900 enum rtx_code code0
= GET_CODE (operands
[0]);
17901 enum rtx_code code1
= GET_CODE (operands
[1]);
17906 /* The only case when this might happen is when
17907 you are looking at the length of a DImode instruction
17908 that has an invalid constant in it. */
17909 if (code0
== REG
&& code1
!= MEM
)
17911 gcc_assert (!emit
);
17918 unsigned int reg0
= REGNO (operands
[0]);
17920 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17922 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17924 switch (GET_CODE (XEXP (operands
[1], 0)))
17931 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17932 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
17934 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
17939 gcc_assert (TARGET_LDRD
);
17941 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
17948 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
17950 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
17958 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
17960 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
17965 gcc_assert (TARGET_LDRD
);
17967 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
17972 /* Autoicrement addressing modes should never have overlapping
17973 base and destination registers, and overlapping index registers
17974 are already prohibited, so this doesn't need to worry about
17976 otherops
[0] = operands
[0];
17977 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
17978 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
17980 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
17982 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
17984 /* Registers overlap so split out the increment. */
17987 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
17988 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
17995 /* Use a single insn if we can.
17996 FIXME: IWMMXT allows offsets larger than ldrd can
17997 handle, fix these up with a pair of ldr. */
17999 || !CONST_INT_P (otherops
[2])
18000 || (INTVAL (otherops
[2]) > -256
18001 && INTVAL (otherops
[2]) < 256))
18004 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
18010 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
18011 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18021 /* Use a single insn if we can.
18022 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18023 fix these up with a pair of ldr. */
18025 || !CONST_INT_P (otherops
[2])
18026 || (INTVAL (otherops
[2]) > -256
18027 && INTVAL (otherops
[2]) < 256))
18030 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
18036 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18037 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
18047 /* We might be able to use ldrd %0, %1 here. However the range is
18048 different to ldr/adr, and it is broken on some ARMv7-M
18049 implementations. */
18050 /* Use the second register of the pair to avoid problematic
18052 otherops
[1] = operands
[1];
18054 output_asm_insn ("adr%?\t%0, %1", otherops
);
18055 operands
[1] = otherops
[0];
18059 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18061 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
18068 /* ??? This needs checking for thumb2. */
18070 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18071 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18073 otherops
[0] = operands
[0];
18074 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18075 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18077 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18079 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18081 switch ((int) INTVAL (otherops
[2]))
18085 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
18091 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
18097 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
18101 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18102 operands
[1] = otherops
[0];
18104 && (REG_P (otherops
[2])
18106 || (CONST_INT_P (otherops
[2])
18107 && INTVAL (otherops
[2]) > -256
18108 && INTVAL (otherops
[2]) < 256)))
18110 if (reg_overlap_mentioned_p (operands
[0],
18113 /* Swap base and index registers over to
18114 avoid a conflict. */
18115 std::swap (otherops
[1], otherops
[2]);
18117 /* If both registers conflict, it will usually
18118 have been fixed by a splitter. */
18119 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18120 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18124 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18125 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18132 otherops
[0] = operands
[0];
18134 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
18139 if (CONST_INT_P (otherops
[2]))
18143 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18144 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18146 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18152 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18158 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18165 return "ldrd%?\t%0, [%1]";
18167 return "ldmia%?\t%1, %M0";
18171 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18172 /* Take care of overlapping base/data reg. */
18173 if (reg_mentioned_p (operands
[0], operands
[1]))
18177 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18178 output_asm_insn ("ldr%?\t%0, %1", operands
);
18188 output_asm_insn ("ldr%?\t%0, %1", operands
);
18189 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18199 /* Constraints should ensure this. */
18200 gcc_assert (code0
== MEM
&& code1
== REG
);
18201 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18202 || (TARGET_ARM
&& TARGET_LDRD
));
18204 switch (GET_CODE (XEXP (operands
[0], 0)))
18210 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
18212 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18217 gcc_assert (TARGET_LDRD
);
18219 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
18226 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
18228 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
18236 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
18238 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
18243 gcc_assert (TARGET_LDRD
);
18245 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
18250 otherops
[0] = operands
[1];
18251 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18252 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18254 /* IWMMXT allows offsets larger than ldrd can handle,
18255 fix these up with a pair of ldr. */
18257 && CONST_INT_P (otherops
[2])
18258 && (INTVAL(otherops
[2]) <= -256
18259 || INTVAL(otherops
[2]) >= 256))
18261 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18265 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18266 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18275 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18276 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18282 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18285 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
18290 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
18295 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18296 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18298 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18302 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
18309 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
18316 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
18321 && (REG_P (otherops
[2])
18323 || (CONST_INT_P (otherops
[2])
18324 && INTVAL (otherops
[2]) > -256
18325 && INTVAL (otherops
[2]) < 256)))
18327 otherops
[0] = operands
[1];
18328 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18330 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
18336 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18337 otherops
[1] = operands
[1];
18340 output_asm_insn ("str%?\t%1, %0", operands
);
18341 output_asm_insn ("str%?\t%H1, %0", otherops
);
18351 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18352 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18355 output_move_quad (rtx
*operands
)
18357 if (REG_P (operands
[0]))
18359 /* Load, or reg->reg move. */
18361 if (MEM_P (operands
[1]))
18363 switch (GET_CODE (XEXP (operands
[1], 0)))
18366 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18371 output_asm_insn ("adr%?\t%0, %1", operands
);
18372 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
18376 gcc_unreachable ();
18384 gcc_assert (REG_P (operands
[1]));
18386 dest
= REGNO (operands
[0]);
18387 src
= REGNO (operands
[1]);
18389 /* This seems pretty dumb, but hopefully GCC won't try to do it
18392 for (i
= 0; i
< 4; i
++)
18394 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18395 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18396 output_asm_insn ("mov%?\t%0, %1", ops
);
18399 for (i
= 3; i
>= 0; i
--)
18401 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18402 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18403 output_asm_insn ("mov%?\t%0, %1", ops
);
18409 gcc_assert (MEM_P (operands
[0]));
18410 gcc_assert (REG_P (operands
[1]));
18411 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18413 switch (GET_CODE (XEXP (operands
[0], 0)))
18416 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18420 gcc_unreachable ();
18427 /* Output a VFP load or store instruction. */
18430 output_move_vfp (rtx
*operands
)
18432 rtx reg
, mem
, addr
, ops
[2];
18433 int load
= REG_P (operands
[0]);
18434 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18435 int sp
= (!TARGET_VFP_FP16INST
18436 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
18437 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18442 reg
= operands
[!load
];
18443 mem
= operands
[load
];
18445 mode
= GET_MODE (reg
);
18447 gcc_assert (REG_P (reg
));
18448 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18449 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
18455 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18456 gcc_assert (MEM_P (mem
));
18458 addr
= XEXP (mem
, 0);
18460 switch (GET_CODE (addr
))
18463 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18464 ops
[0] = XEXP (addr
, 0);
18469 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18470 ops
[0] = XEXP (addr
, 0);
18475 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18481 sprintf (buff
, templ
,
18482 load
? "ld" : "st",
18483 dp
? "64" : sp
? "32" : "16",
18485 integer_p
? "\t%@ int" : "");
18486 output_asm_insn (buff
, ops
);
18491 /* Output a Neon double-word or quad-word load or store, or a load
18492 or store for larger structure modes.
18494 WARNING: The ordering of elements is weird in big-endian mode,
18495 because the EABI requires that vectors stored in memory appear
18496 as though they were stored by a VSTM, as required by the EABI.
18497 GCC RTL defines element ordering based on in-memory order.
18498 This can be different from the architectural ordering of elements
18499 within a NEON register. The intrinsics defined in arm_neon.h use the
18500 NEON register element ordering, not the GCC RTL element ordering.
18502 For example, the in-memory ordering of a big-endian a quadword
18503 vector with 16-bit elements when stored from register pair {d0,d1}
18504 will be (lowest address first, d0[N] is NEON register element N):
18506 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18508 When necessary, quadword registers (dN, dN+1) are moved to ARM
18509 registers from rN in the order:
18511 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18513 So that STM/LDM can be used on vectors in ARM registers, and the
18514 same memory layout will result as if VSTM/VLDM were used.
18516 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18517 possible, which allows use of appropriate alignment tags.
18518 Note that the choice of "64" is independent of the actual vector
18519 element size; this size simply ensures that the behavior is
18520 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18522 Due to limitations of those instructions, use of VST1.64/VLD1.64
18523 is not possible if:
18524 - the address contains PRE_DEC, or
18525 - the mode refers to more than 4 double-word registers
18527 In those cases, it would be possible to replace VSTM/VLDM by a
18528 sequence of instructions; this is not currently implemented since
18529 this is not certain to actually improve performance. */
18532 output_move_neon (rtx
*operands
)
18534 rtx reg
, mem
, addr
, ops
[2];
18535 int regno
, nregs
, load
= REG_P (operands
[0]);
18540 reg
= operands
[!load
];
18541 mem
= operands
[load
];
18543 mode
= GET_MODE (reg
);
18545 gcc_assert (REG_P (reg
));
18546 regno
= REGNO (reg
);
18547 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18548 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18549 || NEON_REGNO_OK_FOR_QUAD (regno
));
18550 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18551 || VALID_NEON_QREG_MODE (mode
)
18552 || VALID_NEON_STRUCT_MODE (mode
));
18553 gcc_assert (MEM_P (mem
));
18555 addr
= XEXP (mem
, 0);
18557 /* Strip off const from addresses like (const (plus (...))). */
18558 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18559 addr
= XEXP (addr
, 0);
18561 switch (GET_CODE (addr
))
18564 /* We have to use vldm / vstm for too-large modes. */
18567 templ
= "v%smia%%?\t%%0!, %%h1";
18568 ops
[0] = XEXP (addr
, 0);
18572 templ
= "v%s1.64\t%%h1, %%A0";
18579 /* We have to use vldm / vstm in this case, since there is no
18580 pre-decrement form of the vld1 / vst1 instructions. */
18581 templ
= "v%smdb%%?\t%%0!, %%h1";
18582 ops
[0] = XEXP (addr
, 0);
18587 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18588 gcc_unreachable ();
18591 /* We have to use vldm / vstm for too-large modes. */
18595 templ
= "v%smia%%?\t%%m0, %%h1";
18597 templ
= "v%s1.64\t%%h1, %%A0";
18603 /* Fall through. */
18609 for (i
= 0; i
< nregs
; i
++)
18611 /* We're only using DImode here because it's a convenient size. */
18612 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18613 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18614 if (reg_overlap_mentioned_p (ops
[0], mem
))
18616 gcc_assert (overlap
== -1);
18621 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18622 output_asm_insn (buff
, ops
);
18627 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18628 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18629 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18630 output_asm_insn (buff
, ops
);
18637 gcc_unreachable ();
18640 sprintf (buff
, templ
, load
? "ld" : "st");
18641 output_asm_insn (buff
, ops
);
18646 /* Compute and return the length of neon_mov<mode>, where <mode> is
18647 one of VSTRUCT modes: EI, OI, CI or XI. */
18649 arm_attr_length_move_neon (rtx_insn
*insn
)
18651 rtx reg
, mem
, addr
;
18655 extract_insn_cached (insn
);
18657 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18659 mode
= GET_MODE (recog_data
.operand
[0]);
18670 gcc_unreachable ();
18674 load
= REG_P (recog_data
.operand
[0]);
18675 reg
= recog_data
.operand
[!load
];
18676 mem
= recog_data
.operand
[load
];
18678 gcc_assert (MEM_P (mem
));
18680 mode
= GET_MODE (reg
);
18681 addr
= XEXP (mem
, 0);
18683 /* Strip off const from addresses like (const (plus (...))). */
18684 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18685 addr
= XEXP (addr
, 0);
18687 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18689 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
18696 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18700 arm_address_offset_is_imm (rtx_insn
*insn
)
18704 extract_insn_cached (insn
);
18706 if (REG_P (recog_data
.operand
[0]))
18709 mem
= recog_data
.operand
[0];
18711 gcc_assert (MEM_P (mem
));
18713 addr
= XEXP (mem
, 0);
18716 || (GET_CODE (addr
) == PLUS
18717 && REG_P (XEXP (addr
, 0))
18718 && CONST_INT_P (XEXP (addr
, 1))))
18724 /* Output an ADD r, s, #n where n may be too big for one instruction.
18725 If adding zero to one register, output nothing. */
18727 output_add_immediate (rtx
*operands
)
18729 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18731 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18734 output_multi_immediate (operands
,
18735 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18738 output_multi_immediate (operands
,
18739 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18746 /* Output a multiple immediate operation.
18747 OPERANDS is the vector of operands referred to in the output patterns.
18748 INSTR1 is the output pattern to use for the first constant.
18749 INSTR2 is the output pattern to use for subsequent constants.
18750 IMMED_OP is the index of the constant slot in OPERANDS.
18751 N is the constant value. */
18752 static const char *
18753 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18754 int immed_op
, HOST_WIDE_INT n
)
18756 #if HOST_BITS_PER_WIDE_INT > 32
18762 /* Quick and easy output. */
18763 operands
[immed_op
] = const0_rtx
;
18764 output_asm_insn (instr1
, operands
);
18769 const char * instr
= instr1
;
18771 /* Note that n is never zero here (which would give no output). */
18772 for (i
= 0; i
< 32; i
+= 2)
18776 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18777 output_asm_insn (instr
, operands
);
18787 /* Return the name of a shifter operation. */
18788 static const char *
18789 arm_shift_nmem(enum rtx_code code
)
18794 return ARM_LSL_NAME
;
18810 /* Return the appropriate ARM instruction for the operation code.
18811 The returned result should not be overwritten. OP is the rtx of the
18812 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18815 arithmetic_instr (rtx op
, int shift_first_arg
)
18817 switch (GET_CODE (op
))
18823 return shift_first_arg
? "rsb" : "sub";
18838 return arm_shift_nmem(GET_CODE(op
));
18841 gcc_unreachable ();
18845 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18846 for the operation code. The returned result should not be overwritten.
18847 OP is the rtx code of the shift.
18848 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18850 static const char *
18851 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18854 enum rtx_code code
= GET_CODE (op
);
18859 if (!CONST_INT_P (XEXP (op
, 1)))
18861 output_operand_lossage ("invalid shift operand");
18866 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18874 mnem
= arm_shift_nmem(code
);
18875 if (CONST_INT_P (XEXP (op
, 1)))
18877 *amountp
= INTVAL (XEXP (op
, 1));
18879 else if (REG_P (XEXP (op
, 1)))
18886 output_operand_lossage ("invalid shift operand");
18892 /* We never have to worry about the amount being other than a
18893 power of 2, since this case can never be reloaded from a reg. */
18894 if (!CONST_INT_P (XEXP (op
, 1)))
18896 output_operand_lossage ("invalid shift operand");
18900 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18902 /* Amount must be a power of two. */
18903 if (*amountp
& (*amountp
- 1))
18905 output_operand_lossage ("invalid shift operand");
18909 *amountp
= exact_log2 (*amountp
);
18910 gcc_assert (IN_RANGE (*amountp
, 0, 31));
18911 return ARM_LSL_NAME
;
18914 output_operand_lossage ("invalid shift operand");
18918 /* This is not 100% correct, but follows from the desire to merge
18919 multiplication by a power of 2 with the recognizer for a
18920 shift. >=32 is not a valid shift for "lsl", so we must try and
18921 output a shift that produces the correct arithmetical result.
18922 Using lsr #32 is identical except for the fact that the carry bit
18923 is not set correctly if we set the flags; but we never use the
18924 carry bit from such an operation, so we can ignore that. */
18925 if (code
== ROTATERT
)
18926 /* Rotate is just modulo 32. */
18928 else if (*amountp
!= (*amountp
& 31))
18930 if (code
== ASHIFT
)
18935 /* Shifts of 0 are no-ops. */
18942 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18943 because /bin/as is horribly restrictive. The judgement about
18944 whether or not each character is 'printable' (and can be output as
18945 is) or not (and must be printed with an octal escape) must be made
18946 with reference to the *host* character set -- the situation is
18947 similar to that discussed in the comments above pp_c_char in
18948 c-pretty-print.c. */
18950 #define MAX_ASCII_LEN 51
18953 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
18956 int len_so_far
= 0;
18958 fputs ("\t.ascii\t\"", stream
);
18960 for (i
= 0; i
< len
; i
++)
18964 if (len_so_far
>= MAX_ASCII_LEN
)
18966 fputs ("\"\n\t.ascii\t\"", stream
);
18972 if (c
== '\\' || c
== '\"')
18974 putc ('\\', stream
);
18982 fprintf (stream
, "\\%03o", c
);
18987 fputs ("\"\n", stream
);
18990 /* Whether a register is callee saved or not. This is necessary because high
18991 registers are marked as caller saved when optimizing for size on Thumb-1
18992 targets despite being callee saved in order to avoid using them. */
18993 #define callee_saved_reg_p(reg) \
18994 (!call_used_regs[reg] \
18995 || (TARGET_THUMB1 && optimize_size \
18996 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18998 /* Compute the register save mask for registers 0 through 12
18999 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19001 static unsigned long
19002 arm_compute_save_reg0_reg12_mask (void)
19004 unsigned long func_type
= arm_current_func_type ();
19005 unsigned long save_reg_mask
= 0;
19008 if (IS_INTERRUPT (func_type
))
19010 unsigned int max_reg
;
19011 /* Interrupt functions must not corrupt any registers,
19012 even call clobbered ones. If this is a leaf function
19013 we can just examine the registers used by the RTL, but
19014 otherwise we have to assume that whatever function is
19015 called might clobber anything, and so we have to save
19016 all the call-clobbered registers as well. */
19017 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
19018 /* FIQ handlers have registers r8 - r12 banked, so
19019 we only need to check r0 - r7, Normal ISRs only
19020 bank r14 and r15, so we must check up to r12.
19021 r13 is the stack pointer which is always preserved,
19022 so we do not need to consider it here. */
19027 for (reg
= 0; reg
<= max_reg
; reg
++)
19028 if (df_regs_ever_live_p (reg
)
19029 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
19030 save_reg_mask
|= (1 << reg
);
19032 /* Also save the pic base register if necessary. */
19034 && !TARGET_SINGLE_PIC_BASE
19035 && arm_pic_register
!= INVALID_REGNUM
19036 && crtl
->uses_pic_offset_table
)
19037 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19039 else if (IS_VOLATILE(func_type
))
19041 /* For noreturn functions we historically omitted register saves
19042 altogether. However this really messes up debugging. As a
19043 compromise save just the frame pointers. Combined with the link
19044 register saved elsewhere this should be sufficient to get
19046 if (frame_pointer_needed
)
19047 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19048 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
19049 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19050 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
19051 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
19055 /* In the normal case we only need to save those registers
19056 which are call saved and which are used by this function. */
19057 for (reg
= 0; reg
<= 11; reg
++)
19058 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19059 save_reg_mask
|= (1 << reg
);
19061 /* Handle the frame pointer as a special case. */
19062 if (frame_pointer_needed
)
19063 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19065 /* If we aren't loading the PIC register,
19066 don't stack it even though it may be live. */
19068 && !TARGET_SINGLE_PIC_BASE
19069 && arm_pic_register
!= INVALID_REGNUM
19070 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19071 || crtl
->uses_pic_offset_table
))
19072 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19074 /* The prologue will copy SP into R0, so save it. */
19075 if (IS_STACKALIGN (func_type
))
19076 save_reg_mask
|= 1;
19079 /* Save registers so the exception handler can modify them. */
19080 if (crtl
->calls_eh_return
)
19086 reg
= EH_RETURN_DATA_REGNO (i
);
19087 if (reg
== INVALID_REGNUM
)
19089 save_reg_mask
|= 1 << reg
;
19093 return save_reg_mask
;
19096 /* Return true if r3 is live at the start of the function. */
19099 arm_r3_live_at_start_p (void)
19101 /* Just look at cfg info, which is still close enough to correct at this
19102 point. This gives false positives for broken functions that might use
19103 uninitialized data that happens to be allocated in r3, but who cares? */
19104 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
19107 /* Compute the number of bytes used to store the static chain register on the
19108 stack, above the stack frame. We need to know this accurately to get the
19109 alignment of the rest of the stack frame correct. */
19112 arm_compute_static_chain_stack_bytes (void)
19114 /* See the defining assertion in arm_expand_prologue. */
19115 if (IS_NESTED (arm_current_func_type ())
19116 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19117 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
19118 && !df_regs_ever_live_p (LR_REGNUM
)))
19119 && arm_r3_live_at_start_p ()
19120 && crtl
->args
.pretend_args_size
== 0)
19126 /* Compute a bit mask of which core registers need to be
19127 saved on the stack for the current function.
19128 This is used by arm_compute_frame_layout, which may add extra registers. */
19130 static unsigned long
19131 arm_compute_save_core_reg_mask (void)
19133 unsigned int save_reg_mask
= 0;
19134 unsigned long func_type
= arm_current_func_type ();
19137 if (IS_NAKED (func_type
))
19138 /* This should never really happen. */
19141 /* If we are creating a stack frame, then we must save the frame pointer,
19142 IP (which will hold the old stack pointer), LR and the PC. */
19143 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19145 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
19148 | (1 << PC_REGNUM
);
19150 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
19152 /* Decide if we need to save the link register.
19153 Interrupt routines have their own banked link register,
19154 so they never need to save it.
19155 Otherwise if we do not use the link register we do not need to save
19156 it. If we are pushing other registers onto the stack however, we
19157 can save an instruction in the epilogue by pushing the link register
19158 now and then popping it back into the PC. This incurs extra memory
19159 accesses though, so we only do it when optimizing for size, and only
19160 if we know that we will not need a fancy return sequence. */
19161 if (df_regs_ever_live_p (LR_REGNUM
)
19164 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19165 && !crtl
->tail_call_emit
19166 && !crtl
->calls_eh_return
))
19167 save_reg_mask
|= 1 << LR_REGNUM
;
19169 if (cfun
->machine
->lr_save_eliminated
)
19170 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19172 if (TARGET_REALLY_IWMMXT
19173 && ((bit_count (save_reg_mask
)
19174 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19175 arm_compute_static_chain_stack_bytes())
19178 /* The total number of registers that are going to be pushed
19179 onto the stack is odd. We need to ensure that the stack
19180 is 64-bit aligned before we start to save iWMMXt registers,
19181 and also before we start to create locals. (A local variable
19182 might be a double or long long which we will load/store using
19183 an iWMMXt instruction). Therefore we need to push another
19184 ARM register, so that the stack will be 64-bit aligned. We
19185 try to avoid using the arg registers (r0 -r3) as they might be
19186 used to pass values in a tail call. */
19187 for (reg
= 4; reg
<= 12; reg
++)
19188 if ((save_reg_mask
& (1 << reg
)) == 0)
19192 save_reg_mask
|= (1 << reg
);
19195 cfun
->machine
->sibcall_blocked
= 1;
19196 save_reg_mask
|= (1 << 3);
19200 /* We may need to push an additional register for use initializing the
19201 PIC base register. */
19202 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19203 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19205 reg
= thumb_find_work_register (1 << 4);
19206 if (!call_used_regs
[reg
])
19207 save_reg_mask
|= (1 << reg
);
19210 return save_reg_mask
;
19213 /* Compute a bit mask of which core registers need to be
19214 saved on the stack for the current function. */
19215 static unsigned long
19216 thumb1_compute_save_core_reg_mask (void)
19218 unsigned long mask
;
19222 for (reg
= 0; reg
< 12; reg
++)
19223 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19226 /* Handle the frame pointer as a special case. */
19227 if (frame_pointer_needed
)
19228 mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19231 && !TARGET_SINGLE_PIC_BASE
19232 && arm_pic_register
!= INVALID_REGNUM
19233 && crtl
->uses_pic_offset_table
)
19234 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19236 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19237 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19238 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19240 /* LR will also be pushed if any lo regs are pushed. */
19241 if (mask
& 0xff || thumb_force_lr_save ())
19242 mask
|= (1 << LR_REGNUM
);
19244 /* Make sure we have a low work register if we need one.
19245 We will need one if we are going to push a high register,
19246 but we are not currently intending to push a low register. */
19247 if ((mask
& 0xff) == 0
19248 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19250 /* Use thumb_find_work_register to choose which register
19251 we will use. If the register is live then we will
19252 have to push it. Use LAST_LO_REGNUM as our fallback
19253 choice for the register to select. */
19254 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19255 /* Make sure the register returned by thumb_find_work_register is
19256 not part of the return value. */
19257 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19258 reg
= LAST_LO_REGNUM
;
19260 if (callee_saved_reg_p (reg
))
19264 /* The 504 below is 8 bytes less than 512 because there are two possible
19265 alignment words. We can't tell here if they will be present or not so we
19266 have to play it safe and assume that they are. */
19267 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19268 ROUND_UP_WORD (get_frame_size ()) +
19269 crtl
->outgoing_args_size
) >= 504)
19271 /* This is the same as the code in thumb1_expand_prologue() which
19272 determines which register to use for stack decrement. */
19273 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19274 if (mask
& (1 << reg
))
19277 if (reg
> LAST_LO_REGNUM
)
19279 /* Make sure we have a register available for stack decrement. */
19280 mask
|= 1 << LAST_LO_REGNUM
;
19288 /* Return the number of bytes required to save VFP registers. */
19290 arm_get_vfp_saved_size (void)
19292 unsigned int regno
;
19297 /* Space for saved VFP registers. */
19298 if (TARGET_HARD_FLOAT
)
19301 for (regno
= FIRST_VFP_REGNUM
;
19302 regno
< LAST_VFP_REGNUM
;
19305 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19306 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19310 /* Workaround ARM10 VFPr1 bug. */
19311 if (count
== 2 && !arm_arch6
)
19313 saved
+= count
* 8;
19322 if (count
== 2 && !arm_arch6
)
19324 saved
+= count
* 8;
19331 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19332 everything bar the final return instruction. If simple_return is true,
19333 then do not output epilogue, because it has already been emitted in RTL. */
19335 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19336 bool simple_return
)
19338 char conditional
[10];
19341 unsigned long live_regs_mask
;
19342 unsigned long func_type
;
19343 arm_stack_offsets
*offsets
;
19345 func_type
= arm_current_func_type ();
19347 if (IS_NAKED (func_type
))
19350 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19352 /* If this function was declared non-returning, and we have
19353 found a tail call, then we have to trust that the called
19354 function won't return. */
19359 /* Otherwise, trap an attempted return by aborting. */
19361 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19363 assemble_external_libcall (ops
[1]);
19364 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19370 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19372 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19374 cfun
->machine
->return_used_this_function
= 1;
19376 offsets
= arm_get_frame_offsets ();
19377 live_regs_mask
= offsets
->saved_regs_mask
;
19379 if (!simple_return
&& live_regs_mask
)
19381 const char * return_reg
;
19383 /* If we do not have any special requirements for function exit
19384 (e.g. interworking) then we can load the return address
19385 directly into the PC. Otherwise we must load it into LR. */
19387 && !IS_CMSE_ENTRY (func_type
)
19388 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19389 return_reg
= reg_names
[PC_REGNUM
];
19391 return_reg
= reg_names
[LR_REGNUM
];
19393 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19395 /* There are three possible reasons for the IP register
19396 being saved. 1) a stack frame was created, in which case
19397 IP contains the old stack pointer, or 2) an ISR routine
19398 corrupted it, or 3) it was saved to align the stack on
19399 iWMMXt. In case 1, restore IP into SP, otherwise just
19401 if (frame_pointer_needed
)
19403 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19404 live_regs_mask
|= (1 << SP_REGNUM
);
19407 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19410 /* On some ARM architectures it is faster to use LDR rather than
19411 LDM to load a single register. On other architectures, the
19412 cost is the same. In 26 bit mode, or for exception handlers,
19413 we have to use LDM to load the PC so that the CPSR is also
19415 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19416 if (live_regs_mask
== (1U << reg
))
19419 if (reg
<= LAST_ARM_REGNUM
19420 && (reg
!= LR_REGNUM
19422 || ! IS_INTERRUPT (func_type
)))
19424 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19425 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19432 /* Generate the load multiple instruction to restore the
19433 registers. Note we can get here, even if
19434 frame_pointer_needed is true, but only if sp already
19435 points to the base of the saved core registers. */
19436 if (live_regs_mask
& (1 << SP_REGNUM
))
19438 unsigned HOST_WIDE_INT stack_adjust
;
19440 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19441 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19443 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19444 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19447 /* If we can't use ldmib (SA110 bug),
19448 then try to pop r3 instead. */
19450 live_regs_mask
|= 1 << 3;
19452 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19455 /* For interrupt returns we have to use an LDM rather than
19456 a POP so that we can use the exception return variant. */
19457 else if (IS_INTERRUPT (func_type
))
19458 sprintf (instr
, "ldmfd%s\t%%|sp!, {", conditional
);
19460 sprintf (instr
, "pop%s\t{", conditional
);
19462 p
= instr
+ strlen (instr
);
19464 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19465 if (live_regs_mask
& (1 << reg
))
19467 int l
= strlen (reg_names
[reg
]);
19473 memcpy (p
, ", ", 2);
19477 memcpy (p
, "%|", 2);
19478 memcpy (p
+ 2, reg_names
[reg
], l
);
19482 if (live_regs_mask
& (1 << LR_REGNUM
))
19484 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19485 /* If returning from an interrupt, restore the CPSR. */
19486 if (IS_INTERRUPT (func_type
))
19493 output_asm_insn (instr
, & operand
);
19495 /* See if we need to generate an extra instruction to
19496 perform the actual function return. */
19498 && func_type
!= ARM_FT_INTERWORKED
19499 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19501 /* The return has already been handled
19502 by loading the LR into the PC. */
19509 switch ((int) ARM_FUNC_TYPE (func_type
))
19513 /* ??? This is wrong for unified assembly syntax. */
19514 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19517 case ARM_FT_INTERWORKED
:
19518 gcc_assert (arm_arch5
|| arm_arch4t
);
19519 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19522 case ARM_FT_EXCEPTION
:
19523 /* ??? This is wrong for unified assembly syntax. */
19524 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19528 if (IS_CMSE_ENTRY (func_type
))
19530 /* Check if we have to clear the 'GE bits' which is only used if
19531 parallel add and subtraction instructions are available. */
19532 if (TARGET_INT_SIMD
)
19533 snprintf (instr
, sizeof (instr
),
19534 "msr%s\tAPSR_nzcvqg, %%|lr", conditional
);
19536 snprintf (instr
, sizeof (instr
),
19537 "msr%s\tAPSR_nzcvq, %%|lr", conditional
);
19539 output_asm_insn (instr
, & operand
);
19540 if (TARGET_HARD_FLOAT
&& !TARGET_THUMB1
)
19542 /* Clear the cumulative exception-status bits (0-4,7) and the
19543 condition code bits (28-31) of the FPSCR. We need to
19544 remember to clear the first scratch register used (IP) and
19545 save and restore the second (r4). */
19546 snprintf (instr
, sizeof (instr
), "push\t{%%|r4}");
19547 output_asm_insn (instr
, & operand
);
19548 snprintf (instr
, sizeof (instr
), "vmrs\t%%|ip, fpscr");
19549 output_asm_insn (instr
, & operand
);
19550 snprintf (instr
, sizeof (instr
), "movw\t%%|r4, #65376");
19551 output_asm_insn (instr
, & operand
);
19552 snprintf (instr
, sizeof (instr
), "movt\t%%|r4, #4095");
19553 output_asm_insn (instr
, & operand
);
19554 snprintf (instr
, sizeof (instr
), "and\t%%|ip, %%|r4");
19555 output_asm_insn (instr
, & operand
);
19556 snprintf (instr
, sizeof (instr
), "vmsr\tfpscr, %%|ip");
19557 output_asm_insn (instr
, & operand
);
19558 snprintf (instr
, sizeof (instr
), "pop\t{%%|r4}");
19559 output_asm_insn (instr
, & operand
);
19560 snprintf (instr
, sizeof (instr
), "mov\t%%|ip, %%|lr");
19561 output_asm_insn (instr
, & operand
);
19563 snprintf (instr
, sizeof (instr
), "bxns\t%%|lr");
19565 /* Use bx if it's available. */
19566 else if (arm_arch5
|| arm_arch4t
)
19567 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19569 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19573 output_asm_insn (instr
, & operand
);
19579 /* Output in FILE asm statements needed to declare the NAME of the function
19580 defined by its DECL node. */
19583 arm_asm_declare_function_name (FILE *file
, const char *name
, tree decl
)
19585 size_t cmse_name_len
;
19586 char *cmse_name
= 0;
19587 char cmse_prefix
[] = "__acle_se_";
19589 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19590 extra function label for each function with the 'cmse_nonsecure_entry'
19591 attribute. This extra function label should be prepended with
19592 '__acle_se_', telling the linker that it needs to create secure gateway
19593 veneers for this function. */
19594 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_entry",
19595 DECL_ATTRIBUTES (decl
)))
19597 cmse_name_len
= sizeof (cmse_prefix
) + strlen (name
);
19598 cmse_name
= XALLOCAVEC (char, cmse_name_len
);
19599 snprintf (cmse_name
, cmse_name_len
, "%s%s", cmse_prefix
, name
);
19600 targetm
.asm_out
.globalize_label (file
, cmse_name
);
19602 ARM_DECLARE_FUNCTION_NAME (file
, cmse_name
, decl
);
19603 ASM_OUTPUT_TYPE_DIRECTIVE (file
, cmse_name
, "function");
19606 ARM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
19607 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
19608 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
19609 ASM_OUTPUT_LABEL (file
, name
);
19612 ASM_OUTPUT_LABEL (file
, cmse_name
);
19614 ARM_OUTPUT_FN_UNWIND (file
, TRUE
);
19617 /* Write the function name into the code section, directly preceding
19618 the function prologue.
19620 Code will be output similar to this:
19622 .ascii "arm_poke_function_name", 0
19625 .word 0xff000000 + (t1 - t0)
19626 arm_poke_function_name
19628 stmfd sp!, {fp, ip, lr, pc}
19631 When performing a stack backtrace, code can inspect the value
19632 of 'pc' stored at 'fp' + 0. If the trace function then looks
19633 at location pc - 12 and the top 8 bits are set, then we know
19634 that there is a function name embedded immediately preceding this
19635 location and has length ((pc[-3]) & 0xff000000).
19637 We assume that pc is declared as a pointer to an unsigned long.
19639 It is of no benefit to output the function name if we are assembling
19640 a leaf function. These function types will not contain a stack
19641 backtrace structure, therefore it is not possible to determine the
19644 arm_poke_function_name (FILE *stream
, const char *name
)
19646 unsigned long alignlength
;
19647 unsigned long length
;
19650 length
= strlen (name
) + 1;
19651 alignlength
= ROUND_UP_WORD (length
);
19653 ASM_OUTPUT_ASCII (stream
, name
, length
);
19654 ASM_OUTPUT_ALIGN (stream
, 2);
19655 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19656 assemble_aligned_integer (UNITS_PER_WORD
, x
);
19659 /* Place some comments into the assembler stream
19660 describing the current function. */
19662 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
19664 unsigned long func_type
;
19666 /* Sanity check. */
19667 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19669 func_type
= arm_current_func_type ();
19671 switch ((int) ARM_FUNC_TYPE (func_type
))
19674 case ARM_FT_NORMAL
:
19676 case ARM_FT_INTERWORKED
:
19677 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19680 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19683 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19685 case ARM_FT_EXCEPTION
:
19686 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19690 if (IS_NAKED (func_type
))
19691 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19693 if (IS_VOLATILE (func_type
))
19694 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19696 if (IS_NESTED (func_type
))
19697 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19698 if (IS_STACKALIGN (func_type
))
19699 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19700 if (IS_CMSE_ENTRY (func_type
))
19701 asm_fprintf (f
, "\t%@ Non-secure entry function: called from non-secure code.\n");
19703 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19705 crtl
->args
.pretend_args_size
, frame_size
);
19707 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19708 frame_pointer_needed
,
19709 cfun
->machine
->uses_anonymous_args
);
19711 if (cfun
->machine
->lr_save_eliminated
)
19712 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19714 if (crtl
->calls_eh_return
)
19715 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19720 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
19721 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
19723 arm_stack_offsets
*offsets
;
19729 /* Emit any call-via-reg trampolines that are needed for v4t support
19730 of call_reg and call_value_reg type insns. */
19731 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19733 rtx label
= cfun
->machine
->call_via
[regno
];
19737 switch_to_section (function_section (current_function_decl
));
19738 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19739 CODE_LABEL_NUMBER (label
));
19740 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19744 /* ??? Probably not safe to set this here, since it assumes that a
19745 function will be emitted as assembly immediately after we generate
19746 RTL for it. This does not happen for inline functions. */
19747 cfun
->machine
->return_used_this_function
= 0;
19749 else /* TARGET_32BIT */
19751 /* We need to take into account any stack-frame rounding. */
19752 offsets
= arm_get_frame_offsets ();
19754 gcc_assert (!use_return_insn (FALSE
, NULL
)
19755 || (cfun
->machine
->return_used_this_function
!= 0)
19756 || offsets
->saved_regs
== offsets
->outgoing_args
19757 || frame_pointer_needed
);
19761 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19762 STR and STRD. If an even number of registers are being pushed, one
19763 or more STRD patterns are created for each register pair. If an
19764 odd number of registers are pushed, emit an initial STR followed by
19765 as many STRD instructions as are needed. This works best when the
19766 stack is initially 64-bit aligned (the normal case), since it
19767 ensures that each STRD is also 64-bit aligned. */
19769 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19774 rtx par
= NULL_RTX
;
19775 rtx dwarf
= NULL_RTX
;
19779 num_regs
= bit_count (saved_regs_mask
);
19781 /* Must be at least one register to save, and can't save SP or PC. */
19782 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19783 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19784 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19786 /* Create sequence for DWARF info. All the frame-related data for
19787 debugging is held in this wrapper. */
19788 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19790 /* Describe the stack adjustment. */
19791 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19792 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19793 RTX_FRAME_RELATED_P (tmp
) = 1;
19794 XVECEXP (dwarf
, 0, 0) = tmp
;
19796 /* Find the first register. */
19797 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19802 /* If there's an odd number of registers to push. Start off by
19803 pushing a single register. This ensures that subsequent strd
19804 operations are dword aligned (assuming that SP was originally
19805 64-bit aligned). */
19806 if ((num_regs
& 1) != 0)
19808 rtx reg
, mem
, insn
;
19810 reg
= gen_rtx_REG (SImode
, regno
);
19812 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19813 stack_pointer_rtx
));
19815 mem
= gen_frame_mem (Pmode
,
19817 (Pmode
, stack_pointer_rtx
,
19818 plus_constant (Pmode
, stack_pointer_rtx
,
19821 tmp
= gen_rtx_SET (mem
, reg
);
19822 RTX_FRAME_RELATED_P (tmp
) = 1;
19823 insn
= emit_insn (tmp
);
19824 RTX_FRAME_RELATED_P (insn
) = 1;
19825 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19826 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
19827 RTX_FRAME_RELATED_P (tmp
) = 1;
19830 XVECEXP (dwarf
, 0, i
) = tmp
;
19834 while (i
< num_regs
)
19835 if (saved_regs_mask
& (1 << regno
))
19837 rtx reg1
, reg2
, mem1
, mem2
;
19838 rtx tmp0
, tmp1
, tmp2
;
19841 /* Find the register to pair with this one. */
19842 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19846 reg1
= gen_rtx_REG (SImode
, regno
);
19847 reg2
= gen_rtx_REG (SImode
, regno2
);
19854 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19857 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19859 -4 * (num_regs
- 1)));
19860 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
19861 plus_constant (Pmode
, stack_pointer_rtx
,
19863 tmp1
= gen_rtx_SET (mem1
, reg1
);
19864 tmp2
= gen_rtx_SET (mem2
, reg2
);
19865 RTX_FRAME_RELATED_P (tmp0
) = 1;
19866 RTX_FRAME_RELATED_P (tmp1
) = 1;
19867 RTX_FRAME_RELATED_P (tmp2
) = 1;
19868 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19869 XVECEXP (par
, 0, 0) = tmp0
;
19870 XVECEXP (par
, 0, 1) = tmp1
;
19871 XVECEXP (par
, 0, 2) = tmp2
;
19872 insn
= emit_insn (par
);
19873 RTX_FRAME_RELATED_P (insn
) = 1;
19874 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19878 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19881 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19884 tmp1
= gen_rtx_SET (mem1
, reg1
);
19885 tmp2
= gen_rtx_SET (mem2
, reg2
);
19886 RTX_FRAME_RELATED_P (tmp1
) = 1;
19887 RTX_FRAME_RELATED_P (tmp2
) = 1;
19888 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19889 XVECEXP (par
, 0, 0) = tmp1
;
19890 XVECEXP (par
, 0, 1) = tmp2
;
19894 /* Create unwind information. This is an approximation. */
19895 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
19896 plus_constant (Pmode
,
19900 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
19901 plus_constant (Pmode
,
19906 RTX_FRAME_RELATED_P (tmp1
) = 1;
19907 RTX_FRAME_RELATED_P (tmp2
) = 1;
19908 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
19909 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
19911 regno
= regno2
+ 1;
19919 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19920 whenever possible, otherwise it emits single-word stores. The first store
19921 also allocates stack space for all saved registers, using writeback with
19922 post-addressing mode. All other stores use offset addressing. If no STRD
19923 can be emitted, this function emits a sequence of single-word stores,
19924 and not an STM as before, because single-word stores provide more freedom
19925 scheduling and can be turned into an STM by peephole optimizations. */
19927 arm_emit_strd_push (unsigned long saved_regs_mask
)
19930 int i
, j
, dwarf_index
= 0;
19932 rtx dwarf
= NULL_RTX
;
19933 rtx insn
= NULL_RTX
;
19936 /* TODO: A more efficient code can be emitted by changing the
19937 layout, e.g., first push all pairs that can use STRD to keep the
19938 stack aligned, and then push all other registers. */
19939 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19940 if (saved_regs_mask
& (1 << i
))
19943 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19944 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19945 gcc_assert (num_regs
> 0);
19947 /* Create sequence for DWARF info. */
19948 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19950 /* For dwarf info, we generate explicit stack update. */
19951 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19952 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19953 RTX_FRAME_RELATED_P (tmp
) = 1;
19954 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19956 /* Save registers. */
19957 offset
= - 4 * num_regs
;
19959 while (j
<= LAST_ARM_REGNUM
)
19960 if (saved_regs_mask
& (1 << j
))
19963 && (saved_regs_mask
& (1 << (j
+ 1))))
19965 /* Current register and previous register form register pair for
19966 which STRD can be generated. */
19969 /* Allocate stack space for all saved registers. */
19970 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19971 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19972 mem
= gen_frame_mem (DImode
, tmp
);
19975 else if (offset
> 0)
19976 mem
= gen_frame_mem (DImode
,
19977 plus_constant (Pmode
,
19981 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
19983 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
19984 RTX_FRAME_RELATED_P (tmp
) = 1;
19985 tmp
= emit_insn (tmp
);
19987 /* Record the first store insn. */
19988 if (dwarf_index
== 1)
19991 /* Generate dwarf info. */
19992 mem
= gen_frame_mem (SImode
,
19993 plus_constant (Pmode
,
19996 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
19997 RTX_FRAME_RELATED_P (tmp
) = 1;
19998 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20000 mem
= gen_frame_mem (SImode
,
20001 plus_constant (Pmode
,
20004 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
20005 RTX_FRAME_RELATED_P (tmp
) = 1;
20006 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20013 /* Emit a single word store. */
20016 /* Allocate stack space for all saved registers. */
20017 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20018 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20019 mem
= gen_frame_mem (SImode
, tmp
);
20022 else if (offset
> 0)
20023 mem
= gen_frame_mem (SImode
,
20024 plus_constant (Pmode
,
20028 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20030 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20031 RTX_FRAME_RELATED_P (tmp
) = 1;
20032 tmp
= emit_insn (tmp
);
20034 /* Record the first store insn. */
20035 if (dwarf_index
== 1)
20038 /* Generate dwarf info. */
20039 mem
= gen_frame_mem (SImode
,
20040 plus_constant(Pmode
,
20043 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20044 RTX_FRAME_RELATED_P (tmp
) = 1;
20045 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20054 /* Attach dwarf info to the first insn we generate. */
20055 gcc_assert (insn
!= NULL_RTX
);
20056 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20057 RTX_FRAME_RELATED_P (insn
) = 1;
20060 /* Generate and emit an insn that we will recognize as a push_multi.
20061 Unfortunately, since this insn does not reflect very well the actual
20062 semantics of the operation, we need to annotate the insn for the benefit
20063 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20064 MASK for registers that should be annotated for DWARF2 frame unwind
20067 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
20070 int num_dwarf_regs
= 0;
20074 int dwarf_par_index
;
20077 /* We don't record the PC in the dwarf frame information. */
20078 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
20080 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20082 if (mask
& (1 << i
))
20084 if (dwarf_regs_mask
& (1 << i
))
20088 gcc_assert (num_regs
&& num_regs
<= 16);
20089 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
20091 /* For the body of the insn we are going to generate an UNSPEC in
20092 parallel with several USEs. This allows the insn to be recognized
20093 by the push_multi pattern in the arm.md file.
20095 The body of the insn looks something like this:
20098 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20099 (const_int:SI <num>)))
20100 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20106 For the frame note however, we try to be more explicit and actually
20107 show each register being stored into the stack frame, plus a (single)
20108 decrement of the stack pointer. We do it this way in order to be
20109 friendly to the stack unwinding code, which only wants to see a single
20110 stack decrement per instruction. The RTL we generate for the note looks
20111 something like this:
20114 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20115 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20116 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20117 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20121 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20122 instead we'd have a parallel expression detailing all
20123 the stores to the various memory addresses so that debug
20124 information is more up-to-date. Remember however while writing
20125 this to take care of the constraints with the push instruction.
20127 Note also that this has to be taken care of for the VFP registers.
20129 For more see PR43399. */
20131 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
20132 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
20133 dwarf_par_index
= 1;
20135 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20137 if (mask
& (1 << i
))
20139 reg
= gen_rtx_REG (SImode
, i
);
20141 XVECEXP (par
, 0, 0)
20142 = gen_rtx_SET (gen_frame_mem
20144 gen_rtx_PRE_MODIFY (Pmode
,
20147 (Pmode
, stack_pointer_rtx
,
20150 gen_rtx_UNSPEC (BLKmode
,
20151 gen_rtvec (1, reg
),
20152 UNSPEC_PUSH_MULT
));
20154 if (dwarf_regs_mask
& (1 << i
))
20156 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
20158 RTX_FRAME_RELATED_P (tmp
) = 1;
20159 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20166 for (j
= 1, i
++; j
< num_regs
; i
++)
20168 if (mask
& (1 << i
))
20170 reg
= gen_rtx_REG (SImode
, i
);
20172 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
20174 if (dwarf_regs_mask
& (1 << i
))
20177 = gen_rtx_SET (gen_frame_mem
20179 plus_constant (Pmode
, stack_pointer_rtx
,
20182 RTX_FRAME_RELATED_P (tmp
) = 1;
20183 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20190 par
= emit_insn (par
);
20192 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20193 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20194 RTX_FRAME_RELATED_P (tmp
) = 1;
20195 XVECEXP (dwarf
, 0, 0) = tmp
;
20197 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
20202 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20203 SIZE is the offset to be adjusted.
20204 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20206 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
20210 RTX_FRAME_RELATED_P (insn
) = 1;
20211 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
20212 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
20215 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20216 SAVED_REGS_MASK shows which registers need to be restored.
20218 Unfortunately, since this insn does not reflect very well the actual
20219 semantics of the operation, we need to annotate the insn for the benefit
20220 of DWARF2 frame unwind information. */
20222 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20227 rtx dwarf
= NULL_RTX
;
20229 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20233 offset_adj
= return_in_pc
? 1 : 0;
20234 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20235 if (saved_regs_mask
& (1 << i
))
20238 gcc_assert (num_regs
&& num_regs
<= 16);
20240 /* If SP is in reglist, then we don't emit SP update insn. */
20241 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20243 /* The parallel needs to hold num_regs SETs
20244 and one SET for the stack update. */
20245 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20248 XVECEXP (par
, 0, 0) = ret_rtx
;
20252 /* Increment the stack pointer, based on there being
20253 num_regs 4-byte registers to restore. */
20254 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20255 plus_constant (Pmode
,
20258 RTX_FRAME_RELATED_P (tmp
) = 1;
20259 XVECEXP (par
, 0, offset_adj
) = tmp
;
20262 /* Now restore every reg, which may include PC. */
20263 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20264 if (saved_regs_mask
& (1 << i
))
20266 reg
= gen_rtx_REG (SImode
, i
);
20267 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20269 /* Emit single load with writeback. */
20270 tmp
= gen_frame_mem (SImode
,
20271 gen_rtx_POST_INC (Pmode
,
20272 stack_pointer_rtx
));
20273 tmp
= emit_insn (gen_rtx_SET (reg
, tmp
));
20274 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20278 tmp
= gen_rtx_SET (reg
,
20281 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20282 RTX_FRAME_RELATED_P (tmp
) = 1;
20283 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20285 /* We need to maintain a sequence for DWARF info too. As dwarf info
20286 should not have PC, skip PC. */
20287 if (i
!= PC_REGNUM
)
20288 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20294 par
= emit_jump_insn (par
);
20296 par
= emit_insn (par
);
20298 REG_NOTES (par
) = dwarf
;
20300 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20301 stack_pointer_rtx
, stack_pointer_rtx
);
20304 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20305 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20307 Unfortunately, since this insn does not reflect very well the actual
20308 semantics of the operation, we need to annotate the insn for the benefit
20309 of DWARF2 frame unwind information. */
20311 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20315 rtx dwarf
= NULL_RTX
;
20318 gcc_assert (num_regs
&& num_regs
<= 32);
20320 /* Workaround ARM10 VFPr1 bug. */
20321 if (num_regs
== 2 && !arm_arch6
)
20323 if (first_reg
== 15)
20329 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20330 there could be up to 32 D-registers to restore.
20331 If there are more than 16 D-registers, make two recursive calls,
20332 each of which emits one pop_multi instruction. */
20335 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20336 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20340 /* The parallel needs to hold num_regs SETs
20341 and one SET for the stack update. */
20342 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20344 /* Increment the stack pointer, based on there being
20345 num_regs 8-byte registers to restore. */
20346 tmp
= gen_rtx_SET (base_reg
, plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20347 RTX_FRAME_RELATED_P (tmp
) = 1;
20348 XVECEXP (par
, 0, 0) = tmp
;
20350 /* Now show every reg that will be restored, using a SET for each. */
20351 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20353 reg
= gen_rtx_REG (DFmode
, i
);
20355 tmp
= gen_rtx_SET (reg
,
20358 plus_constant (Pmode
, base_reg
, 8 * j
)));
20359 RTX_FRAME_RELATED_P (tmp
) = 1;
20360 XVECEXP (par
, 0, j
+ 1) = tmp
;
20362 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20367 par
= emit_insn (par
);
20368 REG_NOTES (par
) = dwarf
;
20370 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20371 if (REGNO (base_reg
) == IP_REGNUM
)
20373 RTX_FRAME_RELATED_P (par
) = 1;
20374 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20377 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20378 base_reg
, base_reg
);
20381 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20382 number of registers are being popped, multiple LDRD patterns are created for
20383 all register pairs. If odd number of registers are popped, last register is
20384 loaded by using LDR pattern. */
20386 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20390 rtx par
= NULL_RTX
;
20391 rtx dwarf
= NULL_RTX
;
20392 rtx tmp
, reg
, tmp1
;
20393 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20395 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20396 if (saved_regs_mask
& (1 << i
))
20399 gcc_assert (num_regs
&& num_regs
<= 16);
20401 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20402 to be popped. So, if num_regs is even, now it will become odd,
20403 and we can generate pop with PC. If num_regs is odd, it will be
20404 even now, and ldr with return can be generated for PC. */
20408 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20410 /* Var j iterates over all the registers to gather all the registers in
20411 saved_regs_mask. Var i gives index of saved registers in stack frame.
20412 A PARALLEL RTX of register-pair is created here, so that pattern for
20413 LDRD can be matched. As PC is always last register to be popped, and
20414 we have already decremented num_regs if PC, we don't have to worry
20415 about PC in this loop. */
20416 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20417 if (saved_regs_mask
& (1 << j
))
20419 /* Create RTX for memory load. */
20420 reg
= gen_rtx_REG (SImode
, j
);
20421 tmp
= gen_rtx_SET (reg
,
20422 gen_frame_mem (SImode
,
20423 plus_constant (Pmode
,
20424 stack_pointer_rtx
, 4 * i
)));
20425 RTX_FRAME_RELATED_P (tmp
) = 1;
20429 /* When saved-register index (i) is even, the RTX to be emitted is
20430 yet to be created. Hence create it first. The LDRD pattern we
20431 are generating is :
20432 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20433 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20434 where target registers need not be consecutive. */
20435 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20439 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20440 added as 0th element and if i is odd, reg_i is added as 1st element
20441 of LDRD pattern shown above. */
20442 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20443 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20447 /* When saved-register index (i) is odd, RTXs for both the registers
20448 to be loaded are generated in above given LDRD pattern, and the
20449 pattern can be emitted now. */
20450 par
= emit_insn (par
);
20451 REG_NOTES (par
) = dwarf
;
20452 RTX_FRAME_RELATED_P (par
) = 1;
20458 /* If the number of registers pushed is odd AND return_in_pc is false OR
20459 number of registers are even AND return_in_pc is true, last register is
20460 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20461 then LDR with post increment. */
20463 /* Increment the stack pointer, based on there being
20464 num_regs 4-byte registers to restore. */
20465 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20466 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20467 RTX_FRAME_RELATED_P (tmp
) = 1;
20468 tmp
= emit_insn (tmp
);
20471 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20472 stack_pointer_rtx
, stack_pointer_rtx
);
20477 if (((num_regs
% 2) == 1 && !return_in_pc
)
20478 || ((num_regs
% 2) == 0 && return_in_pc
))
20480 /* Scan for the single register to be popped. Skip until the saved
20481 register is found. */
20482 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20484 /* Gen LDR with post increment here. */
20485 tmp1
= gen_rtx_MEM (SImode
,
20486 gen_rtx_POST_INC (SImode
,
20487 stack_pointer_rtx
));
20488 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20490 reg
= gen_rtx_REG (SImode
, j
);
20491 tmp
= gen_rtx_SET (reg
, tmp1
);
20492 RTX_FRAME_RELATED_P (tmp
) = 1;
20493 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20497 /* If return_in_pc, j must be PC_REGNUM. */
20498 gcc_assert (j
== PC_REGNUM
);
20499 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20500 XVECEXP (par
, 0, 0) = ret_rtx
;
20501 XVECEXP (par
, 0, 1) = tmp
;
20502 par
= emit_jump_insn (par
);
20506 par
= emit_insn (tmp
);
20507 REG_NOTES (par
) = dwarf
;
20508 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20509 stack_pointer_rtx
, stack_pointer_rtx
);
20513 else if ((num_regs
% 2) == 1 && return_in_pc
)
20515 /* There are 2 registers to be popped. So, generate the pattern
20516 pop_multiple_with_stack_update_and_return to pop in PC. */
20517 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20523 /* LDRD in ARM mode needs consecutive registers as operands. This function
20524 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20525 offset addressing and then generates one separate stack udpate. This provides
20526 more scheduling freedom, compared to writeback on every load. However,
20527 if the function returns using load into PC directly
20528 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20529 before the last load. TODO: Add a peephole optimization to recognize
20530 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20531 peephole optimization to merge the load at stack-offset zero
20532 with the stack update instruction using load with writeback
20533 in post-index addressing mode. */
20535 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20539 rtx par
= NULL_RTX
;
20540 rtx dwarf
= NULL_RTX
;
20543 /* Restore saved registers. */
20544 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20546 while (j
<= LAST_ARM_REGNUM
)
20547 if (saved_regs_mask
& (1 << j
))
20550 && (saved_regs_mask
& (1 << (j
+ 1)))
20551 && (j
+ 1) != PC_REGNUM
)
20553 /* Current register and next register form register pair for which
20554 LDRD can be generated. PC is always the last register popped, and
20555 we handle it separately. */
20557 mem
= gen_frame_mem (DImode
,
20558 plus_constant (Pmode
,
20562 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20564 tmp
= gen_rtx_SET (gen_rtx_REG (DImode
, j
), mem
);
20565 tmp
= emit_insn (tmp
);
20566 RTX_FRAME_RELATED_P (tmp
) = 1;
20568 /* Generate dwarf info. */
20570 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20571 gen_rtx_REG (SImode
, j
),
20573 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20574 gen_rtx_REG (SImode
, j
+ 1),
20577 REG_NOTES (tmp
) = dwarf
;
20582 else if (j
!= PC_REGNUM
)
20584 /* Emit a single word load. */
20586 mem
= gen_frame_mem (SImode
,
20587 plus_constant (Pmode
,
20591 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20593 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, j
), mem
);
20594 tmp
= emit_insn (tmp
);
20595 RTX_FRAME_RELATED_P (tmp
) = 1;
20597 /* Generate dwarf info. */
20598 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20599 gen_rtx_REG (SImode
, j
),
20605 else /* j == PC_REGNUM */
20611 /* Update the stack. */
20614 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20615 plus_constant (Pmode
,
20618 tmp
= emit_insn (tmp
);
20619 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20620 stack_pointer_rtx
, stack_pointer_rtx
);
20624 if (saved_regs_mask
& (1 << PC_REGNUM
))
20626 /* Only PC is to be popped. */
20627 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20628 XVECEXP (par
, 0, 0) = ret_rtx
;
20629 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, PC_REGNUM
),
20630 gen_frame_mem (SImode
,
20631 gen_rtx_POST_INC (SImode
,
20632 stack_pointer_rtx
)));
20633 RTX_FRAME_RELATED_P (tmp
) = 1;
20634 XVECEXP (par
, 0, 1) = tmp
;
20635 par
= emit_jump_insn (par
);
20637 /* Generate dwarf info. */
20638 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20639 gen_rtx_REG (SImode
, PC_REGNUM
),
20641 REG_NOTES (par
) = dwarf
;
20642 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20643 stack_pointer_rtx
, stack_pointer_rtx
);
20647 /* Calculate the size of the return value that is passed in registers. */
20649 arm_size_return_regs (void)
20653 if (crtl
->return_rtx
!= 0)
20654 mode
= GET_MODE (crtl
->return_rtx
);
20656 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20658 return GET_MODE_SIZE (mode
);
20661 /* Return true if the current function needs to save/restore LR. */
20663 thumb_force_lr_save (void)
20665 return !cfun
->machine
->lr_save_eliminated
20667 || thumb_far_jump_used_p ()
20668 || df_regs_ever_live_p (LR_REGNUM
));
20671 /* We do not know if r3 will be available because
20672 we do have an indirect tailcall happening in this
20673 particular case. */
20675 is_indirect_tailcall_p (rtx call
)
20677 rtx pat
= PATTERN (call
);
20679 /* Indirect tail call. */
20680 pat
= XVECEXP (pat
, 0, 0);
20681 if (GET_CODE (pat
) == SET
)
20682 pat
= SET_SRC (pat
);
20684 pat
= XEXP (XEXP (pat
, 0), 0);
20685 return REG_P (pat
);
20688 /* Return true if r3 is used by any of the tail call insns in the
20689 current function. */
20691 any_sibcall_could_use_r3 (void)
20696 if (!crtl
->tail_call_emit
)
20698 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20699 if (e
->flags
& EDGE_SIBCALL
)
20701 rtx_insn
*call
= BB_END (e
->src
);
20702 if (!CALL_P (call
))
20703 call
= prev_nonnote_nondebug_insn (call
);
20704 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20705 if (find_regno_fusage (call
, USE
, 3)
20706 || is_indirect_tailcall_p (call
))
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_core_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
20762 /* Return cached stack offsets. */
20764 static arm_stack_offsets
*
20765 arm_get_frame_offsets (void)
20767 struct arm_stack_offsets
*offsets
;
20769 offsets
= &cfun
->machine
->stack_offsets
;
20775 /* Calculate stack offsets. These are used to calculate register elimination
20776 offsets and in prologue/epilogue code. Also calculates which registers
20777 should be saved. */
20780 arm_compute_frame_layout (void)
20782 struct arm_stack_offsets
*offsets
;
20783 unsigned long func_type
;
20786 HOST_WIDE_INT frame_size
;
20789 offsets
= &cfun
->machine
->stack_offsets
;
20791 /* Initially this is the size of the local variables. It will translated
20792 into an offset once we have determined the size of preceding data. */
20793 frame_size
= ROUND_UP_WORD (get_frame_size ());
20795 /* Space for variadic functions. */
20796 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20798 /* In Thumb mode this is incorrect, but never used. */
20800 = (offsets
->saved_args
20801 + arm_compute_static_chain_stack_bytes ()
20802 + (frame_pointer_needed
? 4 : 0));
20806 unsigned int regno
;
20808 offsets
->saved_regs_mask
= arm_compute_save_core_reg_mask ();
20809 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20810 saved
= core_saved
;
20812 /* We know that SP will be doubleword aligned on entry, and we must
20813 preserve that condition at any subroutine call. We also require the
20814 soft frame pointer to be doubleword aligned. */
20816 if (TARGET_REALLY_IWMMXT
)
20818 /* Check for the call-saved iWMMXt registers. */
20819 for (regno
= FIRST_IWMMXT_REGNUM
;
20820 regno
<= LAST_IWMMXT_REGNUM
;
20822 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20826 func_type
= arm_current_func_type ();
20827 /* Space for saved VFP registers. */
20828 if (! IS_VOLATILE (func_type
)
20829 && TARGET_HARD_FLOAT
)
20830 saved
+= arm_get_vfp_saved_size ();
20832 else /* TARGET_THUMB1 */
20834 offsets
->saved_regs_mask
= thumb1_compute_save_core_reg_mask ();
20835 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20836 saved
= core_saved
;
20837 if (TARGET_BACKTRACE
)
20841 /* Saved registers include the stack frame. */
20842 offsets
->saved_regs
20843 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20844 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20846 /* A leaf function does not need any stack alignment if it has nothing
20848 if (crtl
->is_leaf
&& frame_size
== 0
20849 /* However if it calls alloca(), we have a dynamically allocated
20850 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20851 && ! cfun
->calls_alloca
)
20853 offsets
->outgoing_args
= offsets
->soft_frame
;
20854 offsets
->locals_base
= offsets
->soft_frame
;
20858 /* Ensure SFP has the correct alignment. */
20859 if (ARM_DOUBLEWORD_ALIGN
20860 && (offsets
->soft_frame
& 7))
20862 offsets
->soft_frame
+= 4;
20863 /* Try to align stack by pushing an extra reg. Don't bother doing this
20864 when there is a stack frame as the alignment will be rolled into
20865 the normal stack adjustment. */
20866 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20870 /* Register r3 is caller-saved. Normally it does not need to be
20871 saved on entry by the prologue. However if we choose to save
20872 it for padding then we may confuse the compiler into thinking
20873 a prologue sequence is required when in fact it is not. This
20874 will occur when shrink-wrapping if r3 is used as a scratch
20875 register and there are no other callee-saved writes.
20877 This situation can be avoided when other callee-saved registers
20878 are available and r3 is not mandatory if we choose a callee-saved
20879 register for padding. */
20880 bool prefer_callee_reg_p
= false;
20882 /* If it is safe to use r3, then do so. This sometimes
20883 generates better code on Thumb-2 by avoiding the need to
20884 use 32-bit push/pop instructions. */
20885 if (! any_sibcall_could_use_r3 ()
20886 && arm_size_return_regs () <= 12
20887 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20889 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20892 if (!TARGET_THUMB2
)
20893 prefer_callee_reg_p
= true;
20896 || prefer_callee_reg_p
)
20898 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
20900 /* Avoid fixed registers; they may be changed at
20901 arbitrary times so it's unsafe to restore them
20902 during the epilogue. */
20904 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
20914 offsets
->saved_regs
+= 4;
20915 offsets
->saved_regs_mask
|= (1 << reg
);
20920 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
20921 offsets
->outgoing_args
= (offsets
->locals_base
20922 + crtl
->outgoing_args_size
);
20924 if (ARM_DOUBLEWORD_ALIGN
)
20926 /* Ensure SP remains doubleword aligned. */
20927 if (offsets
->outgoing_args
& 7)
20928 offsets
->outgoing_args
+= 4;
20929 gcc_assert (!(offsets
->outgoing_args
& 7));
20934 /* Calculate the relative offsets for the different stack pointers. Positive
20935 offsets are in the direction of stack growth. */
20938 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20940 arm_stack_offsets
*offsets
;
20942 offsets
= arm_get_frame_offsets ();
20944 /* OK, now we have enough information to compute the distances.
20945 There must be an entry in these switch tables for each pair
20946 of registers in ELIMINABLE_REGS, even if some of the entries
20947 seem to be redundant or useless. */
20950 case ARG_POINTER_REGNUM
:
20953 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20956 case FRAME_POINTER_REGNUM
:
20957 /* This is the reverse of the soft frame pointer
20958 to hard frame pointer elimination below. */
20959 return offsets
->soft_frame
- offsets
->saved_args
;
20961 case ARM_HARD_FRAME_POINTER_REGNUM
:
20962 /* This is only non-zero in the case where the static chain register
20963 is stored above the frame. */
20964 return offsets
->frame
- offsets
->saved_args
- 4;
20966 case STACK_POINTER_REGNUM
:
20967 /* If nothing has been pushed on the stack at all
20968 then this will return -4. This *is* correct! */
20969 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
20972 gcc_unreachable ();
20974 gcc_unreachable ();
20976 case FRAME_POINTER_REGNUM
:
20979 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20982 case ARM_HARD_FRAME_POINTER_REGNUM
:
20983 /* The hard frame pointer points to the top entry in the
20984 stack frame. The soft frame pointer to the bottom entry
20985 in the stack frame. If there is no stack frame at all,
20986 then they are identical. */
20988 return offsets
->frame
- offsets
->soft_frame
;
20990 case STACK_POINTER_REGNUM
:
20991 return offsets
->outgoing_args
- offsets
->soft_frame
;
20994 gcc_unreachable ();
20996 gcc_unreachable ();
20999 /* You cannot eliminate from the stack pointer.
21000 In theory you could eliminate from the hard frame
21001 pointer to the stack pointer, but this will never
21002 happen, since if a stack frame is not needed the
21003 hard frame pointer will never be used. */
21004 gcc_unreachable ();
21008 /* Given FROM and TO register numbers, say whether this elimination is
21009 allowed. Frame pointer elimination is automatically handled.
21011 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21012 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21013 pointer, we must eliminate FRAME_POINTER_REGNUM into
21014 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21015 ARG_POINTER_REGNUM. */
21018 arm_can_eliminate (const int from
, const int to
)
21020 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
21021 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
21022 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
21023 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
21027 /* Emit RTL to save coprocessor registers on function entry. Returns the
21028 number of bytes pushed. */
21031 arm_save_coproc_regs(void)
21033 int saved_size
= 0;
21035 unsigned start_reg
;
21038 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
21039 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
21041 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21042 insn
= gen_rtx_MEM (V2SImode
, insn
);
21043 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
21044 RTX_FRAME_RELATED_P (insn
) = 1;
21048 if (TARGET_HARD_FLOAT
)
21050 start_reg
= FIRST_VFP_REGNUM
;
21052 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
21054 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
21055 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
21057 if (start_reg
!= reg
)
21058 saved_size
+= vfp_emit_fstmd (start_reg
,
21059 (reg
- start_reg
) / 2);
21060 start_reg
= reg
+ 2;
21063 if (start_reg
!= reg
)
21064 saved_size
+= vfp_emit_fstmd (start_reg
,
21065 (reg
- start_reg
) / 2);
21071 /* Set the Thumb frame pointer from the stack pointer. */
21074 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
21076 HOST_WIDE_INT amount
;
21079 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
21081 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21082 stack_pointer_rtx
, GEN_INT (amount
)));
21085 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21086 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21087 expects the first two operands to be the same. */
21090 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21092 hard_frame_pointer_rtx
));
21096 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21097 hard_frame_pointer_rtx
,
21098 stack_pointer_rtx
));
21100 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
21101 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21102 RTX_FRAME_RELATED_P (dwarf
) = 1;
21103 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21106 RTX_FRAME_RELATED_P (insn
) = 1;
21109 struct scratch_reg
{
21114 /* Return a short-lived scratch register for use as a 2nd scratch register on
21115 function entry after the registers are saved in the prologue. This register
21116 must be released by means of release_scratch_register_on_entry. IP is not
21117 considered since it is always used as the 1st scratch register if available.
21119 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21120 mask of live registers. */
21123 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
21124 unsigned long live_regs
)
21130 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
21136 for (i
= 4; i
< 11; i
++)
21137 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
21145 /* If IP is used as the 1st scratch register for a nested function,
21146 then either r3 wasn't available or is used to preserve IP. */
21147 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
21149 regno
= (regno1
== 3 ? 2 : 3);
21151 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
21156 sr
->reg
= gen_rtx_REG (SImode
, regno
);
21159 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21160 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
21161 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21162 plus_constant (Pmode
, stack_pointer_rtx
, -4));
21163 RTX_FRAME_RELATED_P (insn
) = 1;
21164 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21168 /* Release a scratch register obtained from the preceding function. */
21171 release_scratch_register_on_entry (struct scratch_reg
*sr
)
21175 rtx addr
= gen_rtx_POST_INC (Pmode
, stack_pointer_rtx
);
21176 rtx insn
= emit_set_insn (sr
->reg
, gen_frame_mem (SImode
, addr
));
21177 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21178 plus_constant (Pmode
, stack_pointer_rtx
, 4));
21179 RTX_FRAME_RELATED_P (insn
) = 1;
21180 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21184 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21186 #if PROBE_INTERVAL > 4096
21187 #error Cannot use indexed addressing mode for stack probing
21190 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21191 inclusive. These are offsets from the current stack pointer. REGNO1
21192 is the index number of the 1st scratch register and LIVE_REGS is the
21193 mask of live registers. */
21196 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
21197 unsigned int regno1
, unsigned long live_regs
)
21199 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
21201 /* See if we have a constant small number of probes to generate. If so,
21202 that's the easy case. */
21203 if (size
<= PROBE_INTERVAL
)
21205 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21206 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21207 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
21210 /* The run-time loop is made up of 10 insns in the generic case while the
21211 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21212 else if (size
<= 5 * PROBE_INTERVAL
)
21214 HOST_WIDE_INT i
, rem
;
21216 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21217 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21218 emit_stack_probe (reg1
);
21220 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21221 it exceeds SIZE. If only two probes are needed, this will not
21222 generate any code. Then probe at FIRST + SIZE. */
21223 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
21225 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21226 emit_stack_probe (reg1
);
21229 rem
= size
- (i
- PROBE_INTERVAL
);
21230 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21232 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21233 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
21236 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
21239 /* Otherwise, do the same as above, but in a loop. Note that we must be
21240 extra careful with variables wrapping around because we might be at
21241 the very top (or the very bottom) of the address space and we have
21242 to be able to handle this case properly; in particular, we use an
21243 equality test for the loop condition. */
21246 HOST_WIDE_INT rounded_size
;
21247 struct scratch_reg sr
;
21249 get_scratch_register_on_entry (&sr
, regno1
, live_regs
);
21251 emit_move_insn (reg1
, GEN_INT (first
));
21254 /* Step 1: round SIZE to the previous multiple of the interval. */
21256 rounded_size
= size
& -PROBE_INTERVAL
;
21257 emit_move_insn (sr
.reg
, GEN_INT (rounded_size
));
21260 /* Step 2: compute initial and final value of the loop counter. */
21262 /* TEST_ADDR = SP + FIRST. */
21263 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21265 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21266 emit_set_insn (sr
.reg
, gen_rtx_MINUS (Pmode
, reg1
, sr
.reg
));
21269 /* Step 3: the loop
21273 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21276 while (TEST_ADDR != LAST_ADDR)
21278 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21279 until it is equal to ROUNDED_SIZE. */
21281 emit_insn (gen_probe_stack_range (reg1
, reg1
, sr
.reg
));
21284 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21285 that SIZE is equal to ROUNDED_SIZE. */
21287 if (size
!= rounded_size
)
21289 HOST_WIDE_INT rem
= size
- rounded_size
;
21291 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21293 emit_set_insn (sr
.reg
,
21294 plus_constant (Pmode
, sr
.reg
, -PROBE_INTERVAL
));
21295 emit_stack_probe (plus_constant (Pmode
, sr
.reg
,
21296 PROBE_INTERVAL
- rem
));
21299 emit_stack_probe (plus_constant (Pmode
, sr
.reg
, -rem
));
21302 release_scratch_register_on_entry (&sr
);
21305 /* Make sure nothing is scheduled before we are done. */
21306 emit_insn (gen_blockage ());
21309 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21310 absolute addresses. */
21313 output_probe_stack_range (rtx reg1
, rtx reg2
)
21315 static int labelno
= 0;
21319 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
21322 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
21324 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21326 xops
[1] = GEN_INT (PROBE_INTERVAL
);
21327 output_asm_insn ("sub\t%0, %0, %1", xops
);
21329 /* Probe at TEST_ADDR. */
21330 output_asm_insn ("str\tr0, [%0, #0]", xops
);
21332 /* Test if TEST_ADDR == LAST_ADDR. */
21334 output_asm_insn ("cmp\t%0, %1", xops
);
21337 fputs ("\tbne\t", asm_out_file
);
21338 assemble_name_raw (asm_out_file
, loop_lab
);
21339 fputc ('\n', asm_out_file
);
21344 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21347 arm_expand_prologue (void)
21352 unsigned long live_regs_mask
;
21353 unsigned long func_type
;
21355 int saved_pretend_args
= 0;
21356 int saved_regs
= 0;
21357 unsigned HOST_WIDE_INT args_to_push
;
21358 HOST_WIDE_INT size
;
21359 arm_stack_offsets
*offsets
;
21362 func_type
= arm_current_func_type ();
21364 /* Naked functions don't have prologues. */
21365 if (IS_NAKED (func_type
))
21367 if (flag_stack_usage_info
)
21368 current_function_static_stack_size
= 0;
21372 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21373 args_to_push
= crtl
->args
.pretend_args_size
;
21375 /* Compute which register we will have to save onto the stack. */
21376 offsets
= arm_get_frame_offsets ();
21377 live_regs_mask
= offsets
->saved_regs_mask
;
21379 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
21381 if (IS_STACKALIGN (func_type
))
21385 /* Handle a word-aligned stack pointer. We generate the following:
21390 <save and restore r0 in normal prologue/epilogue>
21394 The unwinder doesn't need to know about the stack realignment.
21395 Just tell it we saved SP in r0. */
21396 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
21398 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
21399 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
21401 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
21402 RTX_FRAME_RELATED_P (insn
) = 1;
21403 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
21405 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
21407 /* ??? The CFA changes here, which may cause GDB to conclude that it
21408 has entered a different function. That said, the unwind info is
21409 correct, individually, before and after this instruction because
21410 we've described the save of SP, which will override the default
21411 handling of SP as restoring from the CFA. */
21412 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
21415 /* The static chain register is the same as the IP register. If it is
21416 clobbered when creating the frame, we need to save and restore it. */
21417 clobber_ip
= IS_NESTED (func_type
)
21418 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21419 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
21420 && !df_regs_ever_live_p (LR_REGNUM
)
21421 && arm_r3_live_at_start_p ()));
21423 /* Find somewhere to store IP whilst the frame is being created.
21424 We try the following places in order:
21426 1. The last argument register r3 if it is available.
21427 2. A slot on the stack above the frame if there are no
21428 arguments to push onto the stack.
21429 3. Register r3 again, after pushing the argument registers
21430 onto the stack, if this is a varargs function.
21431 4. The last slot on the stack created for the arguments to
21432 push, if this isn't a varargs function.
21434 Note - we only need to tell the dwarf2 backend about the SP
21435 adjustment in the second variant; the static chain register
21436 doesn't need to be unwound, as it doesn't contain a value
21437 inherited from the caller. */
21440 if (!arm_r3_live_at_start_p ())
21441 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21442 else if (args_to_push
== 0)
21446 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21449 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21450 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21453 /* Just tell the dwarf backend that we adjusted SP. */
21454 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21455 plus_constant (Pmode
, stack_pointer_rtx
,
21457 RTX_FRAME_RELATED_P (insn
) = 1;
21458 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21462 /* Store the args on the stack. */
21463 if (cfun
->machine
->uses_anonymous_args
)
21465 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21466 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21467 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21468 saved_pretend_args
= 1;
21474 if (args_to_push
== 4)
21475 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21477 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21478 plus_constant (Pmode
,
21482 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21484 /* Just tell the dwarf backend that we adjusted SP. */
21485 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21486 plus_constant (Pmode
, stack_pointer_rtx
,
21488 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21491 RTX_FRAME_RELATED_P (insn
) = 1;
21492 fp_offset
= args_to_push
;
21497 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21499 if (IS_INTERRUPT (func_type
))
21501 /* Interrupt functions must not corrupt any registers.
21502 Creating a frame pointer however, corrupts the IP
21503 register, so we must push it first. */
21504 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
21506 /* Do not set RTX_FRAME_RELATED_P on this insn.
21507 The dwarf stack unwinding code only wants to see one
21508 stack decrement per function, and this is not it. If
21509 this instruction is labeled as being part of the frame
21510 creation sequence then dwarf2out_frame_debug_expr will
21511 die when it encounters the assignment of IP to FP
21512 later on, since the use of SP here establishes SP as
21513 the CFA register and not IP.
21515 Anyway this instruction is not really part of the stack
21516 frame creation although it is part of the prologue. */
21519 insn
= emit_set_insn (ip_rtx
,
21520 plus_constant (Pmode
, stack_pointer_rtx
,
21522 RTX_FRAME_RELATED_P (insn
) = 1;
21527 /* Push the argument registers, or reserve space for them. */
21528 if (cfun
->machine
->uses_anonymous_args
)
21529 insn
= emit_multi_reg_push
21530 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21531 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21534 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21535 GEN_INT (- args_to_push
)));
21536 RTX_FRAME_RELATED_P (insn
) = 1;
21539 /* If this is an interrupt service routine, and the link register
21540 is going to be pushed, and we're not generating extra
21541 push of IP (needed when frame is needed and frame layout if apcs),
21542 subtracting four from LR now will mean that the function return
21543 can be done with a single instruction. */
21544 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21545 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21546 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21549 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21551 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
21554 if (live_regs_mask
)
21556 unsigned long dwarf_regs_mask
= live_regs_mask
;
21558 saved_regs
+= bit_count (live_regs_mask
) * 4;
21559 if (optimize_size
&& !frame_pointer_needed
21560 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
21562 /* If no coprocessor registers are being pushed and we don't have
21563 to worry about a frame pointer then push extra registers to
21564 create the stack frame. This is done in a way that does not
21565 alter the frame layout, so is independent of the epilogue. */
21569 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21571 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21572 if (frame
&& n
* 4 >= frame
)
21575 live_regs_mask
|= (1 << n
) - 1;
21576 saved_regs
+= frame
;
21581 && current_tune
->prefer_ldrd_strd
21582 && !optimize_function_for_size_p (cfun
))
21584 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21586 thumb2_emit_strd_push (live_regs_mask
);
21587 else if (TARGET_ARM
21588 && !TARGET_APCS_FRAME
21589 && !IS_INTERRUPT (func_type
))
21590 arm_emit_strd_push (live_regs_mask
);
21593 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21594 RTX_FRAME_RELATED_P (insn
) = 1;
21599 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21600 RTX_FRAME_RELATED_P (insn
) = 1;
21604 if (! IS_VOLATILE (func_type
))
21605 saved_regs
+= arm_save_coproc_regs ();
21607 if (frame_pointer_needed
&& TARGET_ARM
)
21609 /* Create the new frame pointer. */
21610 if (TARGET_APCS_FRAME
)
21612 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21613 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21614 RTX_FRAME_RELATED_P (insn
) = 1;
21618 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
21619 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21620 stack_pointer_rtx
, insn
));
21621 RTX_FRAME_RELATED_P (insn
) = 1;
21625 size
= offsets
->outgoing_args
- offsets
->saved_args
;
21626 if (flag_stack_usage_info
)
21627 current_function_static_stack_size
= size
;
21629 /* If this isn't an interrupt service routine and we have a frame, then do
21630 stack checking. We use IP as the first scratch register, except for the
21631 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21632 if (!IS_INTERRUPT (func_type
)
21633 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
21635 unsigned int regno
;
21637 if (!IS_NESTED (func_type
) || clobber_ip
)
21639 else if (df_regs_ever_live_p (LR_REGNUM
))
21644 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
21646 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
21647 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
,
21648 size
- STACK_CHECK_PROTECT
,
21649 regno
, live_regs_mask
);
21652 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
,
21653 regno
, live_regs_mask
);
21656 /* Recover the static chain register. */
21659 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21660 insn
= gen_rtx_REG (SImode
, 3);
21663 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21664 insn
= gen_frame_mem (SImode
, insn
);
21666 emit_set_insn (ip_rtx
, insn
);
21667 emit_insn (gen_force_register_use (ip_rtx
));
21670 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21672 /* This add can produce multiple insns for a large constant, so we
21673 need to get tricky. */
21674 rtx_insn
*last
= get_last_insn ();
21676 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21677 - offsets
->outgoing_args
);
21679 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21683 last
= last
? NEXT_INSN (last
) : get_insns ();
21684 RTX_FRAME_RELATED_P (last
) = 1;
21686 while (last
!= insn
);
21688 /* If the frame pointer is needed, emit a special barrier that
21689 will prevent the scheduler from moving stores to the frame
21690 before the stack adjustment. */
21691 if (frame_pointer_needed
)
21692 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
21693 hard_frame_pointer_rtx
));
21697 if (frame_pointer_needed
&& TARGET_THUMB2
)
21698 thumb_set_frame_pointer (offsets
);
21700 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21702 unsigned long mask
;
21704 mask
= live_regs_mask
;
21705 mask
&= THUMB2_WORK_REGS
;
21706 if (!IS_NESTED (func_type
))
21707 mask
|= (1 << IP_REGNUM
);
21708 arm_load_pic_register (mask
);
21711 /* If we are profiling, make sure no instructions are scheduled before
21712 the call to mcount. Similarly if the user has requested no
21713 scheduling in the prolog. Similarly if we want non-call exceptions
21714 using the EABI unwinder, to prevent faulting instructions from being
21715 swapped with a stack adjustment. */
21716 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21717 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21718 && cfun
->can_throw_non_call_exceptions
))
21719 emit_insn (gen_blockage ());
21721 /* If the link register is being kept alive, with the return address in it,
21722 then make sure that it does not get reused by the ce2 pass. */
21723 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
21724 cfun
->machine
->lr_save_eliminated
= 1;
21727 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21729 arm_print_condition (FILE *stream
)
21731 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
21733 /* Branch conversion is not implemented for Thumb-2. */
21736 output_operand_lossage ("predicated Thumb instruction");
21739 if (current_insn_predicate
!= NULL
)
21741 output_operand_lossage
21742 ("predicated instruction in conditional sequence");
21746 fputs (arm_condition_codes
[arm_current_cc
], stream
);
21748 else if (current_insn_predicate
)
21750 enum arm_cond_code code
;
21754 output_operand_lossage ("predicated Thumb instruction");
21758 code
= get_arm_condition_code (current_insn_predicate
);
21759 fputs (arm_condition_codes
[code
], stream
);
21764 /* Globally reserved letters: acln
21765 Punctuation letters currently used: @_|?().!#
21766 Lower case letters currently used: bcdefhimpqtvwxyz
21767 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21768 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21770 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21772 If CODE is 'd', then the X is a condition operand and the instruction
21773 should only be executed if the condition is true.
21774 if CODE is 'D', then the X is a condition operand and the instruction
21775 should only be executed if the condition is false: however, if the mode
21776 of the comparison is CCFPEmode, then always execute the instruction -- we
21777 do this because in these circumstances !GE does not necessarily imply LT;
21778 in these cases the instruction pattern will take care to make sure that
21779 an instruction containing %d will follow, thereby undoing the effects of
21780 doing this instruction unconditionally.
21781 If CODE is 'N' then X is a floating point operand that must be negated
21783 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21784 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
/* Implements TARGET_PRINT_OPERAND: write operand X, modified by CODE, to
   STREAM.  NOTE(review): this region was damaged during extraction -- the
   enclosing switch, its case labels and many structural lines are missing.
   The surviving numbered fragments are preserved verbatim below; only
   comments have been added.  Confirm each case against upstream arm.c.  */
21786 arm_print_operand (FILE *stream
, rtx x
, int code
)
/* Presumably the '@' case: emit the assembler comment-start string.  */
21791 fputs (ASM_COMMENT_START
, stream
);
/* Presumably '_': emit the user label prefix.  */
21795 fputs (user_label_prefix
, stream
);
21799 fputs (REGISTER_PREFIX
, stream
);
/* '?': current condition code (see arm_print_condition).  */
21803 arm_print_condition (stream
);
21807 /* The current condition code for a condition code setting instruction.
21808 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21809 fputc('s', stream
);
21810 arm_print_condition (stream
);
21814 /* If the instruction is conditionally executed then print
21815 the current condition code, otherwise print 's'. */
21816 gcc_assert (TARGET_THUMB2
);
21817 if (current_insn_predicate
)
21818 arm_print_condition (stream
);
21820 fputc('s', stream
);
21823 /* %# is a "break" sequence. It doesn't output anything, but is used to
21824 separate e.g. operand numbers from following text, if that text consists
21825 of further digits which we don't want to be part of the operand
/* 'N' (per the header comment): print the negated FP constant.  */
21833 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
21834 fprintf (stream
, "%s", fp_const_from_val (&r
));
21838 /* An integer or symbol address without a preceding # sign. */
21840 switch (GET_CODE (x
))
21843 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21847 output_addr_const (stream
, x
);
21851 if (GET_CODE (XEXP (x
, 0)) == PLUS
21852 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21854 output_addr_const (stream
, x
);
21857 /* Fall through. */
21860 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21864 /* An integer that we want to print in HEX. */
21866 switch (GET_CODE (x
))
21869 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21873 output_operand_lossage ("Unsupported operand for code '%c'", code
);
/* 'B' (per the header comment): bitwise-inverted const int.  */
21878 if (CONST_INT_P (x
))
21881 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21882 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21886 putc ('~', stream
);
21887 output_addr_const (stream
, x
);
21892 /* Print the log2 of a CONST_INT. */
21896 if (!CONST_INT_P (x
)
21897 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
21898 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21900 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21905 /* The low 16 bits of an immediate constant. */
21906 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
/* Shift/arithmetic helper codes: arithmetic_instr with/without the
   shift operand, then the shift operator and amount.  */
21910 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
21914 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
21922 shift
= shift_op (x
, &val
);
21926 fprintf (stream
, ", %s ", shift
);
21928 arm_print_operand (stream
, XEXP (x
, 1), 0);
21930 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21935 /* An explanation of the 'Q', 'R' and 'H' register operands:
21937 In a pair of registers containing a DI or DF value the 'Q'
21938 operand returns the register number of the register containing
21939 the least significant part of the value. The 'R' operand returns
21940 the register number of the register containing the most
21941 significant part of the value.
21943 The 'H' operand returns the higher of the two register numbers.
21944 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21945 same as the 'Q' operand, since the most significant part of the
21946 value is held in the lower number register. The reverse is true
21947 on systems where WORDS_BIG_ENDIAN is false.
21949 The purpose of these operands is to distinguish between cases
21950 where the endian-ness of the values is important (for example
21951 when they are added together), and cases where the endian-ness
21952 is irrelevant, but the order of register operations is important.
21953 For example when loading a value from memory into a register
21954 pair, the endian-ness does not matter. Provided that the value
21955 from the lower memory address is put into the lower numbered
21956 register, and the value from the higher address is put into the
21957 higher numbered register, the load will work regardless of whether
21958 the value being loaded is big-wordian or little-wordian. The
21959 order of the two register loads can matter however, if the address
21960 of the memory location is actually held in one of the registers
21961 being overwritten by the load.
21963 The 'Q' and 'R' constraints are also available for 64-bit
/* 'Q': least-significant word (low part) of a constant or reg pair.  */
21966 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21968 rtx part
= gen_lowpart (SImode
, x
);
21969 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21973 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21975 output_operand_lossage ("invalid operand for code '%c'", code
);
21979 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
/* 'R': most-significant word (high part).  */
21983 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21985 machine_mode mode
= GET_MODE (x
);
21988 if (mode
== VOIDmode
)
21990 part
= gen_highpart_mode (SImode
, mode
, x
);
21991 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21995 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21997 output_operand_lossage ("invalid operand for code '%c'", code
);
22001 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
/* 'H': the higher-numbered register of the pair.  */
22005 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22007 output_operand_lossage ("invalid operand for code '%c'", code
);
22011 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
22015 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22017 output_operand_lossage ("invalid operand for code '%c'", code
);
22021 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
22025 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22027 output_operand_lossage ("invalid operand for code '%c'", code
);
22031 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
/* Base register of a memory operand (possibly inside autoinc).  */
22035 asm_fprintf (stream
, "%r",
22036 REG_P (XEXP (x
, 0))
22037 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
/* 'M': ldm/stm style multi-register range (see header comment).  */
22041 asm_fprintf (stream
, "{%r-%r}",
22043 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
22046 /* Like 'M', but writing doubleword vector registers, for use by Neon
22050 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
22051 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
22053 asm_fprintf (stream
, "{d%d}", regno
);
22055 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
/* 'd': condition operand, execute-if-true (see header comment).  */
22060 /* CONST_TRUE_RTX means always -- that's the default. */
22061 if (x
== const_true_rtx
)
22064 if (!COMPARISON_P (x
))
22066 output_operand_lossage ("invalid operand for code '%c'", code
);
22070 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
/* 'D': condition operand, execute-if-false (see header comment).  */
22075 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22076 want to do that. */
22077 if (x
== const_true_rtx
)
22079 output_operand_lossage ("instruction never executed");
22082 if (!COMPARISON_P (x
))
22084 output_operand_lossage ("invalid operand for code '%c'", code
);
22088 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
22089 (get_arm_condition_code (x
))],
22099 /* Former Maverick support, removed after GCC-4.7. */
22100 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
/* iWMMXt general-purpose (wCGR) register number.  */
22105 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
22106 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
22107 /* Bad value for wCG register number. */
22109 output_operand_lossage ("invalid operand for code '%c'", code
);
22114 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
22117 /* Print an iWMMXt control register name. */
22119 if (!CONST_INT_P (x
)
22121 || INTVAL (x
) >= 16)
22122 /* Bad value for wC register number. */
22124 output_operand_lossage ("invalid operand for code '%c'", code
);
22130 static const char * wc_reg_names
[16] =
22132 "wCID", "wCon", "wCSSF", "wCASF",
22133 "wC4", "wC5", "wC6", "wC7",
22134 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22135 "wC12", "wC13", "wC14", "wC15"
22138 fputs (wc_reg_names
[INTVAL (x
)], stream
);
22142 /* Print the high single-precision register of a VFP double-precision
22146 machine_mode mode
= GET_MODE (x
);
22149 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
22151 output_operand_lossage ("invalid operand for code '%c'", code
);
22156 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
22158 output_operand_lossage ("invalid operand for code '%c'", code
);
22162 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
22166 /* Print a VFP/Neon double precision or quad precision register name. */
22170 machine_mode mode
= GET_MODE (x
);
22171 int is_quad
= (code
== 'q');
22174 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
22176 output_operand_lossage ("invalid operand for code '%c'", code
);
22181 || !IS_VFP_REGNUM (REGNO (x
)))
22183 output_operand_lossage ("invalid operand for code '%c'", code
);
22188 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
22189 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
22191 output_operand_lossage ("invalid operand for code '%c'", code
);
22195 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
22196 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
22200 /* These two codes print the low/high doubleword register of a Neon quad
22201 register, respectively. For pair-structure types, can also print
22202 low/high quadword registers. */
22206 machine_mode mode
= GET_MODE (x
);
22209 if ((GET_MODE_SIZE (mode
) != 16
22210 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
22212 output_operand_lossage ("invalid operand for code '%c'", code
);
22217 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
22219 output_operand_lossage ("invalid operand for code '%c'", code
);
22223 if (GET_MODE_SIZE (mode
) == 16)
22224 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
22225 + (code
== 'f' ? 1 : 0));
22227 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
22228 + (code
== 'f' ? 1 : 0));
22232 /* Print a VFPv3 floating-point constant, represented as an integer
22236 int index
= vfp3_const_double_index (x
);
22237 gcc_assert (index
!= -1);
22238 fprintf (stream
, "%d", index
);
22242 /* Print bits representing opcode features for Neon.
22244 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22245 and polynomials as unsigned.
22247 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22249 Bit 2 is 1 for rounding functions, 0 otherwise. */
22251 /* Identify the type as 's', 'u', 'p' or 'f'. */
22254 HOST_WIDE_INT bits
= INTVAL (x
);
22255 fputc ("uspf"[bits
& 3], stream
);
22259 /* Likewise, but signed and unsigned integers are both 'i'. */
22262 HOST_WIDE_INT bits
= INTVAL (x
);
22263 fputc ("iipf"[bits
& 3], stream
);
22267 /* As for 'T', but emit 'u' instead of 'p'. */
22270 HOST_WIDE_INT bits
= INTVAL (x
);
22271 fputc ("usuf"[bits
& 3], stream
);
22275 /* Bit 2: rounding (vs none). */
22278 HOST_WIDE_INT bits
= INTVAL (x
);
22279 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
22283 /* Memory operand for vld1/vst1 instruction. */
22287 bool postinc
= FALSE
;
22288 rtx postinc_reg
= NULL
;
22289 unsigned align
, memsize
, align_bits
;
22291 gcc_assert (MEM_P (x
));
22292 addr
= XEXP (x
, 0);
22293 if (GET_CODE (addr
) == POST_INC
)
22296 addr
= XEXP (addr
, 0);
22298 if (GET_CODE (addr
) == POST_MODIFY
)
22300 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
22301 addr
= XEXP (addr
, 0);
22303 asm_fprintf (stream
, "[%r", REGNO (addr
));
22305 /* We know the alignment of this access, so we can emit a hint in the
22306 instruction (for some alignments) as an aid to the memory subsystem
22308 align
= MEM_ALIGN (x
) >> 3;
22309 memsize
= MEM_SIZE (x
);
22311 /* Only certain alignment specifiers are supported by the hardware. */
22312 if (memsize
== 32 && (align
% 32) == 0)
22314 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
22316 else if (memsize
>= 8 && (align
% 8) == 0)
22321 if (align_bits
!= 0)
22322 asm_fprintf (stream
, ":%d", align_bits
);
22324 asm_fprintf (stream
, "]");
22327 fputs("!", stream
);
22329 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
/* Plain [reg] memory operand: address must already be a bare register.  */
22337 gcc_assert (MEM_P (x
));
22338 addr
= XEXP (x
, 0);
22339 gcc_assert (REG_P (addr
));
22340 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22344 /* Translate an S register number into a D register number and element index. */
22347 machine_mode mode
= GET_MODE (x
);
22350 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22352 output_operand_lossage ("invalid operand for code '%c'", code
);
22357 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22359 output_operand_lossage ("invalid operand for code '%c'", code
);
22363 regno
= regno
- FIRST_VFP_REGNUM
;
22364 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
/* Fixed-point/bit-count encodings of a CONST_DOUBLE.  */
22369 gcc_assert (CONST_DOUBLE_P (x
));
22371 result
= vfp3_const_double_for_fract_bits (x
);
22373 result
= vfp3_const_double_for_bits (x
);
22374 fprintf (stream
, "#%d", result
);
22377 /* Register specifier for vld1.16/vst1.16. Translate the S register
22378 number into a D register number and element index. */
22381 machine_mode mode
= GET_MODE (x
);
22384 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
22386 output_operand_lossage ("invalid operand for code '%c'", code
);
22391 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22393 output_operand_lossage ("invalid operand for code '%c'", code
);
22397 regno
= regno
- FIRST_VFP_REGNUM
;
22398 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
/* Default case (no modifier): dispatch on the rtx code of X.  */
22405 output_operand_lossage ("missing operand");
22409 switch (GET_CODE (x
))
22412 asm_fprintf (stream
, "%r", REGNO (x
));
22416 output_address (GET_MODE (x
), XEXP (x
, 0));
22422 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
22423 sizeof (fpstr
), 0, 1);
22424 fprintf (stream
, "#%s", fpstr
);
22429 gcc_assert (GET_CODE (x
) != NEG
);
22430 fputc ('#', stream
);
22431 if (GET_CODE (x
) == HIGH
)
22433 fputs (":lower16:", stream
);
22437 output_addr_const (stream
, x
);
22443 /* Target hook for printing a memory address. */
/* NOTE(review): extraction dropped structural lines here (braces, case
   labels of the switch on the index rtx, and the TARGET_32BIT test that
   separates the ARM/Thumb-2 path from the Thumb-1 path further down).
   Fragments are preserved verbatim; only comments added.  */
22445 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
22449 int is_minus
= GET_CODE (x
) == MINUS
;
/* Bare register: [reg].  */
22452 asm_fprintf (stream
, "[%r]", REGNO (x
));
22453 else if (GET_CODE (x
) == PLUS
|| is_minus
)
22455 rtx base
= XEXP (x
, 0);
22456 rtx index
= XEXP (x
, 1);
22457 HOST_WIDE_INT offset
= 0;
22459 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
22461 /* Ensure that BASE is a register. */
22462 /* (one of them must be). */
22463 /* Also ensure the SP is not used as in index register. */
22464 std::swap (base
, index
);
22466 switch (GET_CODE (index
))
/* CONST_INT index: [base, #offset].  */
22469 offset
= INTVAL (index
);
22472 asm_fprintf (stream
, "[%r, #%wd]",
22473 REGNO (base
), offset
);
/* REG index: [base, {-}index].  */
22477 asm_fprintf (stream
, "[%r, %s%r]",
22478 REGNO (base
), is_minus
? "-" : "",
/* Shifted index: [base, {-}reg, <shift>] -- the shift itself is
   printed via operand code 'S'.  */
22488 asm_fprintf (stream
, "[%r, %s%r",
22489 REGNO (base
), is_minus
? "-" : "",
22490 REGNO (XEXP (index
, 0)));
22491 arm_print_operand (stream
, index
, 'S');
22492 fputs ("]", stream
);
22497 gcc_unreachable ();
22500 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22501 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22503 gcc_assert (REG_P (XEXP (x
, 0)));
/* Pre-inc/dec: writeback form [reg, #+-size]!; the step is the mode
   size.  */
22505 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22506 asm_fprintf (stream
, "[%r, #%s%d]!",
22507 REGNO (XEXP (x
, 0)),
22508 GET_CODE (x
) == PRE_DEC
? "-" : "",
22509 GET_MODE_SIZE (mode
));
/* Post-inc/dec: [reg], #+-size.  */
22511 asm_fprintf (stream
, "[%r], #%s%d",
22512 REGNO (XEXP (x
, 0)),
22513 GET_CODE (x
) == POST_DEC
? "-" : "",
22514 GET_MODE_SIZE (mode
));
22516 else if (GET_CODE (x
) == PRE_MODIFY
)
22518 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22519 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22520 asm_fprintf (stream
, "#%wd]!",
22521 INTVAL (XEXP (XEXP (x
, 1), 1)));
22523 asm_fprintf (stream
, "%r]!",
22524 REGNO (XEXP (XEXP (x
, 1), 1)));
22526 else if (GET_CODE (x
) == POST_MODIFY
)
22528 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22529 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22530 asm_fprintf (stream
, "#%wd",
22531 INTVAL (XEXP (XEXP (x
, 1), 1)));
22533 asm_fprintf (stream
, "%r",
22534 REGNO (XEXP (XEXP (x
, 1), 1)));
22536 else output_addr_const (stream
, x
);
/* Below: the (presumed Thumb-1) address forms -- TODO confirm the
   dropped enclosing condition against upstream arm.c.  */
22541 asm_fprintf (stream
, "[%r]", REGNO (x
));
22542 else if (GET_CODE (x
) == POST_INC
)
22543 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22544 else if (GET_CODE (x
) == PLUS
)
22546 gcc_assert (REG_P (XEXP (x
, 0)));
22547 if (CONST_INT_P (XEXP (x
, 1)))
22548 asm_fprintf (stream
, "[%r, #%wd]",
22549 REGNO (XEXP (x
, 0)),
22550 INTVAL (XEXP (x
, 1)));
22552 asm_fprintf (stream
, "[%r, %r]",
22553 REGNO (XEXP (x
, 0)),
22554 REGNO (XEXP (x
, 1)));
22557 output_addr_const (stream
, x
);
22561 /* Target hook for indicating whether a punctuation character for
22562 TARGET_PRINT_OPERAND is valid. */
22564 arm_print_operand_punct_valid_p (unsigned char code
)
22566 return (code
== '@' || code
== '|' || code
== '.'
22567 || code
== '(' || code
== ')' || code
== '#'
22568 || (TARGET_32BIT
&& (code
== '?'))
22569 || (TARGET_THUMB2
&& (code
== '!'))
22570 || (TARGET_THUMB
&& (code
== '_')));
22573 /* Target hook for assembling integer objects. The ARM version needs to
22574 handle word-sized values specially. */
/* NOTE(review): braces and a few call heads were dropped in extraction;
   fragments preserved verbatim, only comments added.  */
22576 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
/* Aligned word: emit a .word directive ourselves so that PIC
   relocation suffixes can be appended.  */
22580 if (size
== UNITS_PER_WORD
&& aligned_p
)
22582 fputs ("\t.word\t", asm_out_file
);
22583 output_addr_const (asm_out_file
, x
);
22585 /* Mark symbols as position independent. We only do this in the
22586 .text segment, not in the .data segment. */
22587 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
22588 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
22590 /* See legitimize_pic_address for an explanation of the
22591 TARGET_VXWORKS_RTP check. */
22592 /* References to weak symbols cannot be resolved locally:
22593 they may be overridden by a non-weak definition at link
22595 if (!arm_pic_data_is_text_relative
22596 || (GET_CODE (x
) == SYMBOL_REF
22597 && (!SYMBOL_REF_LOCAL_P (x
)
22598 || (SYMBOL_REF_DECL (x
)
22599 ? DECL_WEAK (SYMBOL_REF_DECL (x
)) : 0))))
22600 fputs ("(GOT)", asm_out_file
);
22602 fputs ("(GOTOFF)", asm_out_file
);
22604 fputc ('\n', asm_out_file
);
/* Vector constants: emit element by element.  */
22608 mode
= GET_MODE (x
);
22610 if (arm_vector_mode_supported_p (mode
))
22614 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22616 units
= CONST_VECTOR_NUNITS (x
);
22617 size
= GET_MODE_UNIT_SIZE (mode
);
22619 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22620 for (i
= 0; i
< units
; i
++)
22622 rtx elt
= CONST_VECTOR_ELT (x
, i
);
/* The call head for this argument list was lost in extraction --
   presumably assemble_integer; confirm against upstream arm.c.  */
22624 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
22627 for (i
= 0; i
< units
; i
++)
22629 rtx elt
= CONST_VECTOR_ELT (x
, i
);
/* FP elements -- call head also lost (presumably assemble_real).  */
22631 (*CONST_DOUBLE_REAL_VALUE (elt
), GET_MODE_INNER (mode
),
22632 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
/* Everything else: defer to the generic implementation.  */
22638 return default_assemble_integer (x
, size
, aligned_p
);
22642 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
22646 if (!TARGET_AAPCS_BASED
)
22649 default_named_section_asm_out_constructor
22650 : default_named_section_asm_out_destructor
) (symbol
, priority
);
22654 /* Put these in the .init_array section, using a special relocation. */
22655 if (priority
!= DEFAULT_INIT_PRIORITY
)
22658 sprintf (buf
, "%s.%.5u",
22659 is_ctor
? ".init_array" : ".fini_array",
22661 s
= get_section (buf
, SECTION_WRITE
| SECTION_NOTYPE
, NULL_TREE
);
22668 switch_to_section (s
);
22669 assemble_align (POINTER_SIZE
);
22670 fputs ("\t.word\t", asm_out_file
);
22671 output_addr_const (asm_out_file
, symbol
);
22672 fputs ("(target1)\n", asm_out_file
);
22675 /* Add a function to the list of static constructors. */
22678 arm_elf_asm_constructor (rtx symbol
, int priority
)
22680 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
22683 /* Add a function to the list of static destructors. */
22686 arm_elf_asm_destructor (rtx symbol
, int priority
)
22688 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
22691 /* A finite state machine takes care of noticing whether or not instructions
22692 can be conditionally executed, and thus decrease execution time and code
22693 size by deleting branch instructions. The fsm is controlled by
22694 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22696 /* The state of the fsm controlling condition codes are:
22697 0: normal, do nothing special
22698 1: make ASM_OUTPUT_OPCODE not output this instruction
22699 2: make ASM_OUTPUT_OPCODE not output this instruction
22700 3: make instructions conditional
22701 4: make instructions conditional
22703 State transitions (state->state by whom under condition):
22704 0 -> 1 final_prescan_insn if the `target' is a label
22705 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22706 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22707 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22708 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22709 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22710 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22711 (the target insn is arm_target_insn).
22713 If the jump clobbers the conditions then we use states 2 and 4.
22715 A similar thing can be done with conditional return insns.
22717 XXX In case the `target' is an unconditional branch, this conditionalising
22718 of the instructions always reduces code size, but not always execution
22719 time. But then, I want to reduce the code size to somewhere near what
22720 /bin/cc produces. */
22722 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22723 instructions. When a COND_EXEC instruction is seen the subsequent
22724 instructions are scanned so that multiple conditional instructions can be
22725 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22726 specify the length and true/false mask for the IT block. These will be
22727 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22729 /* Returns the index of the ARM condition code string in
22730 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22731 COMPARISON should be an rtx like `(eq (...) (...))'. */
/* NOTE(review): extraction dropped the outer `switch (mode)' and every
   `case CC_...mode:' selector label that introduces each case/return
   group below, as well as the `dominance:' label targeted by the gotos.
   Fragments preserved verbatim; only comments added.  Confirm the group
   boundaries against upstream arm.c before editing.  */
22734 maybe_get_arm_condition_code (rtx comparison
)
22736 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
22737 enum arm_cond_code code
;
22738 enum rtx_code comp_code
= GET_CODE (comparison
);
/* If the operand carries no CC mode, derive one from the comparison.  */
22740 if (GET_MODE_CLASS (mode
) != MODE_CC
)
22741 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
22742 XEXP (comparison
, 1));
/* Dominance modes: pick a base condition, then adjust at the (lost)
   `dominance' label below.  */
22746 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
22747 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
22748 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
22749 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
22750 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
22751 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
22752 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
22753 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
22754 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
22755 case CC_DLTUmode
: code
= ARM_CC
;
/* Body of the lost `dominance:' label: EQ inverts the chosen code,
   NE keeps it.  */
22758 if (comp_code
== EQ
)
22759 return ARM_INVERSE_CONDITION_CODE (code
);
22760 if (comp_code
== NE
)
/* Each of the following groups belonged to one `case CC_...mode:'.  */
22767 case NE
: return ARM_NE
;
22768 case EQ
: return ARM_EQ
;
22769 case GE
: return ARM_PL
;
22770 case LT
: return ARM_MI
;
22771 default: return ARM_NV
;
22777 case NE
: return ARM_NE
;
22778 case EQ
: return ARM_EQ
;
22779 default: return ARM_NV
;
22785 case NE
: return ARM_MI
;
22786 case EQ
: return ARM_PL
;
22787 default: return ARM_NV
;
22792 /* We can handle all cases except UNEQ and LTGT. */
22795 case GE
: return ARM_GE
;
22796 case GT
: return ARM_GT
;
22797 case LE
: return ARM_LS
;
22798 case LT
: return ARM_MI
;
22799 case NE
: return ARM_NE
;
22800 case EQ
: return ARM_EQ
;
22801 case ORDERED
: return ARM_VC
;
22802 case UNORDERED
: return ARM_VS
;
22803 case UNLT
: return ARM_LT
;
22804 case UNLE
: return ARM_LE
;
22805 case UNGT
: return ARM_HI
;
22806 case UNGE
: return ARM_PL
;
22807 /* UNEQ and LTGT do not have a representation. */
22808 case UNEQ
: /* Fall through. */
22809 case LTGT
: /* Fall through. */
22810 default: return ARM_NV
;
/* Swapped-operand mode: each condition maps to its mirror.  */
22816 case NE
: return ARM_NE
;
22817 case EQ
: return ARM_EQ
;
22818 case GE
: return ARM_LE
;
22819 case GT
: return ARM_LT
;
22820 case LE
: return ARM_GE
;
22821 case LT
: return ARM_GT
;
22822 case GEU
: return ARM_LS
;
22823 case GTU
: return ARM_CC
;
22824 case LEU
: return ARM_CS
;
22825 case LTU
: return ARM_HI
;
22826 default: return ARM_NV
;
22832 case LTU
: return ARM_CS
;
22833 case GEU
: return ARM_CC
;
22834 case NE
: return ARM_CS
;
22835 case EQ
: return ARM_CC
;
22836 default: return ARM_NV
;
22842 case NE
: return ARM_NE
;
22843 case EQ
: return ARM_EQ
;
22844 case GEU
: return ARM_CS
;
22845 case GTU
: return ARM_HI
;
22846 case LEU
: return ARM_LS
;
22847 case LTU
: return ARM_CC
;
22848 default: return ARM_NV
;
22854 case GE
: return ARM_GE
;
22855 case LT
: return ARM_LT
;
22856 case GEU
: return ARM_CS
;
22857 case LTU
: return ARM_CC
;
22858 default: return ARM_NV
;
22864 case NE
: return ARM_VS
;
22865 case EQ
: return ARM_VC
;
22866 default: return ARM_NV
;
/* Plain CCmode: the full set of conditions.  */
22872 case NE
: return ARM_NE
;
22873 case EQ
: return ARM_EQ
;
22874 case GE
: return ARM_GE
;
22875 case GT
: return ARM_GT
;
22876 case LE
: return ARM_LE
;
22877 case LT
: return ARM_LT
;
22878 case GEU
: return ARM_CS
;
22879 case GTU
: return ARM_HI
;
22880 case LEU
: return ARM_LS
;
22881 case LTU
: return ARM_CC
;
22882 default: return ARM_NV
;
22885 default: gcc_unreachable ();
22889 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22890 static enum arm_cond_code
22891 get_arm_condition_code (rtx comparison
)
22893 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
22894 gcc_assert (code
!= ARM_NV
);
22898 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22901 thumb2_final_prescan_insn (rtx_insn
*insn
)
22903 rtx_insn
*first_insn
= insn
;
22904 rtx body
= PATTERN (insn
);
22906 enum arm_cond_code code
;
22911 /* max_insns_skipped in the tune was already taken into account in the
22912 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22913 just emit the IT blocks as we can. It does not make sense to split
22915 max
= MAX_INSN_PER_IT_BLOCK
;
22917 /* Remove the previous insn from the count of insns to be output. */
22918 if (arm_condexec_count
)
22919 arm_condexec_count
--;
22921 /* Nothing to do if we are already inside a conditional block. */
22922 if (arm_condexec_count
)
22925 if (GET_CODE (body
) != COND_EXEC
)
22928 /* Conditional jumps are implemented directly. */
22932 predicate
= COND_EXEC_TEST (body
);
22933 arm_current_cc
= get_arm_condition_code (predicate
);
22935 n
= get_attr_ce_count (insn
);
22936 arm_condexec_count
= 1;
22937 arm_condexec_mask
= (1 << n
) - 1;
22938 arm_condexec_masklen
= n
;
22939 /* See if subsequent instructions can be combined into the same block. */
22942 insn
= next_nonnote_insn (insn
);
22944 /* Jumping into the middle of an IT block is illegal, so a label or
22945 barrier terminates the block. */
22946 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
22949 body
= PATTERN (insn
);
22950 /* USE and CLOBBER aren't really insns, so just skip them. */
22951 if (GET_CODE (body
) == USE
22952 || GET_CODE (body
) == CLOBBER
)
22955 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22956 if (GET_CODE (body
) != COND_EXEC
)
22958 /* Maximum number of conditionally executed instructions in a block. */
22959 n
= get_attr_ce_count (insn
);
22960 if (arm_condexec_masklen
+ n
> max
)
22963 predicate
= COND_EXEC_TEST (body
);
22964 code
= get_arm_condition_code (predicate
);
22965 mask
= (1 << n
) - 1;
22966 if (arm_current_cc
== code
)
22967 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
22968 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
22971 arm_condexec_count
++;
22972 arm_condexec_masklen
+= n
;
22974 /* A jump must be the last instruction in a conditional block. */
22978 /* Restore recog_data (getting the attributes of other insns can
22979 destroy this array, but final.c assumes that it remains intact
22980 across this call). */
22981 extract_constrain_insn_cached (first_insn
);
22985 arm_final_prescan_insn (rtx_insn
*insn
)
22987 /* BODY will hold the body of INSN. */
22988 rtx body
= PATTERN (insn
);
22990 /* This will be 1 if trying to repeat the trick, and things need to be
22991 reversed if it appears to fail. */
22994 /* If we start with a return insn, we only succeed if we find another one. */
22995 int seeking_return
= 0;
22996 enum rtx_code return_code
= UNKNOWN
;
22998 /* START_INSN will hold the insn from where we start looking. This is the
22999 first insn after the following code_label if REVERSE is true. */
23000 rtx_insn
*start_insn
= insn
;
23002 /* If in state 4, check if the target branch is reached, in order to
23003 change back to state 0. */
23004 if (arm_ccfsm_state
== 4)
23006 if (insn
== arm_target_insn
)
23008 arm_target_insn
= NULL
;
23009 arm_ccfsm_state
= 0;
23014 /* If in state 3, it is possible to repeat the trick, if this insn is an
23015 unconditional branch to a label, and immediately following this branch
23016 is the previous target label which is only used once, and the label this
23017 branch jumps to is not too far off. */
23018 if (arm_ccfsm_state
== 3)
23020 if (simplejump_p (insn
))
23022 start_insn
= next_nonnote_insn (start_insn
);
23023 if (BARRIER_P (start_insn
))
23025 /* XXX Isn't this always a barrier? */
23026 start_insn
= next_nonnote_insn (start_insn
);
23028 if (LABEL_P (start_insn
)
23029 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23030 && LABEL_NUSES (start_insn
) == 1)
23035 else if (ANY_RETURN_P (body
))
23037 start_insn
= next_nonnote_insn (start_insn
);
23038 if (BARRIER_P (start_insn
))
23039 start_insn
= next_nonnote_insn (start_insn
);
23040 if (LABEL_P (start_insn
)
23041 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23042 && LABEL_NUSES (start_insn
) == 1)
23045 seeking_return
= 1;
23046 return_code
= GET_CODE (body
);
23055 gcc_assert (!arm_ccfsm_state
|| reverse
);
23056 if (!JUMP_P (insn
))
23059 /* This jump might be paralleled with a clobber of the condition codes
23060 the jump should always come first */
23061 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
23062 body
= XVECEXP (body
, 0, 0);
23065 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
23066 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
23069 int fail
= FALSE
, succeed
= FALSE
;
23070 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23071 int then_not_else
= TRUE
;
23072 rtx_insn
*this_insn
= start_insn
;
23075 /* Register the insn jumped to. */
23078 if (!seeking_return
)
23079 label
= XEXP (SET_SRC (body
), 0);
23081 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
23082 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
23083 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
23085 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
23086 then_not_else
= FALSE
;
23088 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
23090 seeking_return
= 1;
23091 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
23093 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
23095 seeking_return
= 1;
23096 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
23097 then_not_else
= FALSE
;
23100 gcc_unreachable ();
23102 /* See how many insns this branch skips, and what kind of insns. If all
23103 insns are okay, and the label or unconditional branch to the same
23104 label is not too far away, succeed. */
23105 for (insns_skipped
= 0;
23106 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
23110 this_insn
= next_nonnote_insn (this_insn
);
23114 switch (GET_CODE (this_insn
))
23117 /* Succeed if it is the target label, otherwise fail since
23118 control falls in from somewhere else. */
23119 if (this_insn
== label
)
23121 arm_ccfsm_state
= 1;
23129 /* Succeed if the following insn is the target label.
23131 If return insns are used then the last insn in a function
23132 will be a barrier. */
23133 this_insn
= next_nonnote_insn (this_insn
);
23134 if (this_insn
&& this_insn
== label
)
23136 arm_ccfsm_state
= 1;
23144 /* The AAPCS says that conditional calls should not be
23145 used since they make interworking inefficient (the
23146 linker can't transform BL<cond> into BLX). That's
23147 only a problem if the machine has BLX. */
23154 /* Succeed if the following insn is the target label, or
23155 if the following two insns are a barrier and the
23157 this_insn
= next_nonnote_insn (this_insn
);
23158 if (this_insn
&& BARRIER_P (this_insn
))
23159 this_insn
= next_nonnote_insn (this_insn
);
23161 if (this_insn
&& this_insn
== label
23162 && insns_skipped
< max_insns_skipped
)
23164 arm_ccfsm_state
= 1;
23172 /* If this is an unconditional branch to the same label, succeed.
23173 If it is to another label, do nothing. If it is conditional,
23175 /* XXX Probably, the tests for SET and the PC are
23178 scanbody
= PATTERN (this_insn
);
23179 if (GET_CODE (scanbody
) == SET
23180 && GET_CODE (SET_DEST (scanbody
)) == PC
)
23182 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
23183 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
23185 arm_ccfsm_state
= 2;
23188 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
23191 /* Fail if a conditional return is undesirable (e.g. on a
23192 StrongARM), but still allow this if optimizing for size. */
23193 else if (GET_CODE (scanbody
) == return_code
23194 && !use_return_insn (TRUE
, NULL
)
23197 else if (GET_CODE (scanbody
) == return_code
)
23199 arm_ccfsm_state
= 2;
23202 else if (GET_CODE (scanbody
) == PARALLEL
)
23204 switch (get_attr_conds (this_insn
))
23214 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
23219 /* Instructions using or affecting the condition codes make it
23221 scanbody
= PATTERN (this_insn
);
23222 if (!(GET_CODE (scanbody
) == SET
23223 || GET_CODE (scanbody
) == PARALLEL
)
23224 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
23234 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
23235 arm_target_label
= CODE_LABEL_NUMBER (label
);
23238 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
23240 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
23242 this_insn
= next_nonnote_insn (this_insn
);
23243 gcc_assert (!this_insn
23244 || (!BARRIER_P (this_insn
)
23245 && !LABEL_P (this_insn
)));
23249 /* Oh, dear! we ran off the end.. give up. */
23250 extract_constrain_insn_cached (insn
);
23251 arm_ccfsm_state
= 0;
23252 arm_target_insn
= NULL
;
23255 arm_target_insn
= this_insn
;
23258 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23261 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
23263 if (reverse
|| then_not_else
)
23264 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
23267 /* Restore recog_data (getting the attributes of other insns can
23268 destroy this array, but final.c assumes that it remains intact
23269 across this call. */
23270 extract_constrain_insn_cached (insn
);
23274 /* Output IT instructions. */
23276 thumb2_asm_output_opcode (FILE * stream
)
23281 if (arm_condexec_mask
)
23283 for (n
= 0; n
< arm_condexec_masklen
; n
++)
23284 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
23286 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
23287 arm_condition_codes
[arm_current_cc
]);
23288 arm_condexec_mask
= 0;
23292 /* Returns true if REGNO is a valid register
23293 for holding a quantity of type MODE. */
23295 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
23297 if (GET_MODE_CLASS (mode
) == MODE_CC
)
23298 return (regno
== CC_REGNUM
23299 || (TARGET_HARD_FLOAT
23300 && regno
== VFPCC_REGNUM
));
23302 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
23306 /* For the Thumb we only allow values bigger than SImode in
23307 registers 0 - 6, so that there is always a second low
23308 register available to hold the upper part of the value.
23309 We probably we ought to ensure that the register is the
23310 start of an even numbered register pair. */
23311 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
23313 if (TARGET_HARD_FLOAT
&& IS_VFP_REGNUM (regno
))
23315 if (mode
== SFmode
|| mode
== SImode
)
23316 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23318 if (mode
== DFmode
)
23319 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
23321 if (mode
== HFmode
)
23322 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23324 /* VFP registers can hold HImode values. */
23325 if (mode
== HImode
)
23326 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23329 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
23330 || (VALID_NEON_QREG_MODE (mode
)
23331 && NEON_REGNO_OK_FOR_QUAD (regno
))
23332 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
23333 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
23334 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
23335 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
23336 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
23341 if (TARGET_REALLY_IWMMXT
)
23343 if (IS_IWMMXT_GR_REGNUM (regno
))
23344 return mode
== SImode
;
23346 if (IS_IWMMXT_REGNUM (regno
))
23347 return VALID_IWMMXT_REG_MODE (mode
);
23350 /* We allow almost any value to be stored in the general registers.
23351 Restrict doubleword quantities to even register pairs in ARM state
23352 so that we can use ldrd. Do not allow very large Neon structure
23353 opaque modes in general registers; they would use too many. */
23354 if (regno
<= LAST_ARM_REGNUM
)
23356 if (ARM_NUM_REGS (mode
) > 4)
23362 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
23365 if (regno
== FRAME_POINTER_REGNUM
23366 || regno
== ARG_POINTER_REGNUM
)
23367 /* We only allow integers in the fake hard registers. */
23368 return GET_MODE_CLASS (mode
) == MODE_INT
;
23373 /* Implement MODES_TIEABLE_P. */
23376 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
23378 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
23381 /* We specifically want to allow elements of "structure" modes to
23382 be tieable to the structure. This more general condition allows
23383 other rarer situations too. */
23385 && (VALID_NEON_DREG_MODE (mode1
)
23386 || VALID_NEON_QREG_MODE (mode1
)
23387 || VALID_NEON_STRUCT_MODE (mode1
))
23388 && (VALID_NEON_DREG_MODE (mode2
)
23389 || VALID_NEON_QREG_MODE (mode2
)
23390 || VALID_NEON_STRUCT_MODE (mode2
)))
23396 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23397 not used in arm mode. */
23400 arm_regno_class (int regno
)
23402 if (regno
== PC_REGNUM
)
23407 if (regno
== STACK_POINTER_REGNUM
)
23409 if (regno
== CC_REGNUM
)
23416 if (TARGET_THUMB2
&& regno
< 8)
23419 if ( regno
<= LAST_ARM_REGNUM
23420 || regno
== FRAME_POINTER_REGNUM
23421 || regno
== ARG_POINTER_REGNUM
)
23422 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
23424 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
23425 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
23427 if (IS_VFP_REGNUM (regno
))
23429 if (regno
<= D7_VFP_REGNUM
)
23430 return VFP_D0_D7_REGS
;
23431 else if (regno
<= LAST_LO_VFP_REGNUM
)
23432 return VFP_LO_REGS
;
23434 return VFP_HI_REGS
;
23437 if (IS_IWMMXT_REGNUM (regno
))
23438 return IWMMXT_REGS
;
23440 if (IS_IWMMXT_GR_REGNUM (regno
))
23441 return IWMMXT_GR_REGS
;
23446 /* Handle a special case when computing the offset
23447 of an argument from the frame pointer. */
23449 arm_debugger_arg_offset (int value
, rtx addr
)
23453 /* We are only interested if dbxout_parms() failed to compute the offset. */
23457 /* We can only cope with the case where the address is held in a register. */
23461 /* If we are using the frame pointer to point at the argument, then
23462 an offset of 0 is correct. */
23463 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
23466 /* If we are using the stack pointer to point at the
23467 argument, then an offset of 0 is correct. */
23468 /* ??? Check this is consistent with thumb2 frame layout. */
23469 if ((TARGET_THUMB
|| !frame_pointer_needed
)
23470 && REGNO (addr
) == SP_REGNUM
)
23473 /* Oh dear. The argument is pointed to by a register rather
23474 than being held in a register, or being stored at a known
23475 offset from the frame pointer. Since GDB only understands
23476 those two kinds of argument we must translate the address
23477 held in the register into an offset from the frame pointer.
23478 We do this by searching through the insns for the function
23479 looking to see where this register gets its value. If the
23480 register is initialized from the frame pointer plus an offset
23481 then we are in luck and we can continue, otherwise we give up.
23483 This code is exercised by producing debugging information
23484 for a function with arguments like this:
23486 double func (double a, double b, int c, double d) {return d;}
23488 Without this code the stab for parameter 'd' will be set to
23489 an offset of 0 from the frame pointer, rather than 8. */
23491 /* The if() statement says:
23493 If the insn is a normal instruction
23494 and if the insn is setting the value in a register
23495 and if the register being set is the register holding the address of the argument
23496 and if the address is computing by an addition
23497 that involves adding to a register
23498 which is the frame pointer
23503 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23505 if ( NONJUMP_INSN_P (insn
)
23506 && GET_CODE (PATTERN (insn
)) == SET
23507 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23508 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23509 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23510 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23511 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23514 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23523 warning (0, "unable to compute real location of stacked parameter");
23524 value
= 8; /* XXX magic hack */
23530 /* Implement TARGET_PROMOTED_TYPE. */
23533 arm_promoted_type (const_tree t
)
23535 if (SCALAR_FLOAT_TYPE_P (t
)
23536 && TYPE_PRECISION (t
) == 16
23537 && TYPE_MAIN_VARIANT (t
) == arm_fp16_type_node
)
23538 return float_type_node
;
23542 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23543 This simply adds HFmode as a supported mode; even though we don't
23544 implement arithmetic on this type directly, it's supported by
23545 optabs conversions, much the way the double-word arithmetic is
23546 special-cased in the default hook. */
23549 arm_scalar_mode_supported_p (machine_mode mode
)
23551 if (mode
== HFmode
)
23552 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
23553 else if (ALL_FIXED_POINT_MODE_P (mode
))
23556 return default_scalar_mode_supported_p (mode
);
23559 /* Set the value of FLT_EVAL_METHOD.
23560 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23562 0: evaluate all operations and constants, whose semantic type has at
23563 most the range and precision of type float, to the range and
23564 precision of float; evaluate all other operations and constants to
23565 the range and precision of the semantic type;
23567 N, where _FloatN is a supported interchange floating type
23568 evaluate all operations and constants, whose semantic type has at
23569 most the range and precision of _FloatN type, to the range and
23570 precision of the _FloatN type; evaluate all other operations and
23571 constants to the range and precision of the semantic type;
23573 If we have the ARMv8.2-A extensions then we support _Float16 in native
23574 precision, so we should set this to 16. Otherwise, we support the type,
23575 but want to evaluate expressions in float precision, so set this to
23578 static enum flt_eval_method
23579 arm_excess_precision (enum excess_precision_type type
)
23583 case EXCESS_PRECISION_TYPE_FAST
:
23584 case EXCESS_PRECISION_TYPE_STANDARD
:
23585 /* We can calculate either in 16-bit range and precision or
23586 32-bit range and precision. Make that decision based on whether
23587 we have native support for the ARMv8.2-A 16-bit floating-point
23588 instructions or not. */
23589 return (TARGET_VFP_FP16INST
23590 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23591 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
);
23592 case EXCESS_PRECISION_TYPE_IMPLICIT
:
23593 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
;
23595 gcc_unreachable ();
23597 return FLT_EVAL_METHOD_UNPREDICTABLE
;
23601 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23602 _Float16 if we are using anything other than ieee format for 16-bit
23603 floating point. Otherwise, punt to the default implementation. */
23604 static machine_mode
23605 arm_floatn_mode (int n
, bool extended
)
23607 if (!extended
&& n
== 16)
23608 return arm_fp16_format
== ARM_FP16_FORMAT_IEEE
? HFmode
: VOIDmode
;
23610 return default_floatn_mode (n
, extended
);
23614 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23615 not to early-clobber SRC registers in the process.
23617 We assume that the operands described by SRC and DEST represent a
23618 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23619 number of components into which the copy has been decomposed. */
23621 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
23625 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
23626 || REGNO (operands
[0]) < REGNO (operands
[1]))
23628 for (i
= 0; i
< count
; i
++)
23630 operands
[2 * i
] = dest
[i
];
23631 operands
[2 * i
+ 1] = src
[i
];
23636 for (i
= 0; i
< count
; i
++)
23638 operands
[2 * i
] = dest
[count
- i
- 1];
23639 operands
[2 * i
+ 1] = src
[count
- i
- 1];
23644 /* Split operands into moves from op[1] + op[2] into op[0]. */
23647 neon_split_vcombine (rtx operands
[3])
23649 unsigned int dest
= REGNO (operands
[0]);
23650 unsigned int src1
= REGNO (operands
[1]);
23651 unsigned int src2
= REGNO (operands
[2]);
23652 machine_mode halfmode
= GET_MODE (operands
[1]);
23653 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
23654 rtx destlo
, desthi
;
23656 if (src1
== dest
&& src2
== dest
+ halfregs
)
23658 /* No-op move. Can't split to nothing; emit something. */
23659 emit_note (NOTE_INSN_DELETED
);
23663 /* Preserve register attributes for variable tracking. */
23664 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
23665 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
23666 GET_MODE_SIZE (halfmode
));
23668 /* Special case of reversed high/low parts. Use VSWP. */
23669 if (src2
== dest
&& src1
== dest
+ halfregs
)
23671 rtx x
= gen_rtx_SET (destlo
, operands
[1]);
23672 rtx y
= gen_rtx_SET (desthi
, operands
[2]);
23673 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
23677 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
23679 /* Try to avoid unnecessary moves if part of the result
23680 is in the right place already. */
23682 emit_move_insn (destlo
, operands
[1]);
23683 if (src2
!= dest
+ halfregs
)
23684 emit_move_insn (desthi
, operands
[2]);
23688 if (src2
!= dest
+ halfregs
)
23689 emit_move_insn (desthi
, operands
[2]);
23691 emit_move_insn (destlo
, operands
[1]);
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
23704 /* Like emit_multi_reg_push, but allowing for a different set of
23705 registers to be described as saved. MASK is the set of registers
23706 to be saved; REAL_REGS is the set of registers to be described as
23707 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23710 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
23712 unsigned long regno
;
23713 rtx par
[10], tmp
, reg
;
23717 /* Build the parallel of the registers actually being stored. */
23718 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
23720 regno
= ctz_hwi (mask
);
23721 reg
= gen_rtx_REG (SImode
, regno
);
23724 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
23726 tmp
= gen_rtx_USE (VOIDmode
, reg
);
23731 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23732 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
23733 tmp
= gen_frame_mem (BLKmode
, tmp
);
23734 tmp
= gen_rtx_SET (tmp
, par
[0]);
23737 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
23738 insn
= emit_insn (tmp
);
23740 /* Always build the stack adjustment note for unwind info. */
23741 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23742 tmp
= gen_rtx_SET (stack_pointer_rtx
, tmp
);
23745 /* Build the parallel of the registers recorded as saved for unwind. */
23746 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
23748 regno
= ctz_hwi (real_regs
);
23749 reg
= gen_rtx_REG (SImode
, regno
);
23751 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
23752 tmp
= gen_frame_mem (SImode
, tmp
);
23753 tmp
= gen_rtx_SET (tmp
, reg
);
23754 RTX_FRAME_RELATED_P (tmp
) = 1;
23762 RTX_FRAME_RELATED_P (par
[0]) = 1;
23763 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
23766 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
23771 /* Emit code to push or pop registers to or from the stack. F is the
23772 assembly file. MASK is the registers to pop. */
23774 thumb_pop (FILE *f
, unsigned long mask
)
23777 int lo_mask
= mask
& 0xFF;
23778 int pushed_words
= 0;
23782 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
23784 /* Special case. Do not generate a POP PC statement here, do it in
23786 thumb_exit (f
, -1);
23790 fprintf (f
, "\tpop\t{");
23792 /* Look at the low registers first. */
23793 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
23797 asm_fprintf (f
, "%r", regno
);
23799 if ((lo_mask
& ~1) != 0)
23806 if (mask
& (1 << PC_REGNUM
))
23808 /* Catch popping the PC. */
23809 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
|| crtl
->calls_eh_return
23810 || IS_CMSE_ENTRY (arm_current_func_type ()))
23812 /* The PC is never poped directly, instead
23813 it is popped into r3 and then BX is used. */
23814 fprintf (f
, "}\n");
23816 thumb_exit (f
, -1);
23825 asm_fprintf (f
, "%r", PC_REGNUM
);
23829 fprintf (f
, "}\n");
23832 /* Generate code to return from a thumb function.
23833 If 'reg_containing_return_addr' is -1, then the return address is
23834 actually on the stack, at the stack pointer. */
23836 thumb_exit (FILE *f
, int reg_containing_return_addr
)
23838 unsigned regs_available_for_popping
;
23839 unsigned regs_to_pop
;
23841 unsigned available
;
23845 int restore_a4
= FALSE
;
23847 /* Compute the registers we need to pop. */
23851 if (reg_containing_return_addr
== -1)
23853 regs_to_pop
|= 1 << LR_REGNUM
;
23857 if (TARGET_BACKTRACE
)
23859 /* Restore the (ARM) frame pointer and stack pointer. */
23860 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
23864 /* If there is nothing to pop then just emit the BX instruction and
23866 if (pops_needed
== 0)
23868 if (crtl
->calls_eh_return
)
23869 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
23871 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23873 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n",
23874 reg_containing_return_addr
);
23875 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
23878 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
23881 /* Otherwise if we are not supporting interworking and we have not created
23882 a backtrace structure and the function was not entered in ARM mode then
23883 just pop the return address straight into the PC. */
23884 else if (!TARGET_INTERWORK
23885 && !TARGET_BACKTRACE
23886 && !is_called_in_ARM_mode (current_function_decl
)
23887 && !crtl
->calls_eh_return
23888 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23890 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
23894 /* Find out how many of the (return) argument registers we can corrupt. */
23895 regs_available_for_popping
= 0;
23897 /* If returning via __builtin_eh_return, the bottom three registers
23898 all contain information needed for the return. */
23899 if (crtl
->calls_eh_return
)
23903 /* If we can deduce the registers used from the function's
23904 return value. This is more reliable that examining
23905 df_regs_ever_live_p () because that will be set if the register is
23906 ever used in the function, not just if the register is used
23907 to hold a return value. */
23909 if (crtl
->return_rtx
!= 0)
23910 mode
= GET_MODE (crtl
->return_rtx
);
23912 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
23914 size
= GET_MODE_SIZE (mode
);
23918 /* In a void function we can use any argument register.
23919 In a function that returns a structure on the stack
23920 we can use the second and third argument registers. */
23921 if (mode
== VOIDmode
)
23922 regs_available_for_popping
=
23923 (1 << ARG_REGISTER (1))
23924 | (1 << ARG_REGISTER (2))
23925 | (1 << ARG_REGISTER (3));
23927 regs_available_for_popping
=
23928 (1 << ARG_REGISTER (2))
23929 | (1 << ARG_REGISTER (3));
23931 else if (size
<= 4)
23932 regs_available_for_popping
=
23933 (1 << ARG_REGISTER (2))
23934 | (1 << ARG_REGISTER (3));
23935 else if (size
<= 8)
23936 regs_available_for_popping
=
23937 (1 << ARG_REGISTER (3));
23940 /* Match registers to be popped with registers into which we pop them. */
23941 for (available
= regs_available_for_popping
,
23942 required
= regs_to_pop
;
23943 required
!= 0 && available
!= 0;
23944 available
&= ~(available
& - available
),
23945 required
&= ~(required
& - required
))
23948 /* If we have any popping registers left over, remove them. */
23950 regs_available_for_popping
&= ~available
;
23952 /* Otherwise if we need another popping register we can use
23953 the fourth argument register. */
23954 else if (pops_needed
)
23956 /* If we have not found any free argument registers and
23957 reg a4 contains the return address, we must move it. */
23958 if (regs_available_for_popping
== 0
23959 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
23961 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
23962 reg_containing_return_addr
= LR_REGNUM
;
23964 else if (size
> 12)
23966 /* Register a4 is being used to hold part of the return value,
23967 but we have dire need of a free, low register. */
23970 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
23973 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
23975 /* The fourth argument register is available. */
23976 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
23982 /* Pop as many registers as we can. */
23983 thumb_pop (f
, regs_available_for_popping
);
23985 /* Process the registers we popped. */
23986 if (reg_containing_return_addr
== -1)
23988 /* The return address was popped into the lowest numbered register. */
23989 regs_to_pop
&= ~(1 << LR_REGNUM
);
23991 reg_containing_return_addr
=
23992 number_of_first_bit_set (regs_available_for_popping
);
23994 /* Remove this register for the mask of available registers, so that
23995 the return address will not be corrupted by further pops. */
23996 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
23999 /* If we popped other registers then handle them here. */
24000 if (regs_available_for_popping
)
24004 /* Work out which register currently contains the frame pointer. */
24005 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24007 /* Move it into the correct place. */
24008 asm_fprintf (f
, "\tmov\t%r, %r\n",
24009 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
24011 /* (Temporarily) remove it from the mask of popped registers. */
24012 regs_available_for_popping
&= ~(1 << frame_pointer
);
24013 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
24015 if (regs_available_for_popping
)
24019 /* We popped the stack pointer as well,
24020 find the register that contains it. */
24021 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24023 /* Move it into the stack register. */
24024 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
24026 /* At this point we have popped all necessary registers, so
24027 do not worry about restoring regs_available_for_popping
24028 to its correct value:
24030 assert (pops_needed == 0)
24031 assert (regs_available_for_popping == (1 << frame_pointer))
24032 assert (regs_to_pop == (1 << STACK_POINTER)) */
24036 /* Since we have just move the popped value into the frame
24037 pointer, the popping register is available for reuse, and
24038 we know that we still have the stack pointer left to pop. */
24039 regs_available_for_popping
|= (1 << frame_pointer
);
24043 /* If we still have registers left on the stack, but we no longer have
24044 any registers into which we can pop them, then we must move the return
24045 address into the link register and make available the register that
24047 if (regs_available_for_popping
== 0 && pops_needed
> 0)
24049 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
24051 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
24052 reg_containing_return_addr
);
24054 reg_containing_return_addr
= LR_REGNUM
;
24057 /* If we have registers left on the stack then pop some more.
24058 We know that at most we will want to pop FP and SP. */
24059 if (pops_needed
> 0)
24064 thumb_pop (f
, regs_available_for_popping
);
24066 /* We have popped either FP or SP.
24067 Move whichever one it is into the correct register. */
24068 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24069 move_to
= number_of_first_bit_set (regs_to_pop
);
24071 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
24073 regs_to_pop
&= ~(1 << move_to
);
24078 /* If we still have not popped everything then we must have only
24079 had one register available to us and we are now popping the SP. */
24080 if (pops_needed
> 0)
24084 thumb_pop (f
, regs_available_for_popping
);
24086 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24088 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
24090 assert (regs_to_pop == (1 << STACK_POINTER))
24091 assert (pops_needed == 1)
24095 /* If necessary restore the a4 register. */
24098 if (reg_containing_return_addr
!= LR_REGNUM
)
24100 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24101 reg_containing_return_addr
= LR_REGNUM
;
24104 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
24107 if (crtl
->calls_eh_return
)
24108 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
24110 /* Return to caller. */
24111 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24113 /* This is for the cases where LR is not being used to contain the return
24114 address. It may therefore contain information that we might not want
24115 to leak, hence it must be cleared. The value in R0 will never be a
24116 secret at this point, so it is safe to use it, see the clearing code
24117 in 'cmse_nonsecure_entry_clear_before_return'. */
24118 if (reg_containing_return_addr
!= LR_REGNUM
)
24119 asm_fprintf (f
, "\tmov\tlr, r0\n");
24121 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr
);
24122 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
24125 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
24128 /* Scan INSN just before assembler is output for it.
24129 For Thumb-1, we track the status of the condition codes; this
24130 information is used in the cbranchsi4_insn pattern. */
24132 thumb1_final_prescan_insn (rtx_insn
*insn
)
24134 if (flag_print_asm_name
)
24135 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
24136 INSN_ADDRESSES (INSN_UID (insn
)));
24137 /* Don't overwrite the previous setter when we get to a cbranch. */
24138 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
24140 enum attr_conds conds
;
24142 if (cfun
->machine
->thumb1_cc_insn
)
24144 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
24145 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
24148 conds
= get_attr_conds (insn
);
24149 if (conds
== CONDS_SET
)
24151 rtx set
= single_set (insn
);
24152 cfun
->machine
->thumb1_cc_insn
= insn
;
24153 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
24154 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
24155 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
24156 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
24158 rtx src1
= XEXP (SET_SRC (set
), 1);
24159 if (src1
== const0_rtx
)
24160 cfun
->machine
->thumb1_cc_mode
= CCmode
;
24162 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
24164 /* Record the src register operand instead of dest because
24165 cprop_hardreg pass propagates src. */
24166 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
24169 else if (conds
!= CONDS_NOCOND
)
24170 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
24173 /* Check if unexpected far jump is used. */
24174 if (cfun
->machine
->lr_save_eliminated
24175 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24176 internal_error("Unexpected thumb1 far jump");
24180 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
24182 unsigned HOST_WIDE_INT mask
= 0xff;
24185 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
24186 if (val
== 0) /* XXX */
24189 for (i
= 0; i
< 25; i
++)
24190 if ((val
& (mask
<< i
)) == val
)
24196 /* Returns nonzero if the current function contains,
24197 or might contain a far jump. */
24199 thumb_far_jump_used_p (void)
24202 bool far_jump
= false;
24203 unsigned int func_size
= 0;
24205 /* If we have already decided that far jumps may be used,
24206 do not bother checking again, and always return true even if
24207 it turns out that they are not being used. Once we have made
24208 the decision that far jumps are present (and that hence the link
24209 register will be pushed onto the stack) we cannot go back on it. */
24210 if (cfun
->machine
->far_jump_used
)
24213 /* If this function is not being called from the prologue/epilogue
24214 generation code then it must be being called from the
24215 INITIAL_ELIMINATION_OFFSET macro. */
24216 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
24218 /* In this case we know that we are being asked about the elimination
24219 of the arg pointer register. If that register is not being used,
24220 then there are no arguments on the stack, and we do not have to
24221 worry that a far jump might force the prologue to push the link
24222 register, changing the stack offsets. In this case we can just
24223 return false, since the presence of far jumps in the function will
24224 not affect stack offsets.
24226 If the arg pointer is live (or if it was live, but has now been
24227 eliminated and so set to dead) then we do have to test to see if
24228 the function might contain a far jump. This test can lead to some
24229 false negatives, since before reload is completed, then length of
24230 branch instructions is not known, so gcc defaults to returning their
24231 longest length, which in turn sets the far jump attribute to true.
24233 A false negative will not result in bad code being generated, but it
24234 will result in a needless push and pop of the link register. We
24235 hope that this does not occur too often.
24237 If we need doubleword stack alignment this could affect the other
24238 elimination offsets so we can't risk getting it wrong. */
24239 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
24240 cfun
->machine
->arg_pointer_live
= 1;
24241 else if (!cfun
->machine
->arg_pointer_live
)
24245 /* We should not change far_jump_used during or after reload, as there is
24246 no chance to change stack frame layout. */
24247 if (reload_in_progress
|| reload_completed
)
24250 /* Check to see if the function contains a branch
24251 insn with the far jump attribute set. */
24252 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
24254 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24258 func_size
+= get_attr_length (insn
);
24261 /* Attribute far_jump will always be true for thumb1 before
24262 shorten_branch pass. So checking far_jump attribute before
24263 shorten_branch isn't much useful.
24265 Following heuristic tries to estimate more accurately if a far jump
24266 may finally be used. The heuristic is very conservative as there is
24267 no chance to roll-back the decision of not to use far jump.
24269 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24270 2-byte insn is associated with a 4 byte constant pool. Using
24271 function size 2048/3 as the threshold is conservative enough. */
24274 if ((func_size
* 3) >= 2048)
24276 /* Record the fact that we have decided that
24277 the function does use far jumps. */
24278 cfun
->machine
->far_jump_used
= 1;
24286 /* Return nonzero if FUNC must be entered in ARM mode. */
24288 is_called_in_ARM_mode (tree func
)
24290 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
24292 /* Ignore the problem about functions whose address is taken. */
24293 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
24297 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
24303 /* Given the stack offsets and register mask in OFFSETS, decide how
24304 many additional registers to push instead of subtracting a constant
24305 from SP. For epilogues the principle is the same except we use pop.
24306 FOR_PROLOGUE indicates which we're generating. */
24308 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
24310 HOST_WIDE_INT amount
;
24311 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
24312 /* Extract a mask of the ones we can give to the Thumb's push/pop
24314 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
24315 /* Then count how many other high registers will need to be pushed. */
24316 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24317 int n_free
, reg_base
, size
;
24319 if (!for_prologue
&& frame_pointer_needed
)
24320 amount
= offsets
->locals_base
- offsets
->saved_regs
;
24322 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24324 /* If the stack frame size is 512 exactly, we can save one load
24325 instruction, which should make this a win even when optimizing
24327 if (!optimize_size
&& amount
!= 512)
24330 /* Can't do this if there are high registers to push. */
24331 if (high_regs_pushed
!= 0)
24334 /* Shouldn't do it in the prologue if no registers would normally
24335 be pushed at all. In the epilogue, also allow it if we'll have
24336 a pop insn for the PC. */
24339 || TARGET_BACKTRACE
24340 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
24341 || TARGET_INTERWORK
24342 || crtl
->args
.pretend_args_size
!= 0))
24345 /* Don't do this if thumb_expand_prologue wants to emit instructions
24346 between the push and the stack frame allocation. */
24348 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24349 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
24356 size
= arm_size_return_regs ();
24357 reg_base
= ARM_NUM_INTS (size
);
24358 live_regs_mask
>>= reg_base
;
24361 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
24362 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
24364 live_regs_mask
>>= 1;
24370 gcc_assert (amount
/ 4 * 4 == amount
);
24372 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
24373 return (amount
- 508) / 4;
24374 if (amount
<= n_free
* 4)
24379 /* The bits which aren't usefully expanded as rtl. */
24381 thumb1_unexpanded_epilogue (void)
24383 arm_stack_offsets
*offsets
;
24385 unsigned long live_regs_mask
= 0;
24386 int high_regs_pushed
= 0;
24388 int had_to_push_lr
;
24391 if (cfun
->machine
->return_used_this_function
!= 0)
24394 if (IS_NAKED (arm_current_func_type ()))
24397 offsets
= arm_get_frame_offsets ();
24398 live_regs_mask
= offsets
->saved_regs_mask
;
24399 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24401 /* If we can deduce the registers used from the function's return value.
24402 This is more reliable that examining df_regs_ever_live_p () because that
24403 will be set if the register is ever used in the function, not just if
24404 the register is used to hold a return value. */
24405 size
= arm_size_return_regs ();
24407 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
24410 unsigned long extra_mask
= (1 << extra_pop
) - 1;
24411 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
24414 /* The prolog may have pushed some high registers to use as
24415 work registers. e.g. the testsuite file:
24416 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24417 compiles to produce:
24418 push {r4, r5, r6, r7, lr}
24422 as part of the prolog. We have to undo that pushing here. */
24424 if (high_regs_pushed
)
24426 unsigned long mask
= live_regs_mask
& 0xff;
24429 /* The available low registers depend on the size of the value we are
24437 /* Oh dear! We have no low registers into which we can pop
24440 ("no low registers available for popping high registers");
24442 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
24443 if (live_regs_mask
& (1 << next_hi_reg
))
24446 while (high_regs_pushed
)
24448 /* Find lo register(s) into which the high register(s) can
24450 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24452 if (mask
& (1 << regno
))
24453 high_regs_pushed
--;
24454 if (high_regs_pushed
== 0)
24458 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
24460 /* Pop the values into the low register(s). */
24461 thumb_pop (asm_out_file
, mask
);
24463 /* Move the value(s) into the high registers. */
24464 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24466 if (mask
& (1 << regno
))
24468 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
24471 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
24472 if (live_regs_mask
& (1 << next_hi_reg
))
24477 live_regs_mask
&= ~0x0f00;
24480 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
24481 live_regs_mask
&= 0xff;
24483 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
24485 /* Pop the return address into the PC. */
24486 if (had_to_push_lr
)
24487 live_regs_mask
|= 1 << PC_REGNUM
;
24489 /* Either no argument registers were pushed or a backtrace
24490 structure was created which includes an adjusted stack
24491 pointer, so just pop everything. */
24492 if (live_regs_mask
)
24493 thumb_pop (asm_out_file
, live_regs_mask
);
24495 /* We have either just popped the return address into the
24496 PC or it is was kept in LR for the entire function.
24497 Note that thumb_pop has already called thumb_exit if the
24498 PC was in the list. */
24499 if (!had_to_push_lr
)
24500 thumb_exit (asm_out_file
, LR_REGNUM
);
24504 /* Pop everything but the return address. */
24505 if (live_regs_mask
)
24506 thumb_pop (asm_out_file
, live_regs_mask
);
24508 if (had_to_push_lr
)
24512 /* We have no free low regs, so save one. */
24513 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
24517 /* Get the return address into a temporary register. */
24518 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
24522 /* Move the return address to lr. */
24523 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
24525 /* Restore the low register. */
24526 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
24531 regno
= LAST_ARG_REGNUM
;
24536 /* Remove the argument registers that were pushed onto the stack. */
24537 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
24538 SP_REGNUM
, SP_REGNUM
,
24539 crtl
->args
.pretend_args_size
);
24541 thumb_exit (asm_out_file
, regno
);
24547 /* Functions to save and restore machine-specific function data. */
24548 static struct machine_function
*
24549 arm_init_machine_status (void)
24551 struct machine_function
*machine
;
24552 machine
= ggc_cleared_alloc
<machine_function
> ();
24554 #if ARM_FT_UNKNOWN != 0
24555 machine
->func_type
= ARM_FT_UNKNOWN
;
24560 /* Return an RTX indicating where the return address to the
24561 calling function can be found. */
24563 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
24568 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
24571 /* Do anything needed before RTL is emitted for each function. */
24573 arm_init_expanders (void)
24575 /* Arrange to initialize and mark the machine per-function status. */
24576 init_machine_status
= arm_init_machine_status
;
24578 /* This is to stop the combine pass optimizing away the alignment
24579 adjustment of va_arg. */
24580 /* ??? It is claimed that this should not be necessary. */
24582 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
24585 /* Check that FUNC is called with a different mode. */
24588 arm_change_mode_p (tree func
)
24590 if (TREE_CODE (func
) != FUNCTION_DECL
)
24593 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (func
);
24596 callee_tree
= target_option_default_node
;
24598 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
24599 int flags
= callee_opts
->x_target_flags
;
24601 return (TARGET_THUMB_P (flags
) != TARGET_THUMB
);
24604 /* Like arm_compute_initial_elimination offset. Simpler because there
24605 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24606 to point at the base of the local variables after static stack
24607 space for a function has been allocated. */
24610 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
24612 arm_stack_offsets
*offsets
;
24614 offsets
= arm_get_frame_offsets ();
24618 case ARG_POINTER_REGNUM
:
24621 case STACK_POINTER_REGNUM
:
24622 return offsets
->outgoing_args
- offsets
->saved_args
;
24624 case FRAME_POINTER_REGNUM
:
24625 return offsets
->soft_frame
- offsets
->saved_args
;
24627 case ARM_HARD_FRAME_POINTER_REGNUM
:
24628 return offsets
->saved_regs
- offsets
->saved_args
;
24630 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24631 return offsets
->locals_base
- offsets
->saved_args
;
24634 gcc_unreachable ();
24638 case FRAME_POINTER_REGNUM
:
24641 case STACK_POINTER_REGNUM
:
24642 return offsets
->outgoing_args
- offsets
->soft_frame
;
24644 case ARM_HARD_FRAME_POINTER_REGNUM
:
24645 return offsets
->saved_regs
- offsets
->soft_frame
;
24647 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24648 return offsets
->locals_base
- offsets
->soft_frame
;
24651 gcc_unreachable ();
24656 gcc_unreachable ();
24660 /* Generate the function's prologue. */
24663 thumb1_expand_prologue (void)
24667 HOST_WIDE_INT amount
;
24668 HOST_WIDE_INT size
;
24669 arm_stack_offsets
*offsets
;
24670 unsigned long func_type
;
24672 unsigned long live_regs_mask
;
24673 unsigned long l_mask
;
24674 unsigned high_regs_pushed
= 0;
24675 bool lr_needs_saving
;
24677 func_type
= arm_current_func_type ();
24679 /* Naked functions don't have prologues. */
24680 if (IS_NAKED (func_type
))
24682 if (flag_stack_usage_info
)
24683 current_function_static_stack_size
= 0;
24687 if (IS_INTERRUPT (func_type
))
24689 error ("interrupt Service Routines cannot be coded in Thumb mode");
24693 if (is_called_in_ARM_mode (current_function_decl
))
24694 emit_insn (gen_prologue_thumb1_interwork ());
24696 offsets
= arm_get_frame_offsets ();
24697 live_regs_mask
= offsets
->saved_regs_mask
;
24698 lr_needs_saving
= live_regs_mask
& (1 << LR_REGNUM
);
24700 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24701 l_mask
= live_regs_mask
& 0x40ff;
24702 /* Then count how many other high registers will need to be pushed. */
24703 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24705 if (crtl
->args
.pretend_args_size
)
24707 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
24709 if (cfun
->machine
->uses_anonymous_args
)
24711 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
24712 unsigned long mask
;
24714 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
24715 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
24717 insn
= thumb1_emit_multi_reg_push (mask
, 0);
24721 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24722 stack_pointer_rtx
, x
));
24724 RTX_FRAME_RELATED_P (insn
) = 1;
24727 if (TARGET_BACKTRACE
)
24729 HOST_WIDE_INT offset
= 0;
24730 unsigned work_register
;
24731 rtx work_reg
, x
, arm_hfp_rtx
;
24733 /* We have been asked to create a stack backtrace structure.
24734 The code looks like this:
24738 0 sub SP, #16 Reserve space for 4 registers.
24739 2 push {R7} Push low registers.
24740 4 add R7, SP, #20 Get the stack pointer before the push.
24741 6 str R7, [SP, #8] Store the stack pointer
24742 (before reserving the space).
24743 8 mov R7, PC Get hold of the start of this code + 12.
24744 10 str R7, [SP, #16] Store it.
24745 12 mov R7, FP Get hold of the current frame pointer.
24746 14 str R7, [SP, #4] Store it.
24747 16 mov R7, LR Get hold of the current return address.
24748 18 str R7, [SP, #12] Store it.
24749 20 add R7, SP, #16 Point at the start of the
24750 backtrace structure.
24751 22 mov FP, R7 Put this value into the frame pointer. */
24753 work_register
= thumb_find_work_register (live_regs_mask
);
24754 work_reg
= gen_rtx_REG (SImode
, work_register
);
24755 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
24757 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24758 stack_pointer_rtx
, GEN_INT (-16)));
24759 RTX_FRAME_RELATED_P (insn
) = 1;
24763 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
24764 RTX_FRAME_RELATED_P (insn
) = 1;
24765 lr_needs_saving
= false;
24767 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
24770 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
24771 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24773 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
24774 x
= gen_frame_mem (SImode
, x
);
24775 emit_move_insn (x
, work_reg
);
24777 /* Make sure that the instruction fetching the PC is in the right place
24778 to calculate "start of backtrace creation code + 12". */
24779 /* ??? The stores using the common WORK_REG ought to be enough to
24780 prevent the scheduler from doing anything weird. Failing that
24781 we could always move all of the following into an UNSPEC_VOLATILE. */
24784 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24785 emit_move_insn (work_reg
, x
);
24787 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24788 x
= gen_frame_mem (SImode
, x
);
24789 emit_move_insn (x
, work_reg
);
24791 emit_move_insn (work_reg
, arm_hfp_rtx
);
24793 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24794 x
= gen_frame_mem (SImode
, x
);
24795 emit_move_insn (x
, work_reg
);
24799 emit_move_insn (work_reg
, arm_hfp_rtx
);
24801 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24802 x
= gen_frame_mem (SImode
, x
);
24803 emit_move_insn (x
, work_reg
);
24805 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24806 emit_move_insn (work_reg
, x
);
24808 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24809 x
= gen_frame_mem (SImode
, x
);
24810 emit_move_insn (x
, work_reg
);
24813 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
24814 emit_move_insn (work_reg
, x
);
24816 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
24817 x
= gen_frame_mem (SImode
, x
);
24818 emit_move_insn (x
, work_reg
);
24820 x
= GEN_INT (offset
+ 12);
24821 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24823 emit_move_insn (arm_hfp_rtx
, work_reg
);
24825 /* Optimization: If we are not pushing any low registers but we are going
24826 to push some high registers then delay our first push. This will just
24827 be a push of LR and we can combine it with the push of the first high
24829 else if ((l_mask
& 0xff) != 0
24830 || (high_regs_pushed
== 0 && lr_needs_saving
))
24832 unsigned long mask
= l_mask
;
24833 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
24834 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
24835 RTX_FRAME_RELATED_P (insn
) = 1;
24836 lr_needs_saving
= false;
24839 if (high_regs_pushed
)
24841 unsigned pushable_regs
;
24842 unsigned next_hi_reg
;
24843 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
24844 : crtl
->args
.info
.nregs
;
24845 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
24847 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
24848 if (live_regs_mask
& (1 << next_hi_reg
))
24851 /* Here we need to mask out registers used for passing arguments
24852 even if they can be pushed. This is to avoid using them to stash the high
24853 registers. Such kind of stash may clobber the use of arguments. */
24854 pushable_regs
= l_mask
& (~arg_regs_mask
);
24855 if (lr_needs_saving
)
24856 pushable_regs
&= ~(1 << LR_REGNUM
);
24858 if (pushable_regs
== 0)
24859 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
24861 while (high_regs_pushed
> 0)
24863 unsigned long real_regs_mask
= 0;
24864 unsigned long push_mask
= 0;
24866 for (regno
= LR_REGNUM
; regno
>= 0; regno
--)
24868 if (pushable_regs
& (1 << regno
))
24870 emit_move_insn (gen_rtx_REG (SImode
, regno
),
24871 gen_rtx_REG (SImode
, next_hi_reg
));
24873 high_regs_pushed
--;
24874 real_regs_mask
|= (1 << next_hi_reg
);
24875 push_mask
|= (1 << regno
);
24877 if (high_regs_pushed
)
24879 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
24881 if (live_regs_mask
& (1 << next_hi_reg
))
24889 /* If we had to find a work register and we have not yet
24890 saved the LR then add it to the list of regs to push. */
24891 if (lr_needs_saving
)
24893 push_mask
|= 1 << LR_REGNUM
;
24894 real_regs_mask
|= 1 << LR_REGNUM
;
24895 lr_needs_saving
= false;
24898 insn
= thumb1_emit_multi_reg_push (push_mask
, real_regs_mask
);
24899 RTX_FRAME_RELATED_P (insn
) = 1;
24903 /* Load the pic register before setting the frame pointer,
24904 so we can use r7 as a temporary work register. */
24905 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24906 arm_load_pic_register (live_regs_mask
);
24908 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
24909 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
24910 stack_pointer_rtx
);
24912 size
= offsets
->outgoing_args
- offsets
->saved_args
;
24913 if (flag_stack_usage_info
)
24914 current_function_static_stack_size
= size
;
24916 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24917 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
&& size
)
24918 sorry ("-fstack-check=specific for Thumb-1");
24920 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24921 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
24926 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
24927 GEN_INT (- amount
)));
24928 RTX_FRAME_RELATED_P (insn
) = 1;
24934 /* The stack decrement is too big for an immediate value in a single
24935 insn. In theory we could issue multiple subtracts, but after
24936 three of them it becomes more space efficient to place the full
24937 value in the constant pool and load into a register. (Also the
24938 ARM debugger really likes to see only one stack decrement per
24939 function). So instead we look for a scratch register into which
24940 we can load the decrement, and then we subtract this from the
24941 stack pointer. Unfortunately on the thumb the only available
24942 scratch registers are the argument registers, and we cannot use
24943 these as they may hold arguments to the function. Instead we
24944 attempt to locate a call preserved register which is used by this
24945 function. If we can find one, then we know that it will have
24946 been pushed at the start of the prologue and so we can corrupt
24948 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
24949 if (live_regs_mask
& (1 << regno
))
24952 gcc_assert(regno
<= LAST_LO_REGNUM
);
24954 reg
= gen_rtx_REG (SImode
, regno
);
24956 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
24958 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24959 stack_pointer_rtx
, reg
));
24961 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
24962 plus_constant (Pmode
, stack_pointer_rtx
,
24964 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
24965 RTX_FRAME_RELATED_P (insn
) = 1;
24969 if (frame_pointer_needed
)
24970 thumb_set_frame_pointer (offsets
);
24972 /* If we are profiling, make sure no instructions are scheduled before
24973 the call to mcount. Similarly if the user has requested no
24974 scheduling in the prolog. Similarly if we want non-call exceptions
24975 using the EABI unwinder, to prevent faulting instructions from being
24976 swapped with a stack adjustment. */
24977 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
24978 || (arm_except_unwind_info (&global_options
) == UI_TARGET
24979 && cfun
->can_throw_non_call_exceptions
))
24980 emit_insn (gen_blockage ());
24982 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
24983 if (live_regs_mask
& 0xff)
24984 cfun
->machine
->lr_save_eliminated
= 0;
24987 /* Clear caller saved registers not used to pass return values and leaked
24988 condition flags before exiting a cmse_nonsecure_entry function. */
24991 cmse_nonsecure_entry_clear_before_return (void)
24993 uint64_t to_clear_mask
[2];
24994 uint32_t padding_bits_to_clear
= 0;
24995 uint32_t * padding_bits_to_clear_ptr
= &padding_bits_to_clear
;
24996 int regno
, maxregno
= IP_REGNUM
;
25000 to_clear_mask
[0] = (1ULL << (NUM_ARG_REGS
)) - 1;
25001 to_clear_mask
[0] |= (1ULL << IP_REGNUM
);
25003 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25004 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
25005 to make sure the instructions used to clear them are present. */
25006 if (TARGET_HARD_FLOAT
&& !TARGET_THUMB1
)
25008 uint64_t float_mask
= (1ULL << (D7_VFP_REGNUM
+ 1)) - 1;
25009 maxregno
= LAST_VFP_REGNUM
;
25011 float_mask
&= ~((1ULL << FIRST_VFP_REGNUM
) - 1);
25012 to_clear_mask
[0] |= float_mask
;
25014 float_mask
= (1ULL << (maxregno
- 63)) - 1;
25015 to_clear_mask
[1] = float_mask
;
25017 /* Make sure we don't clear the two scratch registers used to clear the
25018 relevant FPSCR bits in output_return_instruction. */
25019 emit_use (gen_rtx_REG (SImode
, IP_REGNUM
));
25020 to_clear_mask
[0] &= ~(1ULL << IP_REGNUM
);
25021 emit_use (gen_rtx_REG (SImode
, 4));
25022 to_clear_mask
[0] &= ~(1ULL << 4);
25025 /* If the user has defined registers to be caller saved, these are no longer
25026 restored by the function before returning and must thus be cleared for
25027 security purposes. */
25028 for (regno
= NUM_ARG_REGS
; regno
< LAST_VFP_REGNUM
; regno
++)
25030 /* We do not touch registers that can be used to pass arguments as per
25031 the AAPCS, since these should never be made callee-saved by user
25033 if (IN_RANGE (regno
, FIRST_VFP_REGNUM
, D7_VFP_REGNUM
))
25035 if (IN_RANGE (regno
, IP_REGNUM
, PC_REGNUM
))
25037 if (call_used_regs
[regno
])
25038 to_clear_mask
[regno
/ 64] |= (1ULL << (regno
% 64));
25041 /* Make sure we do not clear the registers used to return the result in. */
25042 result_type
= TREE_TYPE (DECL_RESULT (current_function_decl
));
25043 if (!VOID_TYPE_P (result_type
))
25045 result_rtl
= arm_function_value (result_type
, current_function_decl
, 0);
25047 /* No need to check that we return in registers, because we don't
25048 support returning on stack yet. */
25050 &= ~compute_not_to_clear_mask (result_type
, result_rtl
, 0,
25051 padding_bits_to_clear_ptr
);
25054 if (padding_bits_to_clear
!= 0)
25057 /* Padding bits to clear is not 0 so we know we are dealing with
25058 returning a composite type, which only uses r0. Let's make sure that
25059 r1-r3 is cleared too, we will use r1 as a scratch register. */
25060 gcc_assert ((to_clear_mask
[0] & 0xe) == 0xe);
25062 reg_rtx
= gen_rtx_REG (SImode
, R1_REGNUM
);
25064 /* Fill the lower half of the negated padding_bits_to_clear. */
25065 emit_move_insn (reg_rtx
,
25066 GEN_INT ((((~padding_bits_to_clear
) << 16u) >> 16u)));
25068 /* Also fill the top half of the negated padding_bits_to_clear. */
25069 if (((~padding_bits_to_clear
) >> 16) > 0)
25070 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode
, reg_rtx
,
25073 GEN_INT ((~padding_bits_to_clear
) >> 16)));
25075 emit_insn (gen_andsi3 (gen_rtx_REG (SImode
, R0_REGNUM
),
25076 gen_rtx_REG (SImode
, R0_REGNUM
),
25080 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
25082 if (!(to_clear_mask
[regno
/ 64] & (1ULL << (regno
% 64))))
25085 if (IS_VFP_REGNUM (regno
))
25087 /* If regno is an even vfp register and its successor is also to
25088 be cleared, use vmov. */
25089 if (TARGET_VFP_DOUBLE
25090 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
25091 && to_clear_mask
[regno
/ 64] & (1ULL << ((regno
% 64) + 1)))
25093 emit_move_insn (gen_rtx_REG (DFmode
, regno
),
25094 CONST1_RTX (DFmode
));
25095 emit_use (gen_rtx_REG (DFmode
, regno
));
25100 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
25101 CONST1_RTX (SFmode
));
25102 emit_use (gen_rtx_REG (SFmode
, regno
));
25109 if (regno
== R0_REGNUM
)
25110 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25113 /* R0 has either been cleared before, see code above, or it
25114 holds a return value, either way it is not secret
25116 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25117 gen_rtx_REG (SImode
, R0_REGNUM
));
25118 emit_use (gen_rtx_REG (SImode
, regno
));
25122 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25123 gen_rtx_REG (SImode
, LR_REGNUM
));
25124 emit_use (gen_rtx_REG (SImode
, regno
));
25130 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
25131 POP instruction can be generated. LR should be replaced by PC. All
25132 the checks required are already done by USE_RETURN_INSN (). Hence,
25133 all we really need to check here is if single register is to be
25134 returned, or multiple register return. */
/* Expand a Thumb-2 function return.  If SIMPLE_RETURN is false and any
   registers were saved, emit a single multi-register POP that loads the
   saved LR slot directly into PC; otherwise fall through to a plain
   simple_return.  NOTE(review): this chunk is extraction-mangled
   (identifiers split across lines, brace/declaration lines dropped);
   code is kept byte-identical and only comments are added.  */
25136 thumb2_expand_return (bool simple_return
)
25139 unsigned long saved_regs_mask
;
25140 arm_stack_offsets
*offsets
;
25142 offsets
= arm_get_frame_offsets ();
25143 saved_regs_mask
= offsets
->saved_regs_mask
;
/* Count how many core registers the prologue saved (accumulator
   declaration appears lost to extraction — TODO confirm).  */
25145 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25146 if (saved_regs_mask
& (1 << i
))
25149 if (!simple_return
&& saved_regs_mask
)
25151 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25152 functions or adapt code to handle according to ACLE. This path should
25153 not be reachable for cmse_nonsecure_entry functions though we prefer
25154 to assert it for now to ensure that future code changes do not silently
25155 change this behavior. */
25156 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
/* Single-register case: build a PARALLEL of (return) plus a
   post-increment pop of the saved LR slot straight into PC.  */
25159 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25160 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
25161 rtx addr
= gen_rtx_MEM (SImode
,
25162 gen_rtx_POST_INC (SImode
,
25163 stack_pointer_rtx
));
25164 set_mem_alias_set (addr
, get_frame_alias_set ());
25165 XVECEXP (par
, 0, 0) = ret_rtx
;
25166 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
25167 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
25168 emit_jump_insn (par
);
/* Multi-register case: substitute PC for LR in the pop mask so the
   single POP also performs the return.  */
25172 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
25173 saved_regs_mask
|= (1 << PC_REGNUM
);
25174 arm_emit_multi_reg_pop (saved_regs_mask
);
/* Plain return path; CMSE entry functions must scrub secret state
   from caller-saved registers first.  */
25179 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25180 cmse_nonsecure_entry_clear_before_return ();
25181 emit_jump_insn (simple_return_rtx
);
/* Expand the Thumb-1 epilogue: deallocate the local frame / outgoing
   args area, keep the scheduler from moving stacked-data accesses past
   the deallocation, and emit clobbers/uses so dataflow sees correct
   register lifetimes.  NOTE(review): extraction-mangled text; code kept
   byte-identical, comments only.  */
25186 thumb1_expand_epilogue (void)
25188 HOST_WIDE_INT amount
;
25189 arm_stack_offsets
*offsets
;
25192 /* Naked functions don't have prologues. */
25193 if (IS_NAKED (arm_current_func_type ()))
25196 offsets
= arm_get_frame_offsets ();
25197 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25199 if (frame_pointer_needed
)
25201 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
25202 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25204 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
25206 gcc_assert (amount
>= 0);
25209 emit_insn (gen_blockage ());
/* Small adjustment fits an immediate add; a larger one (the else arm
   of a dropped conditional — TODO confirm) is staged through r3.  */
25212 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
25213 GEN_INT (amount
)));
25216 /* r3 is always free in the epilogue. */
25217 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
25219 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
25220 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
25224 /* Emit a USE (stack_pointer_rtx), so that
25225 the stack adjustment will not be deleted. */
25226 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25228 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
25229 emit_insn (gen_blockage ());
25231 /* Emit a clobber for each insn that will be restored in the epilogue,
25232 so that flow2 will get register lifetimes correct. */
25233 for (regno
= 0; regno
< 13; regno
++)
25234 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
25235 emit_clobber (gen_rtx_REG (SImode
, regno
))
;
25237 if (! df_regs_ever_live_p (LR_REGNUM
))
25238 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
25240 /* Clear all caller-saved regs that are not used to return. */
25241 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25242 cmse_nonsecure_entry_clear_before_return ();
25245 /* Epilogue code for APCS frame. */
/* Expand the epilogue for an APCS frame (ARM mode with a frame
   pointer): restore VFP and iWMMXt registers relative to the frame
   pointer, then pop the core registers — loading the saved IP into SP
   and, on the fast path, the saved LR directly into PC.
   REALLY_RETURN is false for sibcalls.  NOTE(review): extraction-
   mangled text (brace/declaration/condition lines missing, e.g. the
   `if (really_return ...)` head before original line 25354); code kept
   byte-identical, comments only.  */
25247 arm_expand_epilogue_apcs_frame (bool really_return
)
25249 unsigned long func_type
;
25250 unsigned long saved_regs_mask
;
25253 int floats_from_frame
= 0;
25254 arm_stack_offsets
*offsets
;
25256 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
25257 func_type
= arm_current_func_type ();
25259 /* Get frame offsets for ARM. */
25260 offsets
= arm_get_frame_offsets ();
25261 saved_regs_mask
= offsets
->saved_regs_mask
;
25263 /* Find the offset of the floating-point save area in the frame. */
25265 = (offsets
->saved_args
25266 + arm_compute_static_chain_stack_bytes ()
25269 /* Compute how many core registers saved and how far away the floats are. */
25270 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25271 if (saved_regs_mask
& (1 << i
))
25274 floats_from_frame
+= 4;
25277 if (TARGET_HARD_FLOAT
)
25280 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
25282 /* The offset is from IP_REGNUM. */
25283 int saved_size
= arm_get_vfp_saved_size ();
25284 if (saved_size
> 0)
/* Point IP at the bottom of the VFP save area so the vldm pops below
   can address it.  */
25287 floats_from_frame
+= saved_size
;
25288 insn
= emit_insn (gen_addsi3 (ip_rtx
,
25289 hard_frame_pointer_rtx
,
25290 GEN_INT (-floats_from_frame
)));
25291 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
25292 ip_rtx
, hard_frame_pointer_rtx
);
25295 /* Generate VFP register multi-pop. */
25296 start_reg
= FIRST_VFP_REGNUM
;
25298 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
25299 /* Look for a case where a reg does not need restoring. */
25300 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25301 && (!df_regs_ever_live_p (i
+ 1)
25302 || call_used_regs
[i
+ 1]))
25304 if (start_reg
!= i
)
25305 arm_emit_vfp_multi_reg_pop (start_reg
,
25306 (i
- start_reg
) / 2,
25307 gen_rtx_REG (SImode
,
25312 /* Restore the remaining regs that we have discovered (or possibly
25313 even all of them, if the conditional in the for loop never
25315 if (start_reg
!= i
)
25316 arm_emit_vfp_multi_reg_pop (start_reg
,
25317 (i
- start_reg
) / 2,
25318 gen_rtx_REG (SImode
, IP_REGNUM
));
25323 /* The frame pointer is guaranteed to be non-double-word aligned, as
25324 it is set to double-word-aligned old_stack_pointer - 4. */
25326 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
/* Restore iWMMXt registers by frame-pointer-relative loads, highest
   register first.  */
25328 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
25329 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25331 rtx addr
= gen_frame_mem (V2SImode
,
25332 plus_constant (Pmode
, hard_frame_pointer_rtx
,
25334 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25335 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25336 gen_rtx_REG (V2SImode
, i
),
25342 /* saved_regs_mask should contain IP which contains old stack pointer
25343 at the time of activation creation. Since SP and IP are adjacent registers,
25344 we can restore the value directly into SP. */
25345 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
25346 saved_regs_mask
&= ~(1 << IP_REGNUM
);
25347 saved_regs_mask
|= (1 << SP_REGNUM
);
25349 /* There are two registers left in saved_regs_mask - LR and PC. We
25350 only need to restore LR (the return address), but to
25351 save time we can load it directly into PC, unless we need a
25352 special function exit sequence, or we are not really returning. */
25354 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
25355 && !crtl
->calls_eh_return
)
25356 /* Delete LR from the register mask, so that LR on
25357 the stack is loaded into the PC in the register mask. */
25358 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25360 saved_regs_mask
&= ~(1 << PC_REGNUM
);
25362 num_regs
= bit_count (saved_regs_mask
);
25363 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
25366 emit_insn (gen_blockage ());
25367 /* Unwind the stack to just below the saved registers. */
25368 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25369 hard_frame_pointer_rtx
,
25370 GEN_INT (- 4 * num_regs
)));
25372 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
25373 stack_pointer_rtx
, hard_frame_pointer_rtx
);
25376 arm_emit_multi_reg_pop (saved_regs_mask
);
25378 if (IS_INTERRUPT (func_type
))
25380 /* Interrupt handlers will have pushed the
25381 IP onto the stack, so restore it now. */
25383 rtx addr
= gen_rtx_MEM (SImode
,
25384 gen_rtx_POST_INC (SImode
,
25385 stack_pointer_rtx
));
25386 set_mem_alias_set (addr
, get_frame_alias_set ());
25387 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
25388 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25389 gen_rtx_REG (SImode
, IP_REGNUM
),
/* If the return already happened via the PC pop above, stop here.  */
25393 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
25396 if (crtl
->calls_eh_return
)
25397 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25399 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25401 if (IS_STACKALIGN (func_type
))
25402 /* Restore the original stack pointer. Before prologue, the stack was
25403 realigned and the original stack pointer saved in r0. For details,
25404 see comment in arm_expand_prologue. */
25405 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25407 emit_jump_insn (simple_return_rtx
);
25410 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25411 function is not a sibcall. */
/* Expand the generic ARM/Thumb-2 epilogue (non-APCS-frame path):
   deallocate the frame, restore VFP/iWMMXt/core registers, undo any
   pretend-args adjustment, handle CMSE register clearing and EH
   return, then emit the return jump.  REALLY_RETURN is false for
   sibcalls.  NOTE(review): extraction-mangled text — declaration,
   brace and several condition lines are missing (e.g. original lines
   25453, 25513, 25626, 25653); code kept byte-identical, comments
   only.  */
25413 arm_expand_epilogue (bool really_return
)
25415 unsigned long func_type
;
25416 unsigned long saved_regs_mask
;
25420 arm_stack_offsets
*offsets
;
25422 func_type
= arm_current_func_type ();
25424 /* Naked functions don't have epilogue. Hence, generate return pattern, and
25425 let output_return_instruction take care of instruction emission if any. */
25426 if (IS_NAKED (func_type
)
25427 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
25430 emit_jump_insn (simple_return_rtx
);
25434 /* If we are throwing an exception, then we really must be doing a
25435 return, so we can't tail-call. */
25436 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
/* APCS frames have their own dedicated epilogue expander.  */
25438 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
25440 arm_expand_epilogue_apcs_frame (really_return
);
25444 /* Get frame offsets for ARM. */
25445 offsets
= arm_get_frame_offsets ();
25446 saved_regs_mask
= offsets
->saved_regs_mask
;
25447 num_regs
= bit_count (saved_regs_mask
);
25449 if (frame_pointer_needed
)
25452 /* Restore stack pointer if necessary. */
25455 /* In ARM mode, frame pointer points to first saved register.
25456 Restore stack pointer to last saved register. */
25457 amount
= offsets
->frame
- offsets
->saved_regs
;
25459 /* Force out any pending memory operations that reference stacked data
25460 before stack de-allocation occurs. */
25461 emit_insn (gen_blockage ());
25462 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25463 hard_frame_pointer_rtx
,
25464 GEN_INT (amount
)));
25465 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25467 hard_frame_pointer_rtx
);
25469 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25471 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25475 /* In Thumb-2 mode, the frame pointer points to the last saved
25477 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25480 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
25481 hard_frame_pointer_rtx
,
25482 GEN_INT (amount
)));
25483 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25484 hard_frame_pointer_rtx
,
25485 hard_frame_pointer_rtx
);
25488 /* Force out any pending memory operations that reference stacked data
25489 before stack de-allocation occurs. */
25490 emit_insn (gen_blockage ());
25491 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
25492 hard_frame_pointer_rtx
));
25493 arm_add_cfa_adjust_cfa_note (insn
, 0,
25495 hard_frame_pointer_rtx
);
25496 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25498 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25503 /* Pop off outgoing args and local frame to adjust stack pointer to
25504 last saved register. */
25505 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25509 /* Force out any pending memory operations that reference stacked data
25510 before stack de-allocation occurs. */
25511 emit_insn (gen_blockage ());
25512 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25514 GEN_INT (amount
)));
25515 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25516 stack_pointer_rtx
, stack_pointer_rtx
);
25517 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25519 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25523 if (TARGET_HARD_FLOAT
)
25525 /* Generate VFP register multi-pop. */
25526 int end_reg
= LAST_VFP_REGNUM
+ 1;
25528 /* Scan the registers in reverse order. We need to match
25529 any groupings made in the prologue and generate matching
25530 vldm operations. The need to match groups is because,
25531 unlike pop, vldm can only do consecutive regs. */
25532 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
25533 /* Look for a case where a reg does not need restoring. */
25534 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25535 && (!df_regs_ever_live_p (i
+ 1)
25536 || call_used_regs
[i
+ 1]))
25538 /* Restore the regs discovered so far (from reg+2 to
25540 if (end_reg
> i
+ 2)
25541 arm_emit_vfp_multi_reg_pop (i
+ 2,
25542 (end_reg
- (i
+ 2)) / 2,
25543 stack_pointer_rtx
);
25547 /* Restore the remaining regs that we have discovered (or possibly
25548 even all of them, if the conditional in the for loop never
25550 if (end_reg
> i
+ 2)
25551 arm_emit_vfp_multi_reg_pop (i
+ 2,
25552 (end_reg
- (i
+ 2)) / 2,
25553 stack_pointer_rtx
);
/* Pop iWMMXt registers with post-increment loads, lowest first.  */
25557 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
25558 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25561 rtx addr
= gen_rtx_MEM (V2SImode
,
25562 gen_rtx_POST_INC (SImode
,
25563 stack_pointer_rtx
));
25564 set_mem_alias_set (addr
, get_frame_alias_set ());
25565 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25566 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25567 gen_rtx_REG (V2SImode
, i
),
25569 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25570 stack_pointer_rtx
, stack_pointer_rtx
);
25573 if (saved_regs_mask
)
25576 bool return_in_pc
= false;
/* Fast-return check: when legal, swap LR for PC in the pop mask so
   the final pop also performs the return.  */
25578 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
25579 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
25580 && !IS_CMSE_ENTRY (func_type
)
25581 && !IS_STACKALIGN (func_type
)
25583 && crtl
->args
.pretend_args_size
== 0
25584 && saved_regs_mask
& (1 << LR_REGNUM
)
25585 && !crtl
->calls_eh_return
)
25587 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25588 saved_regs_mask
|= (1 << PC_REGNUM
);
25589 return_in_pc
= true;
/* Single-register restore: emit one post-increment load per set bit
   (the loop body runs at most once when num_regs == 1).  */
25592 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
25594 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25595 if (saved_regs_mask
& (1 << i
))
25597 rtx addr
= gen_rtx_MEM (SImode
,
25598 gen_rtx_POST_INC (SImode
,
25599 stack_pointer_rtx
));
25600 set_mem_alias_set (addr
, get_frame_alias_set ());
25602 if (i
== PC_REGNUM
)
25604 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25605 XVECEXP (insn
, 0, 0) = ret_rtx
;
25606 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
25608 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
25609 insn
= emit_jump_insn (insn
);
25613 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
25615 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25616 gen_rtx_REG (SImode
, i
),
25618 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25620 stack_pointer_rtx
);
/* Multi-register restore: prefer LDRD-based pops on cores that like
   ldrd/strd, else a single multi-reg pop (enclosing condition lines
   lost to extraction — TODO confirm).  */
25627 && current_tune
->prefer_ldrd_strd
25628 && !optimize_function_for_size_p (cfun
))
25631 thumb2_emit_ldrd_pop (saved_regs_mask
);
25632 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
25633 arm_emit_ldrd_pop (saved_regs_mask
);
25635 arm_emit_multi_reg_pop (saved_regs_mask
);
25638 arm_emit_multi_reg_pop (saved_regs_mask
);
/* Undo the pretend-args / static-chain area pushed by the prologue.  */
25646 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
25650 rtx dwarf
= NULL_RTX
;
25652 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25654 GEN_INT (amount
)));
25656 RTX_FRAME_RELATED_P (tmp
) = 1;
25658 if (cfun
->machine
->uses_anonymous_args
)
25660 /* Restore pretend args. Refer arm_expand_prologue on how to save
25661 pretend_args in stack. */
25662 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
25663 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
25664 for (j
= 0, i
= 0; j
< num_regs
; i
++)
25665 if (saved_regs_mask
& (1 << i
))
25667 rtx reg
= gen_rtx_REG (SImode
, i
);
25668 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
25671 REG_NOTES (tmp
) = dwarf
;
25673 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25674 stack_pointer_rtx
, stack_pointer_rtx
);
25677 /* Clear all caller-saved regs that are not used to return. */
25678 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25680 /* CMSE_ENTRY always returns. */
25681 gcc_assert (really_return
);
25682 cmse_nonsecure_entry_clear_before_return ();
25685 if (!really_return
)
25688 if (crtl
->calls_eh_return
)
25689 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25691 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25693 if (IS_STACKALIGN (func_type
))
25694 /* Restore the original stack pointer. Before prologue, the stack was
25695 realigned and the original stack pointer saved in r0. For details,
25696 see comment in arm_expand_prologue. */
25697 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25699 emit_jump_insn (simple_return_rtx
);
25702 /* Implementation of insn prologue_thumb1_interwork. This is the first
25703 "instruction" of a function called in ARM mode. Swap to thumb mode. */
/* Emit the assembly stub that switches from ARM state into Thumb state
   at function entry (insn prologue_thumb1_interwork): set bit 0 of the
   address in IP and `bx` through it, then define the Thumb-mode
   ".real_start_of" label the assembler and debugger use.
   NOTE(review): extraction-mangled text; code kept byte-identical,
   comments only.  */
25706 thumb1_output_interwork (void)
25709 FILE *f
= asm_out_file
;
25711 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
25712 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
25714 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
25716 /* Generate code sequence to switch us into Thumb mode. */
25717 /* The .code 32 directive has already been emitted by
25718 ASM_DECLARE_FUNCTION_NAME. */
25719 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
25720 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
25722 /* Generate a label, so that the debugger will notice the
25723 change in instruction sets. This label is also used by
25724 the assembler to bypass the ARM code when this function
25725 is called from a Thumb encoded function elsewhere in the
25726 same file. Hence the definition of STUB_NAME here must
25727 agree with the definition in gas/config/tc-arm.c. */
25729 #define STUB_NAME ".real_start_of"
25731 fprintf (f
, "\t.code\t16\n");
25733 if (arm_dllexport_name_p (name
))
25734 name
= arm_strip_name_encoding (name
);
25736 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
25737 fprintf (f
, "\t.thumb_func\n");
25738 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
25743 /* Handle the case of a double word load into a low register from
25744 a computed memory address. The computed address may involve a
25745 register which is overwritten by the load. */
/* Output Thumb-1 assembly for a double-word load into a low register
   pair from a computed address, ordering the two `ldr`s so that a
   base register which overlaps the destination is consumed before it
   is overwritten.  operands[0] = destination register pair,
   operands[1] = source MEM; operands[2] is set here to address+4.
   NOTE(review): extraction-mangled text (case labels and braces
   missing); code kept byte-identical, comments only.  */
25747 thumb_load_double_from_address (rtx
*operands
)
25755 gcc_assert (REG_P (operands
[0]));
25756 gcc_assert (MEM_P (operands
[1]));
25758 /* Get the memory address. */
25759 addr
= XEXP (operands
[1], 0);
25761 /* Work out how the memory address is computed. */
25762 switch (GET_CODE (addr
))
/* Plain REG address: load high word first if the base register is
   also the low destination register.  */
25765 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25767 if (REGNO (operands
[0]) == REGNO (addr
))
25769 output_asm_insn ("ldr\t%H0, %2", operands
);
25770 output_asm_insn ("ldr\t%0, %1", operands
);
25774 output_asm_insn ("ldr\t%0, %1", operands
);
25775 output_asm_insn ("ldr\t%H0, %2", operands
);
25780 /* Compute <address> + 4 for the high order load. */
25781 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25783 output_asm_insn ("ldr\t%0, %1", operands
);
25784 output_asm_insn ("ldr\t%H0, %2", operands
);
/* PLUS address: split into base and offset operands.  */
25788 arg1
= XEXP (addr
, 0);
25789 arg2
= XEXP (addr
, 1);
25791 if (CONSTANT_P (arg1
))
25792 base
= arg2
, offset
= arg1
;
25794 base
= arg1
, offset
= arg2
;
25796 gcc_assert (REG_P (base
));
25798 /* Catch the case of <address> = <reg> + <reg> */
25799 if (REG_P (offset
))
25801 int reg_offset
= REGNO (offset
);
25802 int reg_base
= REGNO (base
);
25803 int reg_dest
= REGNO (operands
[0]);
25805 /* Add the base and offset registers together into the
25806 higher destination register. */
25807 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
25808 reg_dest
+ 1, reg_base
, reg_offset
);
25810 /* Load the lower destination register from the address in
25811 the higher destination register. */
25812 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
25813 reg_dest
, reg_dest
+ 1);
25815 /* Load the higher destination register from its own address
25817 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
25818 reg_dest
+ 1, reg_dest
+ 1);
25822 /* Compute <address> + 4 for the high order load. */
25823 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25825 /* If the computed address is held in the low order register
25826 then load the high order register first, otherwise always
25827 load the low order register first. */
25828 if (REGNO (operands
[0]) == REGNO (base
))
25830 output_asm_insn ("ldr\t%H0, %2", operands
);
25831 output_asm_insn ("ldr\t%0, %1", operands
);
25835 output_asm_insn ("ldr\t%0, %1", operands
);
25836 output_asm_insn ("ldr\t%H0, %2", operands
);
25842 /* With no registers to worry about we can just load the value
25844 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25846 output_asm_insn ("ldr\t%H0, %2", operands
);
25847 output_asm_insn ("ldr\t%0, %1", operands
);
25851 gcc_unreachable ();
/* Output a Thumb ldmia/stmia pair that copies N (2 or 3) words from
   the address in operands[1] to the address in operands[0], using
   operands[4..6] as scratch registers.  The swaps sort the scratch
   registers into ascending order, as ldm/stm register lists require.
   NOTE(review): extraction-mangled text (switch/case lines missing);
   code kept byte-identical, comments only.  */
25858 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
/* Two-word copy: one compare-and-swap orders the pair.  */
25863 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25864 std::swap (operands
[4], operands
[5]);
25866 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
25867 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
/* Three-word copy: a three-element bubble sort of the scratch regs.  */
25871 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25872 std::swap (operands
[4], operands
[5]);
25873 if (REGNO (operands
[5]) > REGNO (operands
[6]))
25874 std::swap (operands
[5], operands
[6]);
25875 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25876 std::swap (operands
[4], operands
[5]);
25878 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
25879 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
25883 gcc_unreachable ();
25889 /* Output a call-via instruction for thumb state. */
/* Output a Thumb call-via-register: `bl` to a per-register trampoline
   label.  In the main text section one label per register per
   compilation unit suffices; with function sections each function
   gets its own (reachability cannot be assumed).
   NOTE(review): extraction-mangled text; code kept byte-identical,
   comments only.  */
25891 thumb_call_via_reg (rtx reg
)
25893 int regno
= REGNO (reg
);
25896 gcc_assert (regno
< LR_REGNUM
);
25898 /* If we are in the normal text section we can use a single instance
25899 per compilation unit. If we are doing function sections, then we need
25900 an entry per section, since we can't rely on reachability. */
25901 if (in_section
== text_section
)
25903 thumb_call_reg_needed
= 1;
25905 if (thumb_call_via_label
[regno
] == NULL
)
25906 thumb_call_via_label
[regno
] = gen_label_rtx ();
25907 labelp
= thumb_call_via_label
+ regno
;
25911 if (cfun
->machine
->call_via
[regno
] == NULL
)
25912 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
25913 labelp
= cfun
->machine
->call_via
+ regno
;
25916 output_asm_insn ("bl\t%a0", labelp
);
25920 /* Routines for generating rtl. */
/* Expand a Thumb memory-to-memory copy (movmemqi): copy LEN bytes
   from operands[1] to operands[0] using 12- and 8-byte block-move
   patterns for the bulk, then word, halfword and byte moves for the
   remainder.  NOTE(review): extraction-mangled text (loop heads and
   offset expressions missing); code kept byte-identical, comments
   only.  */
25922 thumb_expand_movmemqi (rtx
*operands
)
25924 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
25925 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
25926 HOST_WIDE_INT len
= INTVAL (operands
[2]);
25927 HOST_WIDE_INT offset
= 0;
/* Bulk copies via block-move patterns (enclosing while-loops lost to
   extraction — TODO confirm 12- then 8-byte chunking).  */
25931 emit_insn (gen_movmem12b (out
, in
, out
, in
));
25937 emit_insn (gen_movmem8b (out
, in
, out
, in
));
/* Remaining word.  */
25943 rtx reg
= gen_reg_rtx (SImode
);
25944 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
25945 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
/* Remaining halfword.  */
25952 rtx reg
= gen_reg_rtx (HImode
);
25953 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
25954 plus_constant (Pmode
, in
,
25956 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
/* Remaining byte.  */
25965 rtx reg
= gen_reg_rtx (QImode
);
25966 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
25967 plus_constant (Pmode
, in
,
25969 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
/* Reload helper: emit a HImode output reload through the
   thumb_movhi_clobber pattern (operands[0..2]: destination, source,
   scratch).  NOTE(review): extraction-mangled text; code kept
   byte-identical, comments only.  */
25976 thumb_reload_out_hi (rtx
*operands
)
25978 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
25981 /* Return the length of a function name prefix
25982 that starts with the character 'c'. */
/* Return the length of a name-encoding prefix that starts with
   character C, driven by the ARM_NAME_ENCODING_LENGTHS table macro.
   NOTE(review): body largely lost to extraction (switch/return lines
   missing); code kept byte-identical, comments only.  */
25984 arm_get_strip_length (int c
)
25988 ARM_NAME_ENCODING_LENGTHS
25993 /* Return a pointer to a function's name with any
25994 and all prefix encodings stripped from it. */
/* Return NAME with all leading name-encoding prefixes skipped, by
   repeatedly advancing past each prefix length reported by
   arm_get_strip_length.  NOTE(review): loop body and return lost to
   extraction; code kept byte-identical, comments only.  */
25996 arm_strip_name_encoding (const char *name
)
26000 while ((skip
= arm_get_strip_length (* name
)))
26006 /* If there is a '*' anywhere in the name's prefix, then
26007 emit the stripped name verbatim, otherwise prepend an
26008 underscore if leading underscores are being used. */
/* Write NAME to STREAM with prefix encodings stripped.  If any
   stripped prefix contained '*', emit the remaining name verbatim;
   otherwise emit it through asm_fprintf's %U (user-label prefix).
   NOTE(review): extraction-mangled text; code kept byte-identical,
   comments only.  */
26010 arm_asm_output_labelref (FILE *stream
, const char *name
)
26015 while ((skip
= arm_get_strip_length (* name
)))
26017 verbatim
|= (*name
== '*');
26022 fputs (name
, stream
);
26024 asm_fprintf (stream
, "%U%s", name
);
26027 /* This function is used to emit an EABI tag and its associated value.
26028 We emit the numerical value of the tag in case the assembler does not
26029 support textual tags. (Eg gas prior to 2.20). If requested we include
26030 the tag name in a comment so that anyone reading the assembler output
26031 will know which tag is being set.
26033 This function is not static because arm-c.c needs it too. */
/* Emit EABI attribute NUM with value VAL as a numeric
   .eabi_attribute directive (works with assemblers that lack textual
   tag support, e.g. gas before 2.20); append NAME as a comment when
   verbose/debug asm output is requested.  Non-static: also used by
   arm-c.c.  */
26036 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
26038 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
26039 if (flag_verbose_asm
|| flag_debug_asm
)
26040 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
26041 asm_fprintf (asm_out_file
, "\n");
26044 /* This function is used to print CPU tuning information as comment
26045 in assembler file. Pointers are not printed for now. */
/* Dump the current_tune parameters into the assembler file as
   comments (one field per line); pointer-valued fields are not
   printed.  NOTE(review): extraction-mangled text; code kept
   byte-identical, comments only.  */
26048 arm_print_tune_info (void)
26050 asm_fprintf (asm_out_file
, "\t" ASM_COMMENT_START
".tune parameters\n");
26051 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"constant_limit:\t%d\n",
26052 current_tune
->constant_limit
);
26053 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26054 "max_insns_skipped:\t%d\n", current_tune
->max_insns_skipped
);
26055 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26056 "prefetch.num_slots:\t%d\n", current_tune
->prefetch
.num_slots
);
26057 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26058 "prefetch.l1_cache_size:\t%d\n",
26059 current_tune
->prefetch
.l1_cache_size
);
26060 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26061 "prefetch.l1_cache_line_size:\t%d\n",
26062 current_tune
->prefetch
.l1_cache_line_size
);
26063 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26064 "prefer_constant_pool:\t%d\n",
26065 (int) current_tune
->prefer_constant_pool
);
/* branch_cost is a hook taking (speed_p, predictable_p); print its
   value for all four combinations.  */
26066 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26067 "branch_cost:\t(s:speed, p:predictable)\n");
26068 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\ts&p\tcost\n");
26069 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t00\t%d\n",
26070 current_tune
->branch_cost (false, false));
26071 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t01\t%d\n",
26072 current_tune
->branch_cost (false, true));
26073 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t10\t%d\n",
26074 current_tune
->branch_cost (true, false));
26075 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t11\t%d\n",
26076 current_tune
->branch_cost (true, true));
26077 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26078 "prefer_ldrd_strd:\t%d\n",
26079 (int) current_tune
->prefer_ldrd_strd
);
26080 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26081 "logical_op_non_short_circuit:\t[%d,%d]\n",
26082 (int) current_tune
->logical_op_non_short_circuit_thumb
,
26083 (int) current_tune
->logical_op_non_short_circuit_arm
);
26084 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26085 "prefer_neon_for_64bits:\t%d\n",
26086 (int) current_tune
->prefer_neon_for_64bits
);
26087 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26088 "disparage_flag_setting_t16_encodings:\t%d\n",
26089 (int) current_tune
->disparage_flag_setting_t16_encodings
);
26090 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26091 "string_ops_prefer_neon:\t%d\n",
26092 (int) current_tune
->string_ops_prefer_neon
);
26093 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26094 "max_insns_inline_memset:\t%d\n",
26095 current_tune
->max_insns_inline_memset
);
26096 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"fusible_ops:\t%u\n",
26097 current_tune
->fusible_ops
);
26098 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"sched_autopref:\t%d\n",
26099 (int) current_tune
->sched_autopref
);
/* TARGET_ASM_FILE_START hook: emit the .arch/.cpu directive for the
   active target (with special handling for armv7ve and for
   "+extension" suffixes in the architecture name), optional tune
   info, and the EABI object attributes describing FP/ABI settings.
   NOTE(review): extraction-mangled text (several declaration and
   condition lines missing, e.g. buf's declaration and the !pos
   branch); code kept byte-identical, comments only.  */
26103 arm_file_start (void)
26109 /* We don't have a specified CPU. Use the architecture to
26112 Note: it might be better to do this unconditionally, then the
26113 assembler would not need to know about all new CPU names as
26115 if (!arm_active_target
.core_name
)
26117 /* armv7ve doesn't support any extensions. */
26118 if (strcmp (arm_active_target
.arch_name
, "armv7ve") == 0)
26120 /* Keep backward compatability for assemblers
26121 which don't support armv7ve. */
26122 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
26123 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
26124 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
26125 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
26126 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
/* Split "arch+ext" into a .arch plus a .arch_extension directive.  */
26130 const char* pos
= strchr (arm_active_target
.arch_name
, '+');
26134 gcc_assert (strlen (arm_active_target
.arch_name
)
26135 <= sizeof (buf
) / sizeof (*pos
));
26136 strncpy (buf
, arm_active_target
.arch_name
,
26137 (pos
- arm_active_target
.arch_name
) * sizeof (*pos
));
26138 buf
[pos
- arm_active_target
.arch_name
] = '\0';
26139 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
26140 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
26143 asm_fprintf (asm_out_file
, "\t.arch %s\n",
26144 arm_active_target
.arch_name
);
/* "generic-<arch>" core names emit the architecture part only.  */
26147 else if (strncmp (arm_active_target
.core_name
, "generic", 7) == 0)
26148 asm_fprintf (asm_out_file
, "\t.arch %s\n",
26149 arm_active_target
.core_name
+ 8);
26152 const char* truncated_name
26153 = arm_rewrite_selected_cpu (arm_active_target
.core_name
);
26154 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
26157 if (print_tune_info
)
26158 arm_print_tune_info ();
26160 if (! TARGET_SOFT_FLOAT
)
26162 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
26163 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26165 if (TARGET_HARD_FLOAT_ABI
)
26166 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26169 /* Some of these attributes only apply when the corresponding features
26170 are used. However we don't have any easy way of figuring this out.
26171 Conservatively record the setting that would have been used. */
26173 if (flag_rounding_math
)
26174 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26176 if (!flag_unsafe_math_optimizations
)
26178 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26179 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26181 if (flag_signaling_nans
)
26182 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26184 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26185 flag_finite_math_only
? 1 : 3);
26187 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26188 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26189 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26190 flag_short_enums
? 1 : 2);
26192 /* Tag_ABI_optimization_goals. */
26195 else if (optimize
>= 2)
26201 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
26203 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26206 if (arm_fp16_format
)
26207 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26208 (int) arm_fp16_format
);
26210 if (arm_lang_output_object_attributes_hook
)
26211 arm_lang_output_object_attributes_hook();
26214 default_file_start ();
26218 arm_file_end (void)
26222 if (NEED_INDICATE_EXEC_STACK
)
26223 /* Add .note.GNU-stack. */
26224 file_end_indicate_exec_stack ();
26226 if (! thumb_call_reg_needed
)
26229 switch_to_section (text_section
);
26230 asm_fprintf (asm_out_file
, "\t.code 16\n");
26231 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
26233 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
26235 rtx label
= thumb_call_via_label
[regno
];
26239 targetm
.asm_out
.internal_label (asm_out_file
, "L",
26240 CODE_LABEL_NUMBER (label
));
26241 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
26247 /* Symbols in the text segment can be accessed without indirecting via the
26248 constant pool; it may take an extra binary operation, but this is still
26249 faster than indirecting via memory. Don't do this when not optimizing,
26250 since we won't be calculating al of the offsets necessary to do this
26254 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
26256 if (optimize
> 0 && TREE_CONSTANT (decl
))
26257 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
26259 default_encode_section_info (decl
, rtl
, first
);
26261 #endif /* !ARM_PE */
26264 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
26266 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
26267 && !strcmp (prefix
, "L"))
26269 arm_ccfsm_state
= 0;
26270 arm_target_insn
= NULL
;
26272 default_internal_label (stream
, prefix
, labelno
);
26275 /* Output code to add DELTA to the first argument, and then jump
26276 to FUNCTION. Used for C++ multiple inheritance. */
26279 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26280 HOST_WIDE_INT
, tree function
)
26282 static int thunk_label
= 0;
26285 int mi_delta
= delta
;
26286 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
26288 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
26291 mi_delta
= - mi_delta
;
26293 final_start_function (emit_barrier (), file
, 1);
26297 int labelno
= thunk_label
++;
26298 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
26299 /* Thunks are entered in arm mode when available. */
26300 if (TARGET_THUMB1_ONLY
)
26302 /* push r3 so we can use it as a temporary. */
26303 /* TODO: Omit this save if r3 is not used. */
26304 fputs ("\tpush {r3}\n", file
);
26305 fputs ("\tldr\tr3, ", file
);
26309 fputs ("\tldr\tr12, ", file
);
26311 assemble_name (file
, label
);
26312 fputc ('\n', file
);
26315 /* If we are generating PIC, the ldr instruction below loads
26316 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26317 the address of the add + 8, so we have:
26319 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26322 Note that we have "+ 1" because some versions of GNU ld
26323 don't set the low bit of the result for R_ARM_REL32
26324 relocations against thumb function symbols.
26325 On ARMv6M this is +4, not +8. */
26326 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
26327 assemble_name (file
, labelpc
);
26328 fputs (":\n", file
);
26329 if (TARGET_THUMB1_ONLY
)
26331 /* This is 2 insns after the start of the thunk, so we know it
26332 is 4-byte aligned. */
26333 fputs ("\tadd\tr3, pc, r3\n", file
);
26334 fputs ("\tmov r12, r3\n", file
);
26337 fputs ("\tadd\tr12, pc, r12\n", file
);
26339 else if (TARGET_THUMB1_ONLY
)
26340 fputs ("\tmov r12, r3\n", file
);
26342 if (TARGET_THUMB1_ONLY
)
26344 if (mi_delta
> 255)
26346 fputs ("\tldr\tr3, ", file
);
26347 assemble_name (file
, label
);
26348 fputs ("+4\n", file
);
26349 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
26350 mi_op
, this_regno
, this_regno
);
26352 else if (mi_delta
!= 0)
26354 /* Thumb1 unified syntax requires s suffix in instruction name when
26355 one of the operands is immediate. */
26356 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
26357 mi_op
, this_regno
, this_regno
,
26363 /* TODO: Use movw/movt for large constants when available. */
26364 while (mi_delta
!= 0)
26366 if ((mi_delta
& (3 << shift
)) == 0)
26370 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
26371 mi_op
, this_regno
, this_regno
,
26372 mi_delta
& (0xff << shift
));
26373 mi_delta
&= ~(0xff << shift
);
26380 if (TARGET_THUMB1_ONLY
)
26381 fputs ("\tpop\t{r3}\n", file
);
26383 fprintf (file
, "\tbx\tr12\n");
26384 ASM_OUTPUT_ALIGN (file
, 2);
26385 assemble_name (file
, label
);
26386 fputs (":\n", file
);
26389 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26390 rtx tem
= XEXP (DECL_RTL (function
), 0);
26391 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26392 pipeline offset is four rather than eight. Adjust the offset
26394 tem
= plus_constant (GET_MODE (tem
), tem
,
26395 TARGET_THUMB1_ONLY
? -3 : -7);
26396 tem
= gen_rtx_MINUS (GET_MODE (tem
),
26398 gen_rtx_SYMBOL_REF (Pmode
,
26399 ggc_strdup (labelpc
)));
26400 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
26403 /* Output ".word .LTHUNKn". */
26404 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
26406 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
26407 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
26411 fputs ("\tb\t", file
);
26412 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
26413 if (NEED_PLT_RELOC
)
26414 fputs ("(PLT)", file
);
26415 fputc ('\n', file
);
26418 final_end_function ();
26421 /* MI thunk handling for TARGET_32BIT. */
26424 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26425 HOST_WIDE_INT vcall_offset
, tree function
)
26427 /* On ARM, this_regno is R0 or R1 depending on
26428 whether the function returns an aggregate or not.
26430 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
26432 ? R1_REGNUM
: R0_REGNUM
);
26434 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
26435 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
26436 reload_completed
= 1;
26437 emit_note (NOTE_INSN_PROLOGUE_END
);
26439 /* Add DELTA to THIS_RTX. */
26441 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
26442 delta
, this_rtx
, this_rtx
, false);
26444 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26445 if (vcall_offset
!= 0)
26447 /* Load *THIS_RTX. */
26448 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
26449 /* Compute *THIS_RTX + VCALL_OFFSET. */
26450 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
26452 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26453 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
26454 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
26457 /* Generate a tail call to the target function. */
26458 if (!TREE_USED (function
))
26460 assemble_external (function
);
26461 TREE_USED (function
) = 1;
26463 rtx funexp
= XEXP (DECL_RTL (function
), 0);
26464 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
26465 rtx_insn
* insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
26466 SIBLING_CALL_P (insn
) = 1;
26468 insn
= get_insns ();
26469 shorten_branches (insn
);
26470 final_start_function (insn
, file
, 1);
26471 final (insn
, file
, 1);
26472 final_end_function ();
26474 /* Stop pretending this is a post-reload pass. */
26475 reload_completed
= 0;
26478 /* Output code to add DELTA to the first argument, and then jump
26479 to FUNCTION. Used for C++ multiple inheritance. */
26482 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
26483 HOST_WIDE_INT vcall_offset
, tree function
)
26486 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26488 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26492 arm_emit_vector_const (FILE *file
, rtx x
)
26495 const char * pattern
;
26497 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
26499 switch (GET_MODE (x
))
26501 case V2SImode
: pattern
= "%08x"; break;
26502 case V4HImode
: pattern
= "%04x"; break;
26503 case V8QImode
: pattern
= "%02x"; break;
26504 default: gcc_unreachable ();
26507 fprintf (file
, "0x");
26508 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
26512 element
= CONST_VECTOR_ELT (x
, i
);
26513 fprintf (file
, pattern
, INTVAL (element
));
26519 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26520 HFmode constant pool entries are actually loaded with ldr. */
26522 arm_emit_fp16_const (rtx c
)
26526 bits
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (c
), HFmode
);
26527 if (WORDS_BIG_ENDIAN
)
26528 assemble_zeros (2);
26529 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
26530 if (!WORDS_BIG_ENDIAN
)
26531 assemble_zeros (2);
26535 arm_output_load_gr (rtx
*operands
)
26542 if (!MEM_P (operands
[1])
26543 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
26544 || !REG_P (reg
= XEXP (sum
, 0))
26545 || !CONST_INT_P (offset
= XEXP (sum
, 1))
26546 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
26547 return "wldrw%?\t%0, %1";
26549 /* Fix up an out-of-range load of a GR register. */
26550 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
26551 wcgr
= operands
[0];
26553 output_asm_insn ("ldr%?\t%0, %1", operands
);
26555 operands
[0] = wcgr
;
26557 output_asm_insn ("tmcr%?\t%0, %1", operands
);
26558 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
26563 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26565 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26566 named arg and all anonymous args onto the stack.
26567 XXX I know the prologue shouldn't be pushing registers, but it is faster
26571 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
26575 int second_time ATTRIBUTE_UNUSED
)
26577 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
26580 cfun
->machine
->uses_anonymous_args
= 1;
26581 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
26583 nregs
= pcum
->aapcs_ncrn
;
26586 int res
= arm_needs_doubleword_align (mode
, type
);
26587 if (res
< 0 && warn_psabi
)
26588 inform (input_location
, "parameter passing for argument of "
26589 "type %qT changed in GCC 7.1", type
);
26595 nregs
= pcum
->nregs
;
26597 if (nregs
< NUM_ARG_REGS
)
26598 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
26601 /* We can't rely on the caller doing the proper promotion when
26602 using APCS or ATPCS. */
26605 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
26607 return !TARGET_AAPCS_BASED
;
26610 static machine_mode
26611 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
26613 int *punsignedp ATTRIBUTE_UNUSED
,
26614 const_tree fntype ATTRIBUTE_UNUSED
,
26615 int for_return ATTRIBUTE_UNUSED
)
26617 if (GET_MODE_CLASS (mode
) == MODE_INT
26618 && GET_MODE_SIZE (mode
) < 4)
26626 arm_default_short_enums (void)
26628 return ARM_DEFAULT_SHORT_ENUMS
;
26632 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26635 arm_align_anon_bitfield (void)
26637 return TARGET_AAPCS_BASED
;
26641 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26644 arm_cxx_guard_type (void)
26646 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
26650 /* The EABI says test the least significant bit of a guard variable. */
26653 arm_cxx_guard_mask_bit (void)
26655 return TARGET_AAPCS_BASED
;
26659 /* The EABI specifies that all array cookies are 8 bytes long. */
26662 arm_get_cookie_size (tree type
)
26666 if (!TARGET_AAPCS_BASED
)
26667 return default_cxx_get_cookie_size (type
);
26669 size
= build_int_cst (sizetype
, 8);
26674 /* The EABI says that array cookies should also contain the element size. */
26677 arm_cookie_has_size (void)
26679 return TARGET_AAPCS_BASED
;
26683 /* The EABI says constructors and destructors should return a pointer to
26684 the object constructed/destroyed. */
26687 arm_cxx_cdtor_returns_this (void)
26689 return TARGET_AAPCS_BASED
;
26692 /* The EABI says that an inline function may never be the key
26696 arm_cxx_key_method_may_be_inline (void)
26698 return !TARGET_AAPCS_BASED
;
26702 arm_cxx_determine_class_data_visibility (tree decl
)
26704 if (!TARGET_AAPCS_BASED
26705 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
26708 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26709 is exported. However, on systems without dynamic vague linkage,
26710 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26711 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
26712 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
26714 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
26715 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
26719 arm_cxx_class_data_always_comdat (void)
26721 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26722 vague linkage if the class has no key function. */
26723 return !TARGET_AAPCS_BASED
;
26727 /* The EABI says __aeabi_atexit should be used to register static
26731 arm_cxx_use_aeabi_atexit (void)
26733 return TARGET_AAPCS_BASED
;
26738 arm_set_return_address (rtx source
, rtx scratch
)
26740 arm_stack_offsets
*offsets
;
26741 HOST_WIDE_INT delta
;
26743 unsigned long saved_regs
;
26745 offsets
= arm_get_frame_offsets ();
26746 saved_regs
= offsets
->saved_regs_mask
;
26748 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
26749 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26752 if (frame_pointer_needed
)
26753 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
26756 /* LR will be the first saved register. */
26757 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
26762 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
26763 GEN_INT (delta
& ~4095)));
26768 addr
= stack_pointer_rtx
;
26770 addr
= plus_constant (Pmode
, addr
, delta
);
26772 /* The store needs to be marked as frame related in order to prevent
26773 DSE from deleting it as dead if it is based on fp. */
26774 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26775 RTX_FRAME_RELATED_P (insn
) = 1;
26776 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26782 thumb_set_return_address (rtx source
, rtx scratch
)
26784 arm_stack_offsets
*offsets
;
26785 HOST_WIDE_INT delta
;
26786 HOST_WIDE_INT limit
;
26789 unsigned long mask
;
26793 offsets
= arm_get_frame_offsets ();
26794 mask
= offsets
->saved_regs_mask
;
26795 if (mask
& (1 << LR_REGNUM
))
26798 /* Find the saved regs. */
26799 if (frame_pointer_needed
)
26801 delta
= offsets
->soft_frame
- offsets
->saved_args
;
26802 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
26808 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
26811 /* Allow for the stack frame. */
26812 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
26814 /* The link register is always the first saved register. */
26817 /* Construct the address. */
26818 addr
= gen_rtx_REG (SImode
, reg
);
26821 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
26822 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
26826 addr
= plus_constant (Pmode
, addr
, delta
);
26828 /* The store needs to be marked as frame related in order to prevent
26829 DSE from deleting it as dead if it is based on fp. */
26830 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26831 RTX_FRAME_RELATED_P (insn
) = 1;
26832 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26835 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26838 /* Implements target hook vector_mode_supported_p. */
26840 arm_vector_mode_supported_p (machine_mode mode
)
26842 /* Neon also supports V2SImode, etc. listed in the clause below. */
26843 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
26844 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
26845 || mode
== V2DImode
|| mode
== V8HFmode
))
26848 if ((TARGET_NEON
|| TARGET_IWMMXT
)
26849 && ((mode
== V2SImode
)
26850 || (mode
== V4HImode
)
26851 || (mode
== V8QImode
)))
26854 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
26855 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
26856 || mode
== V2HAmode
))
26862 /* Implements target hook array_mode_supported_p. */
26865 arm_array_mode_supported_p (machine_mode mode
,
26866 unsigned HOST_WIDE_INT nelems
)
26869 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
26870 && (nelems
>= 2 && nelems
<= 4))
26876 /* Use the option -mvectorize-with-neon-double to override the use of quardword
26877 registers when autovectorizing for Neon, at least until multiple vector
26878 widths are supported properly by the middle-end. */
26880 static machine_mode
26881 arm_preferred_simd_mode (machine_mode mode
)
26887 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
26889 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
26891 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
26893 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
26895 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
26902 if (TARGET_REALLY_IWMMXT
)
26918 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26920 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26921 using r0-r4 for function arguments, r7 for the stack frame and don't have
26922 enough left over to do doubleword arithmetic. For Thumb-2 all the
26923 potentially problematic instructions accept high registers so this is not
26924 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26925 that require many low registers. */
26927 arm_class_likely_spilled_p (reg_class_t rclass
)
26929 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
26930 || rclass
== CC_REG
)
26936 /* Implements target hook small_register_classes_for_mode_p. */
26938 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
26940 return TARGET_THUMB1
;
26943 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26944 ARM insns and therefore guarantee that the shift count is modulo 256.
26945 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26946 guarantee no particular behavior for out-of-range counts. */
26948 static unsigned HOST_WIDE_INT
26949 arm_shift_truncation_mask (machine_mode mode
)
26951 return mode
== SImode
? 255 : 0;
26955 /* Map internal gcc register numbers to DWARF2 register numbers. */
26958 arm_dbx_register_number (unsigned int regno
)
26963 if (IS_VFP_REGNUM (regno
))
26965 /* See comment in arm_dwarf_register_span. */
26966 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
26967 return 64 + regno
- FIRST_VFP_REGNUM
;
26969 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
26972 if (IS_IWMMXT_GR_REGNUM (regno
))
26973 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
26975 if (IS_IWMMXT_REGNUM (regno
))
26976 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
26978 return DWARF_FRAME_REGISTERS
;
26981 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26982 GCC models tham as 64 32-bit registers, so we need to describe this to
26983 the DWARF generation code. Other registers can use the default. */
26985 arm_dwarf_register_span (rtx rtl
)
26993 regno
= REGNO (rtl
);
26994 if (!IS_VFP_REGNUM (regno
))
26997 /* XXX FIXME: The EABI defines two VFP register ranges:
26998 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27000 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27001 corresponding D register. Until GDB supports this, we shall use the
27002 legacy encodings. We also use these encodings for D0-D15 for
27003 compatibility with older debuggers. */
27004 mode
= GET_MODE (rtl
);
27005 if (GET_MODE_SIZE (mode
) < 8)
27008 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
27010 nregs
= GET_MODE_SIZE (mode
) / 4;
27011 for (i
= 0; i
< nregs
; i
+= 2)
27012 if (TARGET_BIG_END
)
27014 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
27015 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
27019 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
27020 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
27025 nregs
= GET_MODE_SIZE (mode
) / 8;
27026 for (i
= 0; i
< nregs
; i
++)
27027 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
27030 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
27033 #if ARM_UNWIND_INFO
27034 /* Emit unwind directives for a store-multiple instruction or stack pointer
27035 push during alignment.
27036 These should only ever be generated by the function prologue code, so
27037 expect them to have a particular form.
27038 The store-multiple instruction sometimes pushes pc as the last register,
27039 although it should not be tracked into unwind information, or for -Os
27040 sometimes pushes some dummy registers before first register that needs
27041 to be tracked in unwind information; such dummy registers are there just
27042 to avoid separate stack adjustment, and will not be restored in the
27046 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
27049 HOST_WIDE_INT offset
;
27050 HOST_WIDE_INT nregs
;
27054 unsigned padfirst
= 0, padlast
= 0;
27057 e
= XVECEXP (p
, 0, 0);
27058 gcc_assert (GET_CODE (e
) == SET
);
27060 /* First insn will adjust the stack pointer. */
27061 gcc_assert (GET_CODE (e
) == SET
27062 && REG_P (SET_DEST (e
))
27063 && REGNO (SET_DEST (e
)) == SP_REGNUM
27064 && GET_CODE (SET_SRC (e
)) == PLUS
);
27066 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
27067 nregs
= XVECLEN (p
, 0) - 1;
27068 gcc_assert (nregs
);
27070 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
27073 /* For -Os dummy registers can be pushed at the beginning to
27074 avoid separate stack pointer adjustment. */
27075 e
= XVECEXP (p
, 0, 1);
27076 e
= XEXP (SET_DEST (e
), 0);
27077 if (GET_CODE (e
) == PLUS
)
27078 padfirst
= INTVAL (XEXP (e
, 1));
27079 gcc_assert (padfirst
== 0 || optimize_size
);
27080 /* The function prologue may also push pc, but not annotate it as it is
27081 never restored. We turn this into a stack pointer adjustment. */
27082 e
= XVECEXP (p
, 0, nregs
);
27083 e
= XEXP (SET_DEST (e
), 0);
27084 if (GET_CODE (e
) == PLUS
)
27085 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
27087 padlast
= offset
- 4;
27088 gcc_assert (padlast
== 0 || padlast
== 4);
27090 fprintf (asm_out_file
, "\t.pad #4\n");
27092 fprintf (asm_out_file
, "\t.save {");
27094 else if (IS_VFP_REGNUM (reg
))
27097 fprintf (asm_out_file
, "\t.vsave {");
27100 /* Unknown register type. */
27101 gcc_unreachable ();
27103 /* If the stack increment doesn't match the size of the saved registers,
27104 something has gone horribly wrong. */
27105 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
27109 /* The remaining insns will describe the stores. */
27110 for (i
= 1; i
<= nregs
; i
++)
27112 /* Expect (set (mem <addr>) (reg)).
27113 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27114 e
= XVECEXP (p
, 0, i
);
27115 gcc_assert (GET_CODE (e
) == SET
27116 && MEM_P (SET_DEST (e
))
27117 && REG_P (SET_SRC (e
)));
27119 reg
= REGNO (SET_SRC (e
));
27120 gcc_assert (reg
>= lastreg
);
27123 fprintf (asm_out_file
, ", ");
27124 /* We can't use %r for vfp because we need to use the
27125 double precision register names. */
27126 if (IS_VFP_REGNUM (reg
))
27127 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
27129 asm_fprintf (asm_out_file
, "%r", reg
);
27133 /* Check that the addresses are consecutive. */
27134 e
= XEXP (SET_DEST (e
), 0);
27135 if (GET_CODE (e
) == PLUS
)
27136 gcc_assert (REG_P (XEXP (e
, 0))
27137 && REGNO (XEXP (e
, 0)) == SP_REGNUM
27138 && CONST_INT_P (XEXP (e
, 1))
27139 && offset
== INTVAL (XEXP (e
, 1)));
27143 && REGNO (e
) == SP_REGNUM
);
27144 offset
+= reg_size
;
27147 fprintf (asm_out_file
, "}\n");
27149 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
27152 /* Emit unwind directives for a SET. */
27155 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
27163 switch (GET_CODE (e0
))
27166 /* Pushing a single register. */
27167 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
27168 || !REG_P (XEXP (XEXP (e0
, 0), 0))
27169 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
27172 asm_fprintf (asm_out_file
, "\t.save ");
27173 if (IS_VFP_REGNUM (REGNO (e1
)))
27174 asm_fprintf(asm_out_file
, "{d%d}\n",
27175 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
27177 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
27181 if (REGNO (e0
) == SP_REGNUM
)
27183 /* A stack increment. */
27184 if (GET_CODE (e1
) != PLUS
27185 || !REG_P (XEXP (e1
, 0))
27186 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
27187 || !CONST_INT_P (XEXP (e1
, 1)))
27190 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
27191 -INTVAL (XEXP (e1
, 1)));
27193 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
27195 HOST_WIDE_INT offset
;
27197 if (GET_CODE (e1
) == PLUS
)
27199 if (!REG_P (XEXP (e1
, 0))
27200 || !CONST_INT_P (XEXP (e1
, 1)))
27202 reg
= REGNO (XEXP (e1
, 0));
27203 offset
= INTVAL (XEXP (e1
, 1));
27204 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
27205 HARD_FRAME_POINTER_REGNUM
, reg
,
27208 else if (REG_P (e1
))
27211 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
27212 HARD_FRAME_POINTER_REGNUM
, reg
);
27217 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
27219 /* Move from sp to reg. */
27220 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
27222 else if (GET_CODE (e1
) == PLUS
27223 && REG_P (XEXP (e1
, 0))
27224 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
27225 && CONST_INT_P (XEXP (e1
, 1)))
27227 /* Set reg to offset from sp. */
27228 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
27229 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
27241 /* Emit unwind directives for the given insn. */
27244 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
27247 bool handled_one
= false;
27249 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27252 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27253 && (TREE_NOTHROW (current_function_decl
)
27254 || crtl
->all_throwers_are_sibcalls
))
27257 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
27260 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
27262 switch (REG_NOTE_KIND (note
))
27264 case REG_FRAME_RELATED_EXPR
:
27265 pat
= XEXP (note
, 0);
27268 case REG_CFA_REGISTER
:
27269 pat
= XEXP (note
, 0);
27272 pat
= PATTERN (insn
);
27273 if (GET_CODE (pat
) == PARALLEL
)
27274 pat
= XVECEXP (pat
, 0, 0);
27277 /* Only emitted for IS_STACKALIGN re-alignment. */
27282 src
= SET_SRC (pat
);
27283 dest
= SET_DEST (pat
);
27285 gcc_assert (src
== stack_pointer_rtx
);
27286 reg
= REGNO (dest
);
27287 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27290 handled_one
= true;
27293 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
27294 to get correct dwarf information for shrink-wrap. We should not
27295 emit unwind information for it because these are used either for
27296 pretend arguments or notes to adjust sp and restore registers from
27298 case REG_CFA_DEF_CFA
:
27299 case REG_CFA_ADJUST_CFA
:
27300 case REG_CFA_RESTORE
:
27303 case REG_CFA_EXPRESSION
:
27304 case REG_CFA_OFFSET
:
27305 /* ??? Only handling here what we actually emit. */
27306 gcc_unreachable ();
27314 pat
= PATTERN (insn
);
27317 switch (GET_CODE (pat
))
27320 arm_unwind_emit_set (asm_out_file
, pat
);
27324 /* Store multiple. */
27325 arm_unwind_emit_sequence (asm_out_file
, pat
);
27334 /* Output a reference from a function exception table to the type_info
27335 object X. The EABI specifies that the symbol should be relocated by
27336 an R_ARM_TARGET2 relocation. */
27339 arm_output_ttype (rtx x
)
27341 fputs ("\t.word\t", asm_out_file
);
27342 output_addr_const (asm_out_file
, x
);
27343 /* Use special relocations for symbol references. */
27344 if (!CONST_INT_P (x
))
27345 fputs ("(TARGET2)", asm_out_file
);
27346 fputc ('\n', asm_out_file
);
27351 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27354 arm_asm_emit_except_personality (rtx personality
)
27356 fputs ("\t.personality\t", asm_out_file
);
27357 output_addr_const (asm_out_file
, personality
);
27358 fputc ('\n', asm_out_file
);
27360 #endif /* ARM_UNWIND_INFO */
27362 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27365 arm_asm_init_sections (void)
27367 #if ARM_UNWIND_INFO
27368 exception_section
= get_unnamed_section (0, output_section_asm_op
,
27370 #endif /* ARM_UNWIND_INFO */
27372 #ifdef OBJECT_FORMAT_ELF
27373 if (target_pure_code
)
27374 text_section
->unnamed
.data
= "\t.section .text,\"0x20000006\",%progbits";
27378 /* Output unwind directives for the start/end of a function. */
27381 arm_output_fn_unwind (FILE * f
, bool prologue
)
27383 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27387 fputs ("\t.fnstart\n", f
);
27390 /* If this function will never be unwound, then mark it as such.
27391 The came condition is used in arm_unwind_emit to suppress
27392 the frame annotations. */
27393 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27394 && (TREE_NOTHROW (current_function_decl
)
27395 || crtl
->all_throwers_are_sibcalls
))
27396 fputs("\t.cantunwind\n", f
);
27398 fputs ("\t.fnend\n", f
);
27403 arm_emit_tls_decoration (FILE *fp
, rtx x
)
27405 enum tls_reloc reloc
;
27408 val
= XVECEXP (x
, 0, 0);
27409 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
27411 output_addr_const (fp
, val
);
27416 fputs ("(tlsgd)", fp
);
27419 fputs ("(tlsldm)", fp
);
27422 fputs ("(tlsldo)", fp
);
27425 fputs ("(gottpoff)", fp
);
27428 fputs ("(tpoff)", fp
);
27431 fputs ("(tlsdesc)", fp
);
27434 gcc_unreachable ();
27443 fputs (" + (. - ", fp
);
27444 output_addr_const (fp
, XVECEXP (x
, 0, 2));
27445 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27446 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
27447 output_addr_const (fp
, XVECEXP (x
, 0, 3));
27457 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27460 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
27462 gcc_assert (size
== 4);
27463 fputs ("\t.word\t", file
);
27464 output_addr_const (file
, x
);
27465 fputs ("(tlsldo)", file
);
27468 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27471 arm_output_addr_const_extra (FILE *fp
, rtx x
)
27473 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
27474 return arm_emit_tls_decoration (fp
, x
);
27475 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
27478 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
27480 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
27481 assemble_name_raw (fp
, label
);
27485 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
27487 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
27491 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27495 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
27497 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27501 output_addr_const (fp
, XVECEXP (x
, 0, 1));
27505 else if (GET_CODE (x
) == CONST_VECTOR
)
27506 return arm_emit_vector_const (fp
, x
);
27511 /* Output assembly for a shift instruction.
27512 SET_FLAGS determines how the instruction modifies the condition codes.
27513 0 - Do not set condition codes.
27514 1 - Set condition codes.
27515 2 - Use smallest instruction. */
27517 arm_output_shift(rtx
* operands
, int set_flags
)
27520 static const char flag_chars
[3] = {'?', '.', '!'};
27525 c
= flag_chars
[set_flags
];
27526 shift
= shift_op(operands
[3], &val
);
27530 operands
[2] = GEN_INT(val
);
27531 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
27534 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
27536 output_asm_insn (pattern
, operands
);
27540 /* Output assembly for a WMMX immediate shift instruction. */
27542 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
27544 int shift
= INTVAL (operands
[2]);
27546 machine_mode opmode
= GET_MODE (operands
[0]);
27548 gcc_assert (shift
>= 0);
27550 /* If the shift value in the register versions is > 63 (for D qualifier),
27551 31 (for W qualifier) or 15 (for H qualifier). */
27552 if (((opmode
== V4HImode
) && (shift
> 15))
27553 || ((opmode
== V2SImode
) && (shift
> 31))
27554 || ((opmode
== DImode
) && (shift
> 63)))
27558 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27559 output_asm_insn (templ
, operands
);
27560 if (opmode
== DImode
)
27562 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
27563 output_asm_insn (templ
, operands
);
27568 /* The destination register will contain all zeros. */
27569 sprintf (templ
, "wzero\t%%0");
27570 output_asm_insn (templ
, operands
);
27575 if ((opmode
== DImode
) && (shift
> 32))
27577 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27578 output_asm_insn (templ
, operands
);
27579 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
27580 output_asm_insn (templ
, operands
);
27584 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
27585 output_asm_insn (templ
, operands
);
27590 /* Output assembly for a WMMX tinsr instruction. */
27592 arm_output_iwmmxt_tinsr (rtx
*operands
)
27594 int mask
= INTVAL (operands
[3]);
27597 int units
= mode_nunits
[GET_MODE (operands
[0])];
27598 gcc_assert ((mask
& (mask
- 1)) == 0);
27599 for (i
= 0; i
< units
; ++i
)
27601 if ((mask
& 0x01) == 1)
27607 gcc_assert (i
< units
);
27609 switch (GET_MODE (operands
[0]))
27612 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
27615 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
27618 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
27621 gcc_unreachable ();
27624 output_asm_insn (templ
, operands
);
27629 /* Output a Thumb-1 casesi dispatch sequence. */
27631 thumb1_output_casesi (rtx
*operands
)
27633 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
27635 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27637 switch (GET_MODE(diff_vec
))
27640 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27641 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27643 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27644 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27646 return "bl\t%___gnu_thumb1_case_si";
27648 gcc_unreachable ();
27652 /* Output a Thumb-2 casesi instruction. */
27654 thumb2_output_casesi (rtx
*operands
)
27656 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
27658 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27660 output_asm_insn ("cmp\t%0, %1", operands
);
27661 output_asm_insn ("bhi\t%l3", operands
);
27662 switch (GET_MODE(diff_vec
))
27665 return "tbb\t[%|pc, %0]";
27667 return "tbh\t[%|pc, %0, lsl #1]";
27671 output_asm_insn ("adr\t%4, %l2", operands
);
27672 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
27673 output_asm_insn ("add\t%4, %4, %5", operands
);
27678 output_asm_insn ("adr\t%4, %l2", operands
);
27679 return "ldr\t%|pc, [%4, %0, lsl #2]";
27682 gcc_unreachable ();
27686 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27687 per-core tuning structs. */
27689 arm_issue_rate (void)
27691 return current_tune
->issue_rate
;
27694 /* Return how many instructions should scheduler lookahead to choose the
27697 arm_first_cycle_multipass_dfa_lookahead (void)
27699 int issue_rate
= arm_issue_rate ();
27701 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
27704 /* Enable modeling of L2 auto-prefetcher. */
27706 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
27708 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
27712 arm_mangle_type (const_tree type
)
27714 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27715 has to be managled as if it is in the "std" namespace. */
27716 if (TARGET_AAPCS_BASED
27717 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
27718 return "St9__va_list";
27720 /* Half-precision float. */
27721 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
27724 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27726 if (TYPE_NAME (type
) != NULL
)
27727 return arm_mangle_builtin_type (type
);
27729 /* Use the default mangling. */
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */
static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};
27745 /* Adjust register allocation order when compiling for Thumb. */
27748 arm_order_regs_for_local_alloc (void)
27750 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
27751 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
27753 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
27754 sizeof (thumb_core_reg_alloc_order
));
27757 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27760 arm_frame_pointer_required (void)
27762 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
27765 /* If the function receives nonlocal gotos, it needs to save the frame
27766 pointer in the nonlocal_goto_save_area object. */
27767 if (cfun
->has_nonlocal_label
)
27770 /* The frame pointer is required for non-leaf APCS frames. */
27771 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !crtl
->is_leaf
)
27774 /* If we are probing the stack in the prologue, we will have a faulting
27775 instruction prior to the stack adjustment and this requires a frame
27776 pointer if we want to catch the exception using the EABI unwinder. */
27777 if (!IS_INTERRUPT (arm_current_func_type ())
27778 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
27779 && arm_except_unwind_info (&global_options
) == UI_TARGET
27780 && cfun
->can_throw_non_call_exceptions
)
27782 HOST_WIDE_INT size
= get_frame_size ();
27784 /* That's irrelevant if there is no stack adjustment. */
27788 /* That's relevant only if there is a stack probe. */
27789 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
27791 /* We don't have the final size of the frame so adjust. */
27792 size
+= 32 * UNITS_PER_WORD
;
27793 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
27803 /* Only thumb1 can't support conditional execution, so return true if
27804 the target is not thumb1. */
27806 arm_have_conditional_execution (void)
27808 return !TARGET_THUMB1
;
27811 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27812 static HOST_WIDE_INT
27813 arm_vector_alignment (const_tree type
)
27815 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
27817 if (TARGET_AAPCS_BASED
)
27818 align
= MIN (align
, 64);
27823 static unsigned int
27824 arm_autovectorize_vector_sizes (void)
27826 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
27830 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
27832 /* Vectors which aren't in packed structures will not be less aligned than
27833 the natural alignment of their element type, so this is safe. */
27834 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27837 return default_builtin_vector_alignment_reachable (type
, is_packed
);
27841 arm_builtin_support_vector_misalignment (machine_mode mode
,
27842 const_tree type
, int misalignment
,
27845 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27847 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
27852 /* If the misalignment is unknown, we should be able to handle the access
27853 so long as it is not to a member of a packed data structure. */
27854 if (misalignment
== -1)
27857 /* Return true if the misalignment is a multiple of the natural alignment
27858 of the vector's element type. This is probably always going to be
27859 true in practice, since we've already established that this isn't a
27861 return ((misalignment
% align
) == 0);
27864 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
27869 arm_conditional_register_usage (void)
27873 if (TARGET_THUMB1
&& optimize_size
)
27875 /* When optimizing for size on Thumb-1, it's better not
27876 to use the HI regs, because of the overhead of
27878 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
27879 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
27882 /* The link register can be clobbered by any branch insn,
27883 but we have no way to track that at present, so mark
27884 it as unavailable. */
27886 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
27888 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
)
27890 /* VFPv3 registers are disabled when earlier VFP
27891 versions are selected due to the definition of
27892 LAST_VFP_REGNUM. */
27893 for (regno
= FIRST_VFP_REGNUM
;
27894 regno
<= LAST_VFP_REGNUM
; ++ regno
)
27896 fixed_regs
[regno
] = 0;
27897 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
27898 || regno
>= FIRST_VFP_REGNUM
+ 32;
27902 if (TARGET_REALLY_IWMMXT
)
27904 regno
= FIRST_IWMMXT_GR_REGNUM
;
27905 /* The 2002/10/09 revision of the XScale ABI has wCG0
27906 and wCG1 as call-preserved registers. The 2002/11/21
27907 revision changed this so that all wCG registers are
27908 scratch registers. */
27909 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
27910 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
27911 fixed_regs
[regno
] = 0;
27912 /* The XScale ABI has wR0 - wR9 as scratch registers,
27913 the rest as call-preserved registers. */
27914 for (regno
= FIRST_IWMMXT_REGNUM
;
27915 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
27917 fixed_regs
[regno
] = 0;
27918 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
27922 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
27924 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27925 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27927 else if (TARGET_APCS_STACK
)
27929 fixed_regs
[10] = 1;
27930 call_used_regs
[10] = 1;
27932 /* -mcaller-super-interworking reserves r11 for calls to
27933 _interwork_r11_call_via_rN(). Making the register global
27934 is an easy way of ensuring that it remains valid for all
27936 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
27937 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
27939 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27940 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27941 if (TARGET_CALLER_INTERWORKING
)
27942 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27944 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27948 arm_preferred_rename_class (reg_class_t rclass
)
27950 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27951 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
27952 and code size can be reduced. */
27953 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
27959 /* Compute the attribute "length" of insn "*push_multi".
27960 So this function MUST be kept in sync with that insn pattern. */
27962 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
27964 int i
, regno
, hi_reg
;
27965 int num_saves
= XVECLEN (parallel_op
, 0);
27975 regno
= REGNO (first_op
);
27976 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
27977 list is 8-bit. Normally this means all registers in the list must be
27978 LO_REGS, that is (R0 -R7). If any HI_REGS used, then we must use 32-bit
27979 encodings. There is one exception for PUSH that LR in HI_REGS can be used
27980 with 16-bit encoding. */
27981 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27982 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
27984 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
27985 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27993 /* Compute the attribute "length" of insn. Currently, this function is used
27994 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
27995 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
27996 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
27997 true if OPERANDS contains insn which explicit updates base register. */
28000 arm_attr_length_pop_multi (rtx
*operands
, bool return_pc
, bool write_back_p
)
28009 rtx parallel_op
= operands
[0];
28010 /* Initialize to elements number of PARALLEL. */
28011 unsigned indx
= XVECLEN (parallel_op
, 0) - 1;
28012 /* Initialize the value to base register. */
28013 unsigned regno
= REGNO (operands
[1]);
28014 /* Skip return and write back pattern.
28015 We only need register pop pattern for later analysis. */
28016 unsigned first_indx
= 0;
28017 first_indx
+= return_pc
? 1 : 0;
28018 first_indx
+= write_back_p
? 1 : 0;
28020 /* A pop operation can be done through LDM or POP. If the base register is SP
28021 and if it's with write back, then a LDM will be alias of POP. */
28022 bool pop_p
= (regno
== SP_REGNUM
&& write_back_p
);
28023 bool ldm_p
= !pop_p
;
28025 /* Check base register for LDM. */
28026 if (ldm_p
&& REGNO_REG_CLASS (regno
) == HI_REGS
)
28029 /* Check each register in the list. */
28030 for (; indx
>= first_indx
; indx
--)
28032 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, indx
), 0));
28033 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28034 comment in arm_attr_length_push_multi. */
28035 if (REGNO_REG_CLASS (regno
) == HI_REGS
28036 && (regno
!= PC_REGNUM
|| ldm_p
))
28043 /* Compute the number of instructions emitted by output_move_double. */
28045 arm_count_output_move_double_insns (rtx
*operands
)
28049 /* output_move_double may modify the operands array, so call it
28050 here on a copy of the array. */
28051 ops
[0] = operands
[0];
28052 ops
[1] = operands
[1];
28053 output_move_double (ops
, false, &count
);
28058 vfp3_const_double_for_fract_bits (rtx operand
)
28060 REAL_VALUE_TYPE r0
;
28062 if (!CONST_DOUBLE_P (operand
))
28065 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
28066 if (exact_real_inverse (DFmode
, &r0
)
28067 && !REAL_VALUE_NEGATIVE (r0
))
28069 if (exact_real_truncate (DFmode
, &r0
))
28071 HOST_WIDE_INT value
= real_to_integer (&r0
);
28072 value
= value
& 0xffffffff;
28073 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
28075 int ret
= exact_log2 (value
);
28076 gcc_assert (IN_RANGE (ret
, 0, 31));
28084 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28085 log2 is in [1, 32], return that log2. Otherwise return -1.
28086 This is used in the patterns for vcvt.s32.f32 floating-point to
28087 fixed-point conversions. */
28090 vfp3_const_double_for_bits (rtx x
)
28092 const REAL_VALUE_TYPE
*r
;
28094 if (!CONST_DOUBLE_P (x
))
28097 r
= CONST_DOUBLE_REAL_VALUE (x
);
28099 if (REAL_VALUE_NEGATIVE (*r
)
28100 || REAL_VALUE_ISNAN (*r
)
28101 || REAL_VALUE_ISINF (*r
)
28102 || !real_isinteger (r
, SFmode
))
28105 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
28107 /* The exact_log2 above will have returned -1 if this is
28108 not an exact log2. */
28109 if (!IN_RANGE (hwint
, 1, 32))
28116 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28119 arm_pre_atomic_barrier (enum memmodel model
)
28121 if (need_atomic_barrier_p (model
, true))
28122 emit_insn (gen_memory_barrier ());
28126 arm_post_atomic_barrier (enum memmodel model
)
28128 if (need_atomic_barrier_p (model
, false))
28129 emit_insn (gen_memory_barrier ());
28132 /* Emit the load-exclusive and store-exclusive instructions.
28133 Use acquire and release versions if necessary. */
28136 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
28138 rtx (*gen
) (rtx
, rtx
);
28144 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
28145 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
28146 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
28147 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
28149 gcc_unreachable ();
28156 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
28157 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
28158 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
28159 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
28161 gcc_unreachable ();
28165 emit_insn (gen (rval
, mem
));
28169 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
28172 rtx (*gen
) (rtx
, rtx
, rtx
);
28178 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
28179 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
28180 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
28181 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
28183 gcc_unreachable ();
28190 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
28191 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
28192 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
28193 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
28195 gcc_unreachable ();
28199 emit_insn (gen (bval
, rval
, mem
));
28202 /* Mark the previous jump instruction as unlikely. */
28205 emit_unlikely_jump (rtx insn
)
28207 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
28209 rtx_insn
*jump
= emit_jump_insn (insn
);
28210 add_int_reg_note (jump
, REG_BR_PROB
, very_unlikely
);
28213 /* Expand a compare and swap pattern. */
28216 arm_expand_compare_and_swap (rtx operands
[])
28218 rtx bval
, bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
28220 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
28222 bval
= operands
[0];
28223 rval
= operands
[1];
28225 oldval
= operands
[3];
28226 newval
= operands
[4];
28227 is_weak
= operands
[5];
28228 mod_s
= operands
[6];
28229 mod_f
= operands
[7];
28230 mode
= GET_MODE (mem
);
28232 /* Normally the succ memory model must be stronger than fail, but in the
28233 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28234 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28236 if (TARGET_HAVE_LDACQ
28237 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
28238 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
28239 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
28245 /* For narrow modes, we're going to perform the comparison in SImode,
28246 so do the zero-extension now. */
28247 rval
= gen_reg_rtx (SImode
);
28248 oldval
= convert_modes (SImode
, mode
, oldval
, true);
28252 /* Force the value into a register if needed. We waited until after
28253 the zero-extension above to do this properly. */
28254 if (!arm_add_operand (oldval
, SImode
))
28255 oldval
= force_reg (SImode
, oldval
);
28259 if (!cmpdi_operand (oldval
, mode
))
28260 oldval
= force_reg (mode
, oldval
);
28264 gcc_unreachable ();
28271 case QImode
: gen
= gen_atomic_compare_and_swapt1qi_1
; break;
28272 case HImode
: gen
= gen_atomic_compare_and_swapt1hi_1
; break;
28273 case SImode
: gen
= gen_atomic_compare_and_swapt1si_1
; break;
28274 case DImode
: gen
= gen_atomic_compare_and_swapt1di_1
; break;
28276 gcc_unreachable ();
28283 case QImode
: gen
= gen_atomic_compare_and_swap32qi_1
; break;
28284 case HImode
: gen
= gen_atomic_compare_and_swap32hi_1
; break;
28285 case SImode
: gen
= gen_atomic_compare_and_swap32si_1
; break;
28286 case DImode
: gen
= gen_atomic_compare_and_swap32di_1
; break;
28288 gcc_unreachable ();
28292 bdst
= TARGET_THUMB1
? bval
: gen_rtx_REG (CC_Zmode
, CC_REGNUM
);
28293 emit_insn (gen (bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
28295 if (mode
== QImode
|| mode
== HImode
)
28296 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
28298 /* In all cases, we arrange for success to be signaled by Z set.
28299 This arrangement allows for the boolean result to be used directly
28300 in a subsequent branch, post optimization. For Thumb-1 targets, the
28301 boolean negation of the result is also stored in bval because Thumb-1
28302 backend lacks dependency tracking for CC flag due to flag-setting not
28303 being represented at RTL level. */
28305 emit_insn (gen_cstoresi_eq0_thumb1 (bval
, bdst
));
28308 x
= gen_rtx_EQ (SImode
, bdst
, const0_rtx
);
28309 emit_insn (gen_rtx_SET (bval
, x
));
28313 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28314 another memory store between the load-exclusive and store-exclusive can
28315 reset the monitor from Exclusive to Open state. This means we must wait
28316 until after reload to split the pattern, lest we get a register spill in
28317 the middle of the atomic sequence. Success of the compare and swap is
28318 indicated by the Z flag set for 32bit targets and by neg_bval being zero
28319 for Thumb-1 targets (ie. negation of the boolean value returned by
28320 atomic_compare_and_swapmode standard pattern in operand 0). */
28323 arm_split_compare_and_swap (rtx operands
[])
28325 rtx rval
, mem
, oldval
, newval
, neg_bval
;
28327 enum memmodel mod_s
, mod_f
;
28329 rtx_code_label
*label1
, *label2
;
28332 rval
= operands
[1];
28334 oldval
= operands
[3];
28335 newval
= operands
[4];
28336 is_weak
= (operands
[5] != const0_rtx
);
28337 mod_s
= memmodel_from_int (INTVAL (operands
[6]));
28338 mod_f
= memmodel_from_int (INTVAL (operands
[7]));
28339 neg_bval
= TARGET_THUMB1
? operands
[0] : operands
[8];
28340 mode
= GET_MODE (mem
);
28342 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
28344 bool use_acquire
= TARGET_HAVE_LDACQ
28345 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28346 || is_mm_release (mod_s
));
28348 bool use_release
= TARGET_HAVE_LDACQ
28349 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28350 || is_mm_acquire (mod_s
));
28352 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28353 a full barrier is emitted after the store-release. */
28355 use_acquire
= false;
28357 /* Checks whether a barrier is needed and emits one accordingly. */
28358 if (!(use_acquire
|| use_release
))
28359 arm_pre_atomic_barrier (mod_s
);
28364 label1
= gen_label_rtx ();
28365 emit_label (label1
);
28367 label2
= gen_label_rtx ();
28369 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
28371 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28372 as required to communicate with arm_expand_compare_and_swap. */
28375 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, neg_bval
);
28376 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28377 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
28378 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
28379 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
28383 emit_move_insn (neg_bval
, const1_rtx
);
28384 cond
= gen_rtx_NE (VOIDmode
, rval
, oldval
);
28385 if (thumb1_cmpneg_operand (oldval
, SImode
))
28386 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval
, rval
, oldval
,
28389 emit_unlikely_jump (gen_cbranchsi4_insn (cond
, rval
, oldval
, label2
));
28392 arm_emit_store_exclusive (mode
, neg_bval
, mem
, newval
, use_release
);
28394 /* Weak or strong, we want EQ to be true for success, so that we
28395 match the flags that we got from the compare above. */
28398 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
28399 x
= gen_rtx_COMPARE (CCmode
, neg_bval
, const0_rtx
);
28400 emit_insn (gen_rtx_SET (cond
, x
));
28405 /* Z is set to boolean value of !neg_bval, as required to communicate
28406 with arm_expand_compare_and_swap. */
28407 x
= gen_rtx_NE (VOIDmode
, neg_bval
, const0_rtx
);
28408 emit_unlikely_jump (gen_cbranchsi4 (x
, neg_bval
, const0_rtx
, label1
));
28411 if (!is_mm_relaxed (mod_f
))
28412 emit_label (label2
);
28414 /* Checks whether a barrier is needed and emits one accordingly. */
28416 || !(use_acquire
|| use_release
))
28417 arm_post_atomic_barrier (mod_s
);
28419 if (is_mm_relaxed (mod_f
))
28420 emit_label (label2
);
28423 /* Split an atomic operation pattern. Operation is given by CODE and is one
28424 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28425 operation). Operation is performed on the content at MEM and on VALUE
28426 following the memory model MODEL_RTX. The content at MEM before and after
28427 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28428 success of the operation is returned in COND. Using a scratch register or
28429 an operand register for these determines what result is returned for that
28433 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
28434 rtx value
, rtx model_rtx
, rtx cond
)
28436 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
28437 machine_mode mode
= GET_MODE (mem
);
28438 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
28439 rtx_code_label
*label
;
28440 bool all_low_regs
, bind_old_new
;
28443 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
28445 bool use_acquire
= TARGET_HAVE_LDACQ
28446 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28447 || is_mm_release (model
));
28449 bool use_release
= TARGET_HAVE_LDACQ
28450 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28451 || is_mm_acquire (model
));
28453 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28454 a full barrier is emitted after the store-release. */
28456 use_acquire
= false;
28458 /* Checks whether a barrier is needed and emits one accordingly. */
28459 if (!(use_acquire
|| use_release
))
28460 arm_pre_atomic_barrier (model
);
28462 label
= gen_label_rtx ();
28463 emit_label (label
);
28466 new_out
= gen_lowpart (wmode
, new_out
);
28468 old_out
= gen_lowpart (wmode
, old_out
);
28471 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
28473 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
28475 /* Does the operation require destination and first operand to use the same
28476 register? This is decided by register constraints of relevant insn
28477 patterns in thumb1.md. */
28478 gcc_assert (!new_out
|| REG_P (new_out
));
28479 all_low_regs
= REG_P (value
) && REGNO_REG_CLASS (REGNO (value
)) == LO_REGS
28480 && new_out
&& REGNO_REG_CLASS (REGNO (new_out
)) == LO_REGS
28481 && REGNO_REG_CLASS (REGNO (old_out
)) == LO_REGS
;
28486 && (code
!= PLUS
|| (!all_low_regs
&& !satisfies_constraint_L (value
))));
28488 /* We want to return the old value while putting the result of the operation
28489 in the same register as the old value so copy the old value over to the
28490 destination register and use that register for the operation. */
28491 if (old_out
&& bind_old_new
)
28493 emit_move_insn (new_out
, old_out
);
28504 x
= gen_rtx_AND (wmode
, old_out
, value
);
28505 emit_insn (gen_rtx_SET (new_out
, x
));
28506 x
= gen_rtx_NOT (wmode
, new_out
);
28507 emit_insn (gen_rtx_SET (new_out
, x
));
28511 if (CONST_INT_P (value
))
28513 value
= GEN_INT (-INTVAL (value
));
28519 if (mode
== DImode
)
28521 /* DImode plus/minus need to clobber flags. */
28522 /* The adddi3 and subdi3 patterns are incorrectly written so that
28523 they require matching operands, even when we could easily support
28524 three operands. Thankfully, this can be fixed up post-splitting,
28525 as the individual add+adc patterns do accept three operands and
28526 post-reload cprop can make these moves go away. */
28527 emit_move_insn (new_out
, old_out
);
28529 x
= gen_adddi3 (new_out
, new_out
, value
);
28531 x
= gen_subdi3 (new_out
, new_out
, value
);
28538 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
28539 emit_insn (gen_rtx_SET (new_out
, x
));
28543 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
28546 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28547 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
28549 /* Checks whether a barrier is needed and emits one accordingly. */
28551 || !(use_acquire
|| use_release
))
28552 arm_post_atomic_barrier (model
);
28555 #define MAX_VECT_LEN 16
28557 struct expand_vec_perm_d
28559 rtx target
, op0
, op1
;
28560 unsigned char perm
[MAX_VECT_LEN
];
28561 machine_mode vmode
;
28562 unsigned char nelt
;
28567 /* Generate a variable permutation. */
28570 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28572 machine_mode vmode
= GET_MODE (target
);
28573 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28575 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
28576 gcc_checking_assert (GET_MODE (op0
) == vmode
);
28577 gcc_checking_assert (GET_MODE (op1
) == vmode
);
28578 gcc_checking_assert (GET_MODE (sel
) == vmode
);
28579 gcc_checking_assert (TARGET_NEON
);
28583 if (vmode
== V8QImode
)
28584 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
28586 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
28592 if (vmode
== V8QImode
)
28594 pair
= gen_reg_rtx (V16QImode
);
28595 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
28596 pair
= gen_lowpart (TImode
, pair
);
28597 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
28601 pair
= gen_reg_rtx (OImode
);
28602 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
28603 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
28609 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28611 machine_mode vmode
= GET_MODE (target
);
28612 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
28613 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28614 rtx rmask
[MAX_VECT_LEN
], mask
;
28616 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28617 numbering of elements for big-endian, we must reverse the order. */
28618 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
28620 /* The VTBL instruction does not use a modulo index, so we must take care
28621 of that ourselves. */
28622 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28623 for (i
= 0; i
< nelt
; ++i
)
28625 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
28626 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
28628 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
28631 /* Map lane ordering between architectural lane order, and GCC lane order,
28632 taking into account ABI. See comment above output_move_neon for details. */
28635 neon_endian_lane_map (machine_mode mode
, int lane
)
28637 if (BYTES_BIG_ENDIAN
)
28639 int nelems
= GET_MODE_NUNITS (mode
);
28640 /* Reverse lane order. */
28641 lane
= (nelems
- 1 - lane
);
28642 /* Reverse D register order, to match ABI. */
28643 if (GET_MODE_SIZE (mode
) == 16)
28644 lane
= lane
^ (nelems
/ 2);
28649 /* Some permutations index into pairs of vectors, this is a helper function
28650 to map indexes into those pairs of vectors. */
28653 neon_pair_endian_lane_map (machine_mode mode
, int lane
)
28655 int nelem
= GET_MODE_NUNITS (mode
);
28656 if (BYTES_BIG_ENDIAN
)
28658 neon_endian_lane_map (mode
, lane
& (nelem
- 1)) + (lane
& nelem
);
28662 /* Generate or test for an insn that supports a constant permutation. */
28664 /* Recognize patterns for the VUZP insns. */
28667 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
28669 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28670 rtx out0
, out1
, in0
, in1
;
28671 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28675 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28678 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28679 big endian pattern on 64 bit vectors, so we correct for that. */
28680 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
28681 && GET_MODE_SIZE (d
->vmode
) == 8 ? d
->nelt
: 0;
28683 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
28685 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28687 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
28691 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28693 for (i
= 0; i
< nelt
; i
++)
28696 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
28697 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
28707 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
28708 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
28709 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
28710 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
28711 case V8HFmode
: gen
= gen_neon_vuzpv8hf_internal
; break;
28712 case V4HFmode
: gen
= gen_neon_vuzpv4hf_internal
; break;
28713 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
28714 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
28715 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
28716 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
28718 gcc_unreachable ();
28723 if (swap_nelt
!= 0)
28724 std::swap (in0
, in1
);
28727 out1
= gen_reg_rtx (d
->vmode
);
28729 std::swap (out0
, out1
);
28731 emit_insn (gen (out0
, in0
, in1
, out1
));
28735 /* Recognize patterns for the VZIP insns. */
28738 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
28740 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
28741 rtx out0
, out1
, in0
, in1
;
28742 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28746 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28749 is_swapped
= BYTES_BIG_ENDIAN
;
28751 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0) ^ is_swapped
];
28754 if (first_elem
== neon_endian_lane_map (d
->vmode
, high
))
28756 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28760 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28762 for (i
= 0; i
< nelt
/ 2; i
++)
28765 neon_pair_endian_lane_map (d
->vmode
, i
+ high
) & mask
;
28766 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ is_swapped
)]
28770 neon_pair_endian_lane_map (d
->vmode
, i
+ nelt
+ high
) & mask
;
28771 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ !is_swapped
)]
28782 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
28783 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
28784 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
28785 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
28786 case V8HFmode
: gen
= gen_neon_vzipv8hf_internal
; break;
28787 case V4HFmode
: gen
= gen_neon_vzipv4hf_internal
; break;
28788 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
28789 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
28790 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
28791 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
28793 gcc_unreachable ();
28799 std::swap (in0
, in1
);
28802 out1
= gen_reg_rtx (d
->vmode
);
28804 std::swap (out0
, out1
);
28806 emit_insn (gen (out0
, in0
, in1
, out1
));
28810 /* Recognize patterns for the VREV insns. */
28813 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
28815 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
28816 rtx (*gen
)(rtx
, rtx
);
28818 if (!d
->one_vector_p
)
28827 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
28828 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
28836 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
28837 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
28838 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
28839 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
28840 case V8HFmode
: gen
= gen_neon_vrev64v8hf
; break;
28841 case V4HFmode
: gen
= gen_neon_vrev64v4hf
; break;
28849 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
28850 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
28851 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
28852 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
28853 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
28854 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
28855 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
28856 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
28865 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
28866 for (j
= 0; j
<= diff
; j
+= 1)
28868 /* This is guaranteed to be true as the value of diff
28869 is 7, 3, 1 and we should have enough elements in the
28870 queue to generate this. Getting a vector mask with a
28871 value of diff other than these values implies that
28872 something is wrong by the time we get here. */
28873 gcc_assert (i
+ j
< nelt
);
28874 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
28882 emit_insn (gen (d
->target
, d
->op0
));
28886 /* Recognize patterns for the VTRN insns. */
28889 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
28891 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28892 rtx out0
, out1
, in0
, in1
;
28893 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28895 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28898 /* Note that these are little-endian tests. Adjust for big-endian later. */
28899 if (d
->perm
[0] == 0)
28901 else if (d
->perm
[0] == 1)
28905 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28907 for (i
= 0; i
< nelt
; i
+= 2)
28909 if (d
->perm
[i
] != i
+ odd
)
28911 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
28921 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
28922 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
28923 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
28924 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
28925 case V8HFmode
: gen
= gen_neon_vtrnv8hf_internal
; break;
28926 case V4HFmode
: gen
= gen_neon_vtrnv4hf_internal
; break;
28927 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
28928 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
28929 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
28930 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
28932 gcc_unreachable ();
28937 if (BYTES_BIG_ENDIAN
)
28939 std::swap (in0
, in1
);
28944 out1
= gen_reg_rtx (d
->vmode
);
28946 std::swap (out0
, out1
);
28948 emit_insn (gen (out0
, in0
, in1
, out1
));
28952 /* Recognize patterns for the VEXT insns. */
28955 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
28957 unsigned int i
, nelt
= d
->nelt
;
28958 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
28961 unsigned int location
;
28963 unsigned int next
= d
->perm
[0] + 1;
28965 /* TODO: Handle GCC's numbering of elements for big-endian. */
28966 if (BYTES_BIG_ENDIAN
)
28969 /* Check if the extracted indexes are increasing by one. */
28970 for (i
= 1; i
< nelt
; next
++, i
++)
28972 /* If we hit the most significant element of the 2nd vector in
28973 the previous iteration, no need to test further. */
28974 if (next
== 2 * nelt
)
28977 /* If we are operating on only one vector: it could be a
28978 rotation. If there are only two elements of size < 64, let
28979 arm_evpc_neon_vrev catch it. */
28980 if (d
->one_vector_p
&& (next
== nelt
))
28982 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
28988 if (d
->perm
[i
] != next
)
28992 location
= d
->perm
[0];
28996 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
28997 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
28998 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
28999 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
29000 case V2SImode
: gen
= gen_neon_vextv2si
; break;
29001 case V4SImode
: gen
= gen_neon_vextv4si
; break;
29002 case V4HFmode
: gen
= gen_neon_vextv4hf
; break;
29003 case V8HFmode
: gen
= gen_neon_vextv8hf
; break;
29004 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
29005 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
29006 case V2DImode
: gen
= gen_neon_vextv2di
; break;
29015 offset
= GEN_INT (location
);
29016 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
29020 /* The NEON VTBL instruction is a fully variable permuation that's even
29021 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29022 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29023 can do slightly better by expanding this as a constant where we don't
29024 have to apply a mask. */
29027 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
29029 rtx rperm
[MAX_VECT_LEN
], sel
;
29030 machine_mode vmode
= d
->vmode
;
29031 unsigned int i
, nelt
= d
->nelt
;
29033 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29034 numbering of elements for big-endian, we must reverse the order. */
29035 if (BYTES_BIG_ENDIAN
)
29041 /* Generic code will try constant permutation twice. Once with the
29042 original mode and again with the elements lowered to QImode.
29043 So wait and don't do the selector expansion ourselves. */
29044 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
29047 for (i
= 0; i
< nelt
; ++i
)
29048 rperm
[i
] = GEN_INT (d
->perm
[i
]);
29049 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
29050 sel
= force_reg (vmode
, sel
);
29052 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
29057 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
29059 /* Check if the input mask matches vext before reordering the
29062 if (arm_evpc_neon_vext (d
))
29065 /* The pattern matching functions above are written to look for a small
29066 number to begin the sequence (0, 1, N/2). If we begin with an index
29067 from the second operand, we can swap the operands. */
29068 if (d
->perm
[0] >= d
->nelt
)
29070 unsigned i
, nelt
= d
->nelt
;
29072 for (i
= 0; i
< nelt
; ++i
)
29073 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
29075 std::swap (d
->op0
, d
->op1
);
29080 if (arm_evpc_neon_vuzp (d
))
29082 if (arm_evpc_neon_vzip (d
))
29084 if (arm_evpc_neon_vrev (d
))
29086 if (arm_evpc_neon_vtrn (d
))
29088 return arm_evpc_neon_vtbl (d
);
29093 /* Expand a vec_perm_const pattern. */
29096 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
29098 struct expand_vec_perm_d d
;
29099 int i
, nelt
, which
;
29105 d
.vmode
= GET_MODE (target
);
29106 gcc_assert (VECTOR_MODE_P (d
.vmode
));
29107 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
29108 d
.testing_p
= false;
29110 for (i
= which
= 0; i
< nelt
; ++i
)
29112 rtx e
= XVECEXP (sel
, 0, i
);
29113 int ei
= INTVAL (e
) & (2 * nelt
- 1);
29114 which
|= (ei
< nelt
? 1 : 2);
29124 d
.one_vector_p
= false;
29125 if (!rtx_equal_p (op0
, op1
))
29128 /* The elements of PERM do not suggest that only the first operand
29129 is used, but both operands are identical. Allow easier matching
29130 of the permutation by folding the permutation into the single
29134 for (i
= 0; i
< nelt
; ++i
)
29135 d
.perm
[i
] &= nelt
- 1;
29137 d
.one_vector_p
= true;
29142 d
.one_vector_p
= true;
29146 return arm_expand_vec_perm_const_1 (&d
);
29149 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29152 arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
29153 const unsigned char *sel
)
29155 struct expand_vec_perm_d d
;
29156 unsigned int i
, nelt
, which
;
29160 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
29161 d
.testing_p
= true;
29162 memcpy (d
.perm
, sel
, nelt
);
29164 /* Categorize the set of elements in the selector. */
29165 for (i
= which
= 0; i
< nelt
; ++i
)
29167 unsigned char e
= d
.perm
[i
];
29168 gcc_assert (e
< 2 * nelt
);
29169 which
|= (e
< nelt
? 1 : 2);
29172 /* For all elements from second vector, fold the elements to first. */
29174 for (i
= 0; i
< nelt
; ++i
)
29177 /* Check whether the mask can be applied to the vector type. */
29178 d
.one_vector_p
= (which
!= 3);
29180 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
29181 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
29182 if (!d
.one_vector_p
)
29183 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
29186 ret
= arm_expand_vec_perm_const_1 (&d
);
29193 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
29195 /* If we are soft float and we do not have ldrd
29196 then all auto increment forms are ok. */
29197 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
29202 /* Post increment and Pre Decrement are supported for all
29203 instruction forms except for vector forms. */
29206 if (VECTOR_MODE_P (mode
))
29208 if (code
!= ARM_PRE_DEC
)
29218 /* Without LDRD and mode size greater than
29219 word size, there is no point in auto-incrementing
29220 because ldm and stm will not have these forms. */
29221 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
29224 /* Vector and floating point modes do not support
29225 these auto increment forms. */
29226 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
29239 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29240 on ARM, since we know that shifts by negative amounts are no-ops.
29241 Additionally, the default expansion code is not available or suitable
29242 for post-reload insn splits (this can occur when the register allocator
29243 chooses not to do a shift in NEON).
29245 This function is used in both initial expand and post-reload splits, and
29246 handles all kinds of 64-bit shifts.
29248 Input requirements:
29249 - It is safe for the input and output to be the same register, but
29250 early-clobber rules apply for the shift amount and scratch registers.
29251 - Shift by register requires both scratch registers. In all other cases
29252 the scratch registers may be NULL.
29253 - Ashiftrt by a register also clobbers the CC register. */
29255 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
29256 rtx amount
, rtx scratch1
, rtx scratch2
)
29258 rtx out_high
= gen_highpart (SImode
, out
);
29259 rtx out_low
= gen_lowpart (SImode
, out
);
29260 rtx in_high
= gen_highpart (SImode
, in
);
29261 rtx in_low
= gen_lowpart (SImode
, in
);
29264 in = the register pair containing the input value.
29265 out = the destination register pair.
29266 up = the high- or low-part of each pair.
29267 down = the opposite part to "up".
29268 In a shift, we can consider bits to shift from "up"-stream to
29269 "down"-stream, so in a left-shift "up" is the low-part and "down"
29270 is the high-part of each register pair. */
29272 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
29273 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
29274 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
29275 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
29277 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
29279 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
29280 && GET_MODE (out
) == DImode
);
29282 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
29283 && GET_MODE (in
) == DImode
);
29285 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
29286 && GET_MODE (amount
) == SImode
)
29287 || CONST_INT_P (amount
)));
29288 gcc_assert (scratch1
== NULL
29289 || (GET_CODE (scratch1
) == SCRATCH
)
29290 || (GET_MODE (scratch1
) == SImode
29291 && REG_P (scratch1
)));
29292 gcc_assert (scratch2
== NULL
29293 || (GET_CODE (scratch2
) == SCRATCH
)
29294 || (GET_MODE (scratch2
) == SImode
29295 && REG_P (scratch2
)));
29296 gcc_assert (!REG_P (out
) || !REG_P (amount
)
29297 || !HARD_REGISTER_P (out
)
29298 || (REGNO (out
) != REGNO (amount
)
29299 && REGNO (out
) + 1 != REGNO (amount
)));
29301 /* Macros to make following code more readable. */
29302 #define SUB_32(DEST,SRC) \
29303 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29304 #define RSB_32(DEST,SRC) \
29305 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29306 #define SUB_S_32(DEST,SRC) \
29307 gen_addsi3_compare0 ((DEST), (SRC), \
29309 #define SET(DEST,SRC) \
29310 gen_rtx_SET ((DEST), (SRC))
29311 #define SHIFT(CODE,SRC,AMOUNT) \
29312 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29313 #define LSHIFT(CODE,SRC,AMOUNT) \
29314 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29315 SImode, (SRC), (AMOUNT))
29316 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29317 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29318 SImode, (SRC), (AMOUNT))
29320 gen_rtx_IOR (SImode, (A), (B))
29321 #define BRANCH(COND,LABEL) \
29322 gen_arm_cond_branch ((LABEL), \
29323 gen_rtx_ ## COND (CCmode, cc_reg, \
29327 /* Shifts by register and shifts by constant are handled separately. */
29328 if (CONST_INT_P (amount
))
29330 /* We have a shift-by-constant. */
29332 /* First, handle out-of-range shift amounts.
29333 In both cases we try to match the result an ARM instruction in a
29334 shift-by-register would give. This helps reduce execution
29335 differences between optimization levels, but it won't stop other
29336 parts of the compiler doing different things. This is "undefined
29337 behavior, in any case. */
29338 if (INTVAL (amount
) <= 0)
29339 emit_insn (gen_movdi (out
, in
));
29340 else if (INTVAL (amount
) >= 64)
29342 if (code
== ASHIFTRT
)
29344 rtx const31_rtx
= GEN_INT (31);
29345 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
29346 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
29349 emit_insn (gen_movdi (out
, const0_rtx
));
29352 /* Now handle valid shifts. */
29353 else if (INTVAL (amount
) < 32)
29355 /* Shifts by a constant less than 32. */
29356 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
29358 /* Clearing the out register in DImode first avoids lots
29359 of spilling and results in less stack usage.
29360 Later this redundant insn is completely removed.
29361 Do that only if "in" and "out" are different registers. */
29362 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
29363 emit_insn (SET (out
, const0_rtx
));
29364 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29365 emit_insn (SET (out_down
,
29366 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
29368 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29372 /* Shifts by a constant greater than 31. */
29373 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
29375 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
29376 emit_insn (SET (out
, const0_rtx
));
29377 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
29378 if (code
== ASHIFTRT
)
29379 emit_insn (gen_ashrsi3 (out_up
, in_up
,
29382 emit_insn (SET (out_up
, const0_rtx
));
29387 /* We have a shift-by-register. */
29388 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
29390 /* This alternative requires the scratch registers. */
29391 gcc_assert (scratch1
&& REG_P (scratch1
));
29392 gcc_assert (scratch2
&& REG_P (scratch2
));
29394 /* We will need the values "amount-32" and "32-amount" later.
29395 Swapping them around now allows the later code to be more general. */
29399 emit_insn (SUB_32 (scratch1
, amount
));
29400 emit_insn (RSB_32 (scratch2
, amount
));
29403 emit_insn (RSB_32 (scratch1
, amount
));
29404 /* Also set CC = amount > 32. */
29405 emit_insn (SUB_S_32 (scratch2
, amount
));
29408 emit_insn (RSB_32 (scratch1
, amount
));
29409 emit_insn (SUB_32 (scratch2
, amount
));
29412 gcc_unreachable ();
29415 /* Emit code like this:
29418 out_down = in_down << amount;
29419 out_down = (in_up << (amount - 32)) | out_down;
29420 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29421 out_up = in_up << amount;
29424 out_down = in_down >> amount;
29425 out_down = (in_up << (32 - amount)) | out_down;
29427 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29428 out_up = in_up << amount;
29431 out_down = in_down >> amount;
29432 out_down = (in_up << (32 - amount)) | out_down;
29434 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29435 out_up = in_up << amount;
29437 The ARM and Thumb2 variants are the same but implemented slightly
29438 differently. If this were only called during expand we could just
29439 use the Thumb2 case and let combine do the right thing, but this
29440 can also be called from post-reload splitters. */
29442 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29444 if (!TARGET_THUMB2
)
29446 /* Emit code for ARM mode. */
29447 emit_insn (SET (out_down
,
29448 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
29449 if (code
== ASHIFTRT
)
29451 rtx_code_label
*done_label
= gen_label_rtx ();
29452 emit_jump_insn (BRANCH (LT
, done_label
));
29453 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
29455 emit_label (done_label
);
29458 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
29463 /* Emit code for Thumb2 mode.
29464 Thumb2 can't do shift and or in one insn. */
29465 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
29466 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
29468 if (code
== ASHIFTRT
)
29470 rtx_code_label
*done_label
= gen_label_rtx ();
29471 emit_jump_insn (BRANCH (LT
, done_label
));
29472 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
29473 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
29474 emit_label (done_label
);
29478 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
29479 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
29483 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29497 /* Returns true if the pattern is a valid symbolic address, which is either a
29498 symbol_ref or (symbol_ref + addend).
29500 According to the ARM ELF ABI, the initial addend of REL-type relocations
29501 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29502 literal field of the instruction as a 16-bit signed value in the range
29503 -32768 <= A < 32768. */
29506 arm_valid_symbolic_address_p (rtx addr
)
29508 rtx xop0
, xop1
= NULL_RTX
;
29511 if (GET_CODE (tmp
) == SYMBOL_REF
|| GET_CODE (tmp
) == LABEL_REF
)
29514 /* (const (plus: symbol_ref const_int)) */
29515 if (GET_CODE (addr
) == CONST
)
29516 tmp
= XEXP (addr
, 0);
29518 if (GET_CODE (tmp
) == PLUS
)
29520 xop0
= XEXP (tmp
, 0);
29521 xop1
= XEXP (tmp
, 1);
29523 if (GET_CODE (xop0
) == SYMBOL_REF
&& CONST_INT_P (xop1
))
29524 return IN_RANGE (INTVAL (xop1
), -0x8000, 0x7fff);
29530 /* Returns true if a valid comparison operation and makes
29531 the operands in a form that is valid. */
29533 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
29535 enum rtx_code code
= GET_CODE (*comparison
);
29537 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
29538 ? GET_MODE (*op2
) : GET_MODE (*op1
);
29540 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
29542 if (code
== UNEQ
|| code
== LTGT
)
29545 code_int
= (int)code
;
29546 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
29547 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
29552 if (!arm_add_operand (*op1
, mode
))
29553 *op1
= force_reg (mode
, *op1
);
29554 if (!arm_add_operand (*op2
, mode
))
29555 *op2
= force_reg (mode
, *op2
);
29559 if (!cmpdi_operand (*op1
, mode
))
29560 *op1
= force_reg (mode
, *op1
);
29561 if (!cmpdi_operand (*op2
, mode
))
29562 *op2
= force_reg (mode
, *op2
);
29566 if (!TARGET_VFP_FP16INST
)
29568 /* FP16 comparisons are done in SF mode. */
29570 *op1
= convert_to_mode (mode
, *op1
, 1);
29571 *op2
= convert_to_mode (mode
, *op2
, 1);
29572 /* Fall through. */
29575 if (!vfp_compare_operand (*op1
, mode
))
29576 *op1
= force_reg (mode
, *op1
);
29577 if (!vfp_compare_operand (*op2
, mode
))
29578 *op2
= force_reg (mode
, *op2
);
29588 /* Maximum number of instructions to set block of memory. */
29590 arm_block_set_max_insns (void)
29592 if (optimize_function_for_size_p (cfun
))
29595 return current_tune
->max_insns_inline_memset
;
29598 /* Return TRUE if it's profitable to set block of memory for
29599 non-vectorized case. VAL is the value to set the memory
29600 with. LENGTH is the number of bytes to set. ALIGN is the
29601 alignment of the destination memory in bytes. UNALIGNED_P
29602 is TRUE if we can only set the memory with instructions
29603 meeting alignment requirements. USE_STRD_P is TRUE if we
29604 can use strd to set the memory. */
29606 arm_block_set_non_vect_profit_p (rtx val
,
29607 unsigned HOST_WIDE_INT length
,
29608 unsigned HOST_WIDE_INT align
,
29609 bool unaligned_p
, bool use_strd_p
)
29612 /* For leftovers in bytes of 0-7, we can set the memory block using
29613 strb/strh/str with minimum instruction number. */
29614 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29618 num
= arm_const_inline_cost (SET
, val
);
29619 num
+= length
/ align
+ length
% align
;
29621 else if (use_strd_p
)
29623 num
= arm_const_double_inline_cost (val
);
29624 num
+= (length
>> 3) + leftover
[length
& 7];
29628 num
= arm_const_inline_cost (SET
, val
);
29629 num
+= (length
>> 2) + leftover
[length
& 3];
29632 /* We may be able to combine last pair STRH/STRB into a single STR
29633 by shifting one byte back. */
29634 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
29637 return (num
<= arm_block_set_max_insns ());
29640 /* Return TRUE if it's profitable to set block of memory for
29641 vectorized case. LENGTH is the number of bytes to set.
29642 ALIGN is the alignment of destination memory in bytes.
29643 MODE is the vector mode used to set the memory. */
29645 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
29646 unsigned HOST_WIDE_INT align
,
29650 bool unaligned_p
= ((align
& 3) != 0);
29651 unsigned int nelt
= GET_MODE_NUNITS (mode
);
29653 /* Instruction loading constant value. */
29655 /* Instructions storing the memory. */
29656 num
+= (length
+ nelt
- 1) / nelt
;
29657 /* Instructions adjusting the address expression. Only need to
29658 adjust address expression if it's 4 bytes aligned and bytes
29659 leftover can only be stored by mis-aligned store instruction. */
29660 if (!unaligned_p
&& (length
& 3) != 0)
29663 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29664 if (!unaligned_p
&& mode
== V16QImode
)
29667 return (num
<= arm_block_set_max_insns ());
29670 /* Set a block of memory using vectorization instructions for the
29671 unaligned case. We fill the first LENGTH bytes of the memory
29672 area starting from DSTBASE with byte constant VALUE. ALIGN is
29673 the alignment requirement of memory. Return TRUE if succeeded. */
29675 arm_block_set_unaligned_vect (rtx dstbase
,
29676 unsigned HOST_WIDE_INT length
,
29677 unsigned HOST_WIDE_INT value
,
29678 unsigned HOST_WIDE_INT align
)
29680 unsigned int i
, j
, nelt_v16
, nelt_v8
, nelt_mode
;
29682 rtx val_elt
, val_vec
, reg
;
29683 rtx rval
[MAX_VECT_LEN
];
29684 rtx (*gen_func
) (rtx
, rtx
);
29686 unsigned HOST_WIDE_INT v
= value
;
29687 unsigned int offset
= 0;
29688 gcc_assert ((align
& 0x3) != 0);
29689 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29690 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29691 if (length
>= nelt_v16
)
29694 gen_func
= gen_movmisalignv16qi
;
29699 gen_func
= gen_movmisalignv8qi
;
29701 nelt_mode
= GET_MODE_NUNITS (mode
);
29702 gcc_assert (length
>= nelt_mode
);
29703 /* Skip if it isn't profitable. */
29704 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29707 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29708 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29710 v
= sext_hwi (v
, BITS_PER_WORD
);
29711 val_elt
= GEN_INT (v
);
29712 for (j
= 0; j
< nelt_mode
; j
++)
29715 reg
= gen_reg_rtx (mode
);
29716 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
29717 /* Emit instruction loading the constant value. */
29718 emit_move_insn (reg
, val_vec
);
29720 /* Handle nelt_mode bytes in a vector. */
29721 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29723 emit_insn ((*gen_func
) (mem
, reg
));
29724 if (i
+ 2 * nelt_mode
<= length
)
29726 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
29727 offset
+= nelt_mode
;
29728 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29732 /* If there are not less than nelt_v8 bytes leftover, we must be in
29734 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
29736 /* Handle (8, 16) bytes leftover. */
29737 if (i
+ nelt_v8
< length
)
29739 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
29740 offset
+= length
- i
;
29741 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29743 /* We are shifting bytes back, set the alignment accordingly. */
29744 if ((length
& 1) != 0 && align
>= 2)
29745 set_mem_align (mem
, BITS_PER_UNIT
);
29747 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29749 /* Handle (0, 8] bytes leftover. */
29750 else if (i
< length
&& i
+ nelt_v8
>= length
)
29752 if (mode
== V16QImode
)
29753 reg
= gen_lowpart (V8QImode
, reg
);
29755 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
29756 + (nelt_mode
- nelt_v8
))));
29757 offset
+= (length
- i
) + (nelt_mode
- nelt_v8
);
29758 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, offset
);
29760 /* We are shifting bytes back, set the alignment accordingly. */
29761 if ((length
& 1) != 0 && align
>= 2)
29762 set_mem_align (mem
, BITS_PER_UNIT
);
29764 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29770 /* Set a block of memory using vectorization instructions for the
29771 aligned case. We fill the first LENGTH bytes of the memory area
29772 starting from DSTBASE with byte constant VALUE. ALIGN is the
29773 alignment requirement of memory. Return TRUE if succeeded. */
29775 arm_block_set_aligned_vect (rtx dstbase
,
29776 unsigned HOST_WIDE_INT length
,
29777 unsigned HOST_WIDE_INT value
,
29778 unsigned HOST_WIDE_INT align
)
29780 unsigned int i
, j
, nelt_v8
, nelt_v16
, nelt_mode
;
29781 rtx dst
, addr
, mem
;
29782 rtx val_elt
, val_vec
, reg
;
29783 rtx rval
[MAX_VECT_LEN
];
29785 unsigned HOST_WIDE_INT v
= value
;
29786 unsigned int offset
= 0;
29788 gcc_assert ((align
& 0x3) == 0);
29789 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29790 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29791 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
29796 nelt_mode
= GET_MODE_NUNITS (mode
);
29797 gcc_assert (length
>= nelt_mode
);
29798 /* Skip if it isn't profitable. */
29799 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29802 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29804 v
= sext_hwi (v
, BITS_PER_WORD
);
29805 val_elt
= GEN_INT (v
);
29806 for (j
= 0; j
< nelt_mode
; j
++)
29809 reg
= gen_reg_rtx (mode
);
29810 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
29811 /* Emit instruction loading the constant value. */
29812 emit_move_insn (reg
, val_vec
);
29815 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29816 if (mode
== V16QImode
)
29818 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29819 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29821 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29822 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
29824 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29825 offset
+= length
- nelt_mode
;
29826 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29827 /* We are shifting bytes back, set the alignment accordingly. */
29828 if ((length
& 0x3) == 0)
29829 set_mem_align (mem
, BITS_PER_UNIT
* 4);
29830 else if ((length
& 0x1) == 0)
29831 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29833 set_mem_align (mem
, BITS_PER_UNIT
);
29835 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29838 /* Fall through for bytes leftover. */
29840 nelt_mode
= GET_MODE_NUNITS (mode
);
29841 reg
= gen_lowpart (V8QImode
, reg
);
29844 /* Handle 8 bytes in a vector. */
29845 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29847 addr
= plus_constant (Pmode
, dst
, i
);
29848 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
+ i
);
29849 emit_move_insn (mem
, reg
);
29852 /* Handle single word leftover by shifting 4 bytes back. We can
29853 use aligned access for this case. */
29854 if (i
+ UNITS_PER_WORD
== length
)
29856 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
29857 offset
+= i
- UNITS_PER_WORD
;
29858 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
);
29859 /* We are shifting 4 bytes back, set the alignment accordingly. */
29860 if (align
> UNITS_PER_WORD
)
29861 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
29863 emit_move_insn (mem
, reg
);
29865 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29866 We have to use unaligned access for this case. */
29867 else if (i
< length
)
29869 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29870 offset
+= length
- nelt_mode
;
29871 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29872 /* We are shifting bytes back, set the alignment accordingly. */
29873 if ((length
& 1) == 0)
29874 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29876 set_mem_align (mem
, BITS_PER_UNIT
);
29878 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29884 /* Set a block of memory using plain strh/strb instructions, only
29885 using instructions allowed by ALIGN on processor. We fill the
29886 first LENGTH bytes of the memory area starting from DSTBASE
29887 with byte constant VALUE. ALIGN is the alignment requirement
29890 arm_block_set_unaligned_non_vect (rtx dstbase
,
29891 unsigned HOST_WIDE_INT length
,
29892 unsigned HOST_WIDE_INT value
,
29893 unsigned HOST_WIDE_INT align
)
29896 rtx dst
, addr
, mem
;
29897 rtx val_exp
, val_reg
, reg
;
29899 HOST_WIDE_INT v
= value
;
29901 gcc_assert (align
== 1 || align
== 2);
29904 v
|= (value
<< BITS_PER_UNIT
);
29906 v
= sext_hwi (v
, BITS_PER_WORD
);
29907 val_exp
= GEN_INT (v
);
29908 /* Skip if it isn't profitable. */
29909 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29910 align
, true, false))
29913 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29914 mode
= (align
== 2 ? HImode
: QImode
);
29915 val_reg
= force_reg (SImode
, val_exp
);
29916 reg
= gen_lowpart (mode
, val_reg
);
29918 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
29920 addr
= plus_constant (Pmode
, dst
, i
);
29921 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
29922 emit_move_insn (mem
, reg
);
29925 /* Handle single byte leftover. */
29926 if (i
+ 1 == length
)
29928 reg
= gen_lowpart (QImode
, val_reg
);
29929 addr
= plus_constant (Pmode
, dst
, i
);
29930 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
29931 emit_move_insn (mem
, reg
);
29935 gcc_assert (i
== length
);
29939 /* Set a block of memory using plain strd/str/strh/strb instructions,
29940 to permit unaligned copies on processors which support unaligned
29941 semantics for those instructions. We fill the first LENGTH bytes
29942 of the memory area starting from DSTBASE with byte constant VALUE.
29943 ALIGN is the alignment requirement of memory. */
29945 arm_block_set_aligned_non_vect (rtx dstbase
,
29946 unsigned HOST_WIDE_INT length
,
29947 unsigned HOST_WIDE_INT value
,
29948 unsigned HOST_WIDE_INT align
)
29951 rtx dst
, addr
, mem
;
29952 rtx val_exp
, val_reg
, reg
;
29953 unsigned HOST_WIDE_INT v
;
29956 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
29957 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
29959 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
29960 if (length
< UNITS_PER_WORD
)
29961 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
29964 v
|= (v
<< BITS_PER_WORD
);
29966 v
= sext_hwi (v
, BITS_PER_WORD
);
29968 val_exp
= GEN_INT (v
);
29969 /* Skip if it isn't profitable. */
29970 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29971 align
, false, use_strd_p
))
29976 /* Try without strd. */
29977 v
= (v
>> BITS_PER_WORD
);
29978 v
= sext_hwi (v
, BITS_PER_WORD
);
29979 val_exp
= GEN_INT (v
);
29980 use_strd_p
= false;
29981 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29982 align
, false, use_strd_p
))
29987 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29988 /* Handle double words using strd if possible. */
29991 val_reg
= force_reg (DImode
, val_exp
);
29993 for (; (i
+ 8 <= length
); i
+= 8)
29995 addr
= plus_constant (Pmode
, dst
, i
);
29996 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
29997 emit_move_insn (mem
, reg
);
30001 val_reg
= force_reg (SImode
, val_exp
);
30003 /* Handle words. */
30004 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
30005 for (; (i
+ 4 <= length
); i
+= 4)
30007 addr
= plus_constant (Pmode
, dst
, i
);
30008 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
30009 if ((align
& 3) == 0)
30010 emit_move_insn (mem
, reg
);
30012 emit_insn (gen_unaligned_storesi (mem
, reg
));
30015 /* Merge last pair of STRH and STRB into a STR if possible. */
30016 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
30018 addr
= plus_constant (Pmode
, dst
, i
- 1);
30019 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
30020 /* We are shifting one byte back, set the alignment accordingly. */
30021 if ((align
& 1) == 0)
30022 set_mem_align (mem
, BITS_PER_UNIT
);
30024 /* Most likely this is an unaligned access, and we can't tell at
30025 compilation time. */
30026 emit_insn (gen_unaligned_storesi (mem
, reg
));
30030 /* Handle half word leftover. */
30031 if (i
+ 2 <= length
)
30033 reg
= gen_lowpart (HImode
, val_reg
);
30034 addr
= plus_constant (Pmode
, dst
, i
);
30035 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
30036 if ((align
& 1) == 0)
30037 emit_move_insn (mem
, reg
);
30039 emit_insn (gen_unaligned_storehi (mem
, reg
));
30044 /* Handle single byte leftover. */
30045 if (i
+ 1 == length
)
30047 reg
= gen_lowpart (QImode
, val_reg
);
30048 addr
= plus_constant (Pmode
, dst
, i
);
30049 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
30050 emit_move_insn (mem
, reg
);
30056 /* Set a block of memory using vectorization instructions for both
30057 aligned and unaligned cases. We fill the first LENGTH bytes of
30058 the memory area starting from DSTBASE with byte constant VALUE.
30059 ALIGN is the alignment requirement of memory. */
30061 arm_block_set_vect (rtx dstbase
,
30062 unsigned HOST_WIDE_INT length
,
30063 unsigned HOST_WIDE_INT value
,
30064 unsigned HOST_WIDE_INT align
)
30066 /* Check whether we need to use unaligned store instruction. */
30067 if (((align
& 3) != 0 || (length
& 3) != 0)
30068 /* Check whether unaligned store instruction is available. */
30069 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
30072 if ((align
& 3) == 0)
30073 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
30075 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
30078 /* Expand string store operation. Firstly we try to do that by using
30079 vectorization instructions, then try with ARM unaligned access and
30080 double-word store if profitable. OPERANDS[0] is the destination,
30081 OPERANDS[1] is the number of bytes, operands[2] is the value to
30082 initialize the memory, OPERANDS[3] is the known alignment of the
30085 arm_gen_setmem (rtx
*operands
)
30087 rtx dstbase
= operands
[0];
30088 unsigned HOST_WIDE_INT length
;
30089 unsigned HOST_WIDE_INT value
;
30090 unsigned HOST_WIDE_INT align
;
30092 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
30095 length
= UINTVAL (operands
[1]);
30099 value
= (UINTVAL (operands
[2]) & 0xFF);
30100 align
= UINTVAL (operands
[3]);
30101 if (TARGET_NEON
&& length
>= 8
30102 && current_tune
->string_ops_prefer_neon
30103 && arm_block_set_vect (dstbase
, length
, value
, align
))
30106 if (!unaligned_access
&& (align
& 3) != 0)
30107 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
30109 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
30114 arm_macro_fusion_p (void)
30116 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
30119 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30120 for MOVW / MOVT macro fusion. */
30123 arm_sets_movw_movt_fusible_p (rtx prev_set
, rtx curr_set
)
30125 /* We are trying to fuse
30126 movw imm / movt imm
30127 instructions as a group that gets scheduled together. */
30129 rtx set_dest
= SET_DEST (curr_set
);
30131 if (GET_MODE (set_dest
) != SImode
)
30134 /* We are trying to match:
30135 prev (movw) == (set (reg r0) (const_int imm16))
30136 curr (movt) == (set (zero_extract (reg r0)
30139 (const_int imm16_1))
30141 prev (movw) == (set (reg r1)
30142 (high (symbol_ref ("SYM"))))
30143 curr (movt) == (set (reg r0)
30145 (symbol_ref ("SYM")))) */
30147 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
30149 if (CONST_INT_P (SET_SRC (curr_set
))
30150 && CONST_INT_P (SET_SRC (prev_set
))
30151 && REG_P (XEXP (set_dest
, 0))
30152 && REG_P (SET_DEST (prev_set
))
30153 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
30157 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
30158 && REG_P (SET_DEST (curr_set
))
30159 && REG_P (SET_DEST (prev_set
))
30160 && GET_CODE (SET_SRC (prev_set
)) == HIGH
30161 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
30168 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
30170 rtx prev_set
= single_set (prev
);
30171 rtx curr_set
= single_set (curr
);
30177 if (any_condjump_p (curr
))
30180 if (!arm_macro_fusion_p ())
30183 if (current_tune
->fusible_ops
& tune_params::FUSE_AES_AESMC
30184 && aarch_crypto_can_dual_issue (prev
, curr
))
30187 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
30188 && arm_sets_movw_movt_fusible_p (prev_set
, curr_set
))
30194 /* Return true iff the instruction fusion described by OP is enabled. */
30196 arm_fusion_enabled_p (tune_params::fuse_ops op
)
30198 return current_tune
->fusible_ops
& op
;
30201 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30202 scheduled for speculative execution. Reject the long-running division
30203 and square-root instructions. */
30206 arm_sched_can_speculate_insn (rtx_insn
*insn
)
30208 switch (get_attr_type (insn
))
30216 case TYPE_NEON_FP_SQRT_S
:
30217 case TYPE_NEON_FP_SQRT_D
:
30218 case TYPE_NEON_FP_SQRT_S_Q
:
30219 case TYPE_NEON_FP_SQRT_D_Q
:
30220 case TYPE_NEON_FP_DIV_S
:
30221 case TYPE_NEON_FP_DIV_D
:
30222 case TYPE_NEON_FP_DIV_S_Q
:
30223 case TYPE_NEON_FP_DIV_D_Q
:
30230 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30232 static unsigned HOST_WIDE_INT
30233 arm_asan_shadow_offset (void)
30235 return HOST_WIDE_INT_1U
<< 29;
30239 /* This is a temporary fix for PR60655. Ideally we need
30240 to handle most of these cases in the generic part but
30241 currently we reject minus (..) (sym_ref). We try to
30242 ameliorate the case with minus (sym_ref1) (sym_ref2)
30243 where they are in the same section. */
30246 arm_const_not_ok_for_debug_p (rtx p
)
30248 tree decl_op0
= NULL
;
30249 tree decl_op1
= NULL
;
30251 if (GET_CODE (p
) == MINUS
)
30253 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
30255 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
30257 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
30258 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
30260 if ((VAR_P (decl_op1
)
30261 || TREE_CODE (decl_op1
) == CONST_DECL
)
30262 && (VAR_P (decl_op0
)
30263 || TREE_CODE (decl_op0
) == CONST_DECL
))
30264 return (get_variable_section (decl_op1
, false)
30265 != get_variable_section (decl_op0
, false));
30267 if (TREE_CODE (decl_op1
) == LABEL_DECL
30268 && TREE_CODE (decl_op0
) == LABEL_DECL
)
30269 return (DECL_CONTEXT (decl_op1
)
30270 != DECL_CONTEXT (decl_op0
));
30280 /* return TRUE if x is a reference to a value in a constant pool */
30282 arm_is_constant_pool_ref (rtx x
)
30285 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
30286 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
30289 /* Remember the last target of arm_set_current_function. */
30290 static GTY(()) tree arm_previous_fndecl
;
30292 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30295 save_restore_target_globals (tree new_tree
)
30297 /* If we have a previous state, use it. */
30298 if (TREE_TARGET_GLOBALS (new_tree
))
30299 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
30300 else if (new_tree
== target_option_default_node
)
30301 restore_target_globals (&default_target_globals
);
30304 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30305 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
30308 arm_option_params_internal ();
30311 /* Invalidate arm_previous_fndecl. */
30314 arm_reset_previous_fndecl (void)
30316 arm_previous_fndecl
= NULL_TREE
;
30319 /* Establish appropriate back-end context for processing the function
30320 FNDECL. The argument might be NULL to indicate processing at top
30321 level, outside of any function scope. */
30324 arm_set_current_function (tree fndecl
)
30326 if (!fndecl
|| fndecl
== arm_previous_fndecl
)
30329 tree old_tree
= (arm_previous_fndecl
30330 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl
)
30333 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30335 /* If current function has no attributes but previous one did,
30336 use the default node. */
30337 if (! new_tree
&& old_tree
)
30338 new_tree
= target_option_default_node
;
30340 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30341 the default have been handled by save_restore_target_globals from
30342 arm_pragma_target_parse. */
30343 if (old_tree
== new_tree
)
30346 arm_previous_fndecl
= fndecl
;
30348 /* First set the target options. */
30349 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
30351 save_restore_target_globals (new_tree
);
30354 /* Implement TARGET_OPTION_PRINT. */
30357 arm_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
30359 int flags
= ptr
->x_target_flags
;
30360 const char *fpu_name
;
30362 fpu_name
= (ptr
->x_arm_fpu_index
== TARGET_FPU_auto
30363 ? "auto" : all_fpus
[ptr
->x_arm_fpu_index
].name
);
30365 fprintf (file
, "%*sselected arch %s\n", indent
, "",
30366 TARGET_THUMB2_P (flags
) ? "thumb2" :
30367 TARGET_THUMB_P (flags
) ? "thumb1" :
30370 fprintf (file
, "%*sselected fpu %s\n", indent
, "", fpu_name
);
30373 /* Hook to determine if one function can safely inline another. */
30376 arm_can_inline_p (tree caller
, tree callee
)
30378 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
30379 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
30380 bool can_inline
= true;
30382 struct cl_target_option
*caller_opts
30383 = TREE_TARGET_OPTION (caller_tree
? caller_tree
30384 : target_option_default_node
);
30386 struct cl_target_option
*callee_opts
30387 = TREE_TARGET_OPTION (callee_tree
? callee_tree
30388 : target_option_default_node
);
30390 if (callee_opts
== caller_opts
)
30393 /* Callee's ISA features should be a subset of the caller's. */
30394 struct arm_build_target caller_target
;
30395 struct arm_build_target callee_target
;
30396 caller_target
.isa
= sbitmap_alloc (isa_num_bits
);
30397 callee_target
.isa
= sbitmap_alloc (isa_num_bits
);
30399 arm_configure_build_target (&caller_target
, caller_opts
, &global_options_set
,
30401 arm_configure_build_target (&callee_target
, callee_opts
, &global_options_set
,
30403 if (!bitmap_subset_p (callee_target
.isa
, caller_target
.isa
))
30404 can_inline
= false;
30406 sbitmap_free (caller_target
.isa
);
30407 sbitmap_free (callee_target
.isa
);
30409 /* OK to inline between different modes.
30410 Function with mode specific instructions, e.g using asm,
30411 must be explicitly protected with noinline. */
30415 /* Hook to fix function's alignment affected by target attribute. */
30418 arm_relayout_function (tree fndecl
)
30420 if (DECL_USER_ALIGN (fndecl
))
30423 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30426 callee_tree
= target_option_default_node
;
30428 struct cl_target_option
*opts
= TREE_TARGET_OPTION (callee_tree
);
30431 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts
->x_target_flags
)));
30434 /* Inner function to process the attribute((target(...))), take an argument and
30435 set the current options from the argument. If we have a list, recursively
30436 go over the list. */
30439 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
30441 if (TREE_CODE (args
) == TREE_LIST
)
30445 for (; args
; args
= TREE_CHAIN (args
))
30446 if (TREE_VALUE (args
)
30447 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
30452 else if (TREE_CODE (args
) != STRING_CST
)
30454 error ("attribute %<target%> argument not a string");
30458 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
30461 while ((q
= strtok (argstr
, ",")) != NULL
)
30463 while (ISSPACE (*q
)) ++q
;
30466 if (!strncmp (q
, "thumb", 5))
30467 opts
->x_target_flags
|= MASK_THUMB
;
30469 else if (!strncmp (q
, "arm", 3))
30470 opts
->x_target_flags
&= ~MASK_THUMB
;
30472 else if (!strncmp (q
, "fpu=", 4))
30475 if (! opt_enum_arg_to_value (OPT_mfpu_
, q
+4,
30476 &fpu_index
, CL_TARGET
))
30478 error ("invalid fpu for attribute(target(\"%s\"))", q
);
30481 if (fpu_index
== TARGET_FPU_auto
)
30483 /* This doesn't really make sense until we support
30484 general dynamic selection of the architecture and all
30486 sorry ("auto fpu selection not currently permitted here");
30489 opts
->x_arm_fpu_index
= (enum fpu_type
) fpu_index
;
30493 error ("attribute(target(\"%s\")) is unknown", q
);
30501 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30504 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
30505 struct gcc_options
*opts_set
)
30507 struct cl_target_option cl_opts
;
30509 if (!arm_valid_target_attribute_rec (args
, opts
))
30512 cl_target_option_save (&cl_opts
, opts
);
30513 arm_configure_build_target (&arm_active_target
, &cl_opts
, opts_set
, false);
30514 arm_option_check_internal (opts
);
30515 /* Do any overrides, such as global options arch=xxx. */
30516 arm_option_override_internal (opts
, opts_set
);
30518 return build_target_option_node (opts
);
30522 add_attribute (const char * mode
, tree
*attributes
)
30524 size_t len
= strlen (mode
);
30525 tree value
= build_string (len
, mode
);
30527 TREE_TYPE (value
) = build_array_type (char_type_node
,
30528 build_index_type (size_int (len
)));
30530 *attributes
= tree_cons (get_identifier ("target"),
30531 build_tree_list (NULL_TREE
, value
),
30535 /* For testing. Insert thumb or arm modes alternatively on functions. */
30538 arm_insert_attributes (tree fndecl
, tree
* attributes
)
30542 if (! TARGET_FLIP_THUMB
)
30545 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
30546 || DECL_BUILT_IN (fndecl
) || DECL_ARTIFICIAL (fndecl
))
30549 /* Nested definitions must inherit mode. */
30550 if (current_function_decl
)
30552 mode
= TARGET_THUMB
? "thumb" : "arm";
30553 add_attribute (mode
, attributes
);
30557 /* If there is already a setting don't change it. */
30558 if (lookup_attribute ("target", *attributes
) != NULL
)
30561 mode
= thumb_flipper
? "thumb" : "arm";
30562 add_attribute (mode
, attributes
);
30564 thumb_flipper
= !thumb_flipper
;
30567 /* Hook to validate attribute((target("string"))). */
30570 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
30571 tree args
, int ARG_UNUSED (flags
))
30574 struct gcc_options func_options
;
30575 tree cur_tree
, new_optimize
;
30576 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
30578 /* Get the optimization options of the current function. */
30579 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
30581 /* If the function changed the optimization levels as well as setting target
30582 options, start with the optimizations specified. */
30583 if (!func_optimize
)
30584 func_optimize
= optimization_default_node
;
30586 /* Init func_options. */
30587 memset (&func_options
, 0, sizeof (func_options
));
30588 init_options_struct (&func_options
, NULL
);
30589 lang_hooks
.init_options_struct (&func_options
);
30591 /* Initialize func_options to the defaults. */
30592 cl_optimization_restore (&func_options
,
30593 TREE_OPTIMIZATION (func_optimize
));
30595 cl_target_option_restore (&func_options
,
30596 TREE_TARGET_OPTION (target_option_default_node
));
30598 /* Set func_options flags with new target mode. */
30599 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
30600 &global_options_set
);
30602 if (cur_tree
== NULL_TREE
)
30605 new_optimize
= build_optimization_node (&func_options
);
30607 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
30609 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
30611 finalize_options_struct (&func_options
);
30616 /* Match an ISA feature bitmap to a named FPU. We always use the
30617 first entry that exactly matches the feature set, so that we
30618 effectively canonicalize the FPU name for the assembler. */
30620 arm_identify_fpu_from_isa (sbitmap isa
)
30622 auto_sbitmap
fpubits (isa_num_bits
);
30623 auto_sbitmap
cand_fpubits (isa_num_bits
);
30625 bitmap_and (fpubits
, isa
, isa_all_fpubits
);
30627 /* If there are no ISA feature bits relating to the FPU, we must be
30628 doing soft-float. */
30629 if (bitmap_empty_p (fpubits
))
30632 for (unsigned int i
= 0; i
< ARRAY_SIZE (all_fpus
); i
++)
30634 arm_initialize_isa (cand_fpubits
, all_fpus
[i
].isa_bits
);
30635 if (bitmap_equal_p (fpubits
, cand_fpubits
))
30636 return all_fpus
[i
].name
;
30638 /* We must find an entry, or things have gone wrong. */
30639 gcc_unreachable ();
30643 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
30646 fprintf (stream
, "\t.syntax unified\n");
30650 if (is_called_in_ARM_mode (decl
)
30651 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
30652 && cfun
->is_thunk
))
30653 fprintf (stream
, "\t.code 32\n");
30654 else if (TARGET_THUMB1
)
30655 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
30657 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
30660 fprintf (stream
, "\t.arm\n");
30662 asm_fprintf (asm_out_file
, "\t.fpu %s\n",
30665 : arm_identify_fpu_from_isa (arm_active_target
.isa
)));
30667 if (TARGET_POKE_FUNCTION_NAME
)
30668 arm_poke_function_name (stream
, (const char *) name
);
30671 /* If MEM is in the form of [base+offset], extract the two parts
30672 of address and set to BASE and OFFSET, otherwise return false
30673 after clearing BASE and OFFSET. */
30676 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
30680 gcc_assert (MEM_P (mem
));
30682 addr
= XEXP (mem
, 0);
30684 /* Strip off const from addresses like (const (addr)). */
30685 if (GET_CODE (addr
) == CONST
)
30686 addr
= XEXP (addr
, 0);
30688 if (GET_CODE (addr
) == REG
)
30691 *offset
= const0_rtx
;
30695 if (GET_CODE (addr
) == PLUS
30696 && GET_CODE (XEXP (addr
, 0)) == REG
30697 && CONST_INT_P (XEXP (addr
, 1)))
30699 *base
= XEXP (addr
, 0);
30700 *offset
= XEXP (addr
, 1);
30705 *offset
= NULL_RTX
;
30710 /* If INSN is a load or store of address in the form of [base+offset],
30711 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
30712 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30713 otherwise return FALSE. */
30716 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
30720 gcc_assert (INSN_P (insn
));
30721 x
= PATTERN (insn
);
30722 if (GET_CODE (x
) != SET
)
30726 dest
= SET_DEST (x
);
30727 if (GET_CODE (src
) == REG
&& GET_CODE (dest
) == MEM
)
30730 extract_base_offset_in_addr (dest
, base
, offset
);
30732 else if (GET_CODE (src
) == MEM
&& GET_CODE (dest
) == REG
)
30735 extract_base_offset_in_addr (src
, base
, offset
);
30740 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
30743 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30745 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
30746 and PRI are only calculated for these instructions. For other instruction,
30747 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
30748 instruction fusion can be supported by returning different priorities.
30750 It's important that irrelevant instructions get the largest FUSION_PRI. */
30753 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
30754 int *fusion_pri
, int *pri
)
30760 gcc_assert (INSN_P (insn
));
30763 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
30770 /* Load goes first. */
30772 *fusion_pri
= tmp
- 1;
30774 *fusion_pri
= tmp
- 2;
30778 /* INSN with smaller base register goes first. */
30779 tmp
-= ((REGNO (base
) & 0xff) << 20);
30781 /* INSN with smaller offset goes first. */
30782 off_val
= (int)(INTVAL (offset
));
30784 tmp
-= (off_val
& 0xfffff);
30786 tmp
+= ((- off_val
) & 0xfffff);
30793 /* Construct and return a PARALLEL RTX vector with elements numbering the
30794 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30795 the vector - from the perspective of the architecture. This does not
30796 line up with GCC's perspective on lane numbers, so we end up with
30797 different masks depending on our target endian-ness. The diagram
30798 below may help. We must draw the distinction when building masks
30799 which select one half of the vector. An instruction selecting
30800 architectural low-lanes for a big-endian target, must be described using
30801 a mask selecting GCC high-lanes.
30803 Big-Endian Little-Endian
30805 GCC 0 1 2 3 3 2 1 0
30806 | x | x | x | x | | x | x | x | x |
30807 Architecture 3 2 1 0 3 2 1 0
30809 Low Mask: { 2, 3 } { 0, 1 }
30810 High Mask: { 0, 1 } { 2, 3 }
30814 arm_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
30816 int nunits
= GET_MODE_NUNITS (mode
);
30817 rtvec v
= rtvec_alloc (nunits
/ 2);
30818 int high_base
= nunits
/ 2;
30824 if (BYTES_BIG_ENDIAN
)
30825 base
= high
? low_base
: high_base
;
30827 base
= high
? high_base
: low_base
;
30829 for (i
= 0; i
< nunits
/ 2; i
++)
30830 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
30832 t1
= gen_rtx_PARALLEL (mode
, v
);
30836 /* Check OP for validity as a PARALLEL RTX vector with elements
30837 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30838 from the perspective of the architecture. See the diagram above
30839 arm_simd_vect_par_cnst_half_p for more details. */
30842 arm_simd_check_vect_par_cnst_half_p (rtx op
, machine_mode mode
,
30845 rtx ideal
= arm_simd_vect_par_cnst_half (mode
, high
);
30846 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
30847 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
30850 if (!VECTOR_MODE_P (mode
))
30853 if (count_op
!= count_ideal
)
30856 for (i
= 0; i
< count_ideal
; i
++)
30858 rtx elt_op
= XVECEXP (op
, 0, i
);
30859 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
30861 if (!CONST_INT_P (elt_op
)
30862 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
30868 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30871 arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
30874 /* For now, we punt and not handle this for TARGET_THUMB1. */
30875 if (vcall_offset
&& TARGET_THUMB1
)
30878 /* Otherwise ok. */
30882 /* Generate RTL for a conditional branch with rtx comparison CODE in
30883 mode CC_MODE. The destination of the unlikely conditional branch
30887 arm_gen_unlikely_cbranch (enum rtx_code code
, machine_mode cc_mode
,
30891 x
= gen_rtx_fmt_ee (code
, VOIDmode
,
30892 gen_rtx_REG (cc_mode
, CC_REGNUM
),
30895 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
30896 gen_rtx_LABEL_REF (VOIDmode
, label_ref
),
30898 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
30901 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30903 For pure-code sections there is no letter code for this attribute, so
30904 output all the section flags numerically when this is needed. */
30907 arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
)
30910 if (flags
& SECTION_ARM_PURECODE
)
30914 if (!(flags
& SECTION_DEBUG
))
30916 if (flags
& SECTION_EXCLUDE
)
30917 *num
|= 0x80000000;
30918 if (flags
& SECTION_WRITE
)
30920 if (flags
& SECTION_CODE
)
30922 if (flags
& SECTION_MERGE
)
30924 if (flags
& SECTION_STRINGS
)
30926 if (flags
& SECTION_TLS
)
30928 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
30937 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
30939 If pure-code is passed as an option, make sure all functions are in
30940 sections that have the SHF_ARM_PURECODE attribute. */
30943 arm_function_section (tree decl
, enum node_frequency freq
,
30944 bool startup
, bool exit
)
30946 const char * section_name
;
30949 if (!decl
|| TREE_CODE (decl
) != FUNCTION_DECL
)
30950 return default_function_section (decl
, freq
, startup
, exit
);
30952 if (!target_pure_code
)
30953 return default_function_section (decl
, freq
, startup
, exit
);
30956 section_name
= DECL_SECTION_NAME (decl
);
30958 /* If a function is not in a named section then it falls under the 'default'
30959 text section, also known as '.text'. We can preserve previous behavior as
30960 the default text section already has the SHF_ARM_PURECODE section
30964 section
*default_sec
= default_function_section (decl
, freq
, startup
,
30967 /* If default_sec is not null, then it must be a special section like for
30968 example .text.startup. We set the pure-code attribute and return the
30969 same section to preserve existing behavior. */
30971 default_sec
->common
.flags
|= SECTION_ARM_PURECODE
;
30972 return default_sec
;
30975 /* Otherwise look whether a section has already been created with
30977 sec
= get_named_section (decl
, section_name
, 0);
30979 /* If that is not the case passing NULL as the section's name to
30980 'get_named_section' will create a section with the declaration's
30982 sec
= get_named_section (decl
, NULL
, 0);
30984 /* Set the SHF_ARM_PURECODE attribute. */
30985 sec
->common
.flags
|= SECTION_ARM_PURECODE
;
30990 /* Implements the TARGET_SECTION_FLAGS hook.
30992 If DECL is a function declaration and pure-code is passed as an option
30993 then add the SFH_ARM_PURECODE attribute to the section flags. NAME is the
30994 section's name and RELOC indicates whether the declarations initializer may
30995 contain runtime relocations. */
30997 static unsigned int
30998 arm_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
31000 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
31002 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
&& target_pure_code
)
31003 flags
|= SECTION_ARM_PURECODE
;
31008 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31011 arm_expand_divmod_libfunc (rtx libfunc
, machine_mode mode
,
31013 rtx
*quot_p
, rtx
*rem_p
)
31015 if (mode
== SImode
)
31016 gcc_assert (!TARGET_IDIV
);
31018 machine_mode libval_mode
= smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode
),
31021 rtx libval
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
31023 op0
, GET_MODE (op0
),
31024 op1
, GET_MODE (op1
));
31026 rtx quotient
= simplify_gen_subreg (mode
, libval
, libval_mode
, 0);
31027 rtx remainder
= simplify_gen_subreg (mode
, libval
, libval_mode
,
31028 GET_MODE_SIZE (mode
));
31030 gcc_assert (quotient
);
31031 gcc_assert (remainder
);
31033 *quot_p
= quotient
;
31034 *rem_p
= remainder
;
31037 /* This function checks for the availability of the coprocessor builtin passed
31038 in BUILTIN for the current target. Returns true if it is available and
31039 false otherwise. If a BUILTIN is passed for which this function has not
31040 been implemented it will cause an exception. */
31043 arm_coproc_builtin_available (enum unspecv builtin
)
31045 /* None of these builtins are available in Thumb mode if the target only
31046 supports Thumb-1. */
31064 case VUNSPEC_LDC2L
:
31066 case VUNSPEC_STC2L
:
31069 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31076 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31078 if (arm_arch6
|| arm_arch5te
)
31081 case VUNSPEC_MCRR2
:
31082 case VUNSPEC_MRRC2
:
31087 gcc_unreachable ();
31092 /* This function returns true if OP is a valid memory operand for the ldc and
31093 stc coprocessor instructions and false otherwise. */
31096 arm_coproc_ldc_stc_legitimate_address (rtx op
)
31098 HOST_WIDE_INT range
;
31099 /* Has to be a memory operand. */
31105 /* We accept registers. */
31109 switch GET_CODE (op
)
31113 /* Or registers with an offset. */
31114 if (!REG_P (XEXP (op
, 0)))
31119 /* The offset must be an immediate though. */
31120 if (!CONST_INT_P (op
))
31123 range
= INTVAL (op
);
31125 /* Within the range of [-1020,1020]. */
31126 if (!IN_RANGE (range
, -1020, 1020))
31129 /* And a multiple of 4. */
31130 return (range
% 4) == 0;
31136 return REG_P (XEXP (op
, 0));
31138 gcc_unreachable ();
31142 #include "gt-arm.h"