/* Output routines for GCC for ARM.
   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "target-globals.h"
#include "tm-constrs.h"
#include "optabs-libfuncs.h"

/* This file should be included last.  */
#include "target-def.h"

/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;
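
/* A minipool is a constant pool emitted inline in the instruction stream;
   an Mnode describes one entry in such a pool and an Mfix one instruction
   (or barrier) that needs fixing up to reference it.  */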

void (*arm_lang_output_object_attributes_hook)(void);

/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static int arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_compute_frame_layout (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_core_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *);
static void arm_output_function_prologue (FILE *);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
				  tree, bool);
static rtx arm_function_arg (cumulative_args_t, machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, machine_mode,
				      const_tree, bool);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);

static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
#endif
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_save (struct cl_target_option *, struct gcc_options *);
static void arm_option_restore (struct gcc_options *,
				struct cl_target_option *);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_sched_can_speculate_insn (rtx_insn *);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static bool arm_scalar_mode_supported_p (machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
					     const unsigned char *sel);

static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);

static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   int misalign,
				   enum vect_cost_model_location where);

static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int *);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
				     const_tree);
static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
						int reloc);
static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
static opt_scalar_float_mode arm_floatn_mode (int, bool);

/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false,
    arm_handle_cmse_nonsecure_entry, false },
  { "cmse_nonsecure_call", 0, 0, true, false, false,
    arm_handle_cmse_nonsecure_call, true },
  { NULL, 0, 0, false, false, false, NULL, false }
};
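
/* For reference, the attributes above are applied in user code like so
   (illustrative only; see the "ARM Function Attributes" section of the
   GCC manual for full semantics):

     void far_func (void) __attribute__ ((long_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     double vfp_add (double, double) __attribute__ ((pcs ("aapcs-vfp")));
     void stub (void) __attribute__ ((naked));  */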

/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE arm_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
#endif /* ARM_UNWIND_INFO */

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
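
/* An IT (If-Then) instruction can normally conditionalise up to four
   following Thumb-2 instructions; -mrestrict-it (arm_restrict_it, the
   ARMv8-A recommendation) limits an IT block to a single 16-bit
   instruction, hence the value 1 above.  */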

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision

/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;
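
/* For example, a conditional branch around a sequence of up to five
   instructions can instead be turned into that many conditionally
   executed instructions; longer sequences keep the branch.  */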

extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = TARGET_CPU_arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned int tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;

/* Active target architecture and tuning.  */
struct arm_build_target arm_active_target;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
int arm_arch5te = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 6KZ extensions.  */
int arm_arch6kz = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip supports the ARMv8.1 extensions.  */
int arm_arch8_1 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
int arm_arch8_2 = 0;

/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 1.  */
int arm_arch_thumb1;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we should use Neon to handle 64-bit operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency multiply unit.  */
int arm_m_profile_small_mul = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
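
/* THUMB2_WORK_REGS is the set of low registers (r0-r7) usable as work
   registers, minus the Thumb hard frame pointer and the PIC offset
   register; SP and PC are masked out defensively even though they lie
   outside the low-register range.  */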

/* Initialization code.  */

struct cpu_tune
{
  enum processor_type scheduler;
  unsigned int tune_flags;
  const struct tune_params *tune;
};

#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  { num_slots, l1_size, l1_line_size }
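
/* For example, ARM_PREFETCH_BENEFICIAL (4, 32, 32) would describe a core
   with four prefetch slots, a 32K L1 data cache and 32-byte cache lines;
   the arguments fill the num_slots, l1_cache_size and l1_cache_line_size
   prefetch fields of tune_params.  */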

/* ARM generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,	/* scalar_stmt_cost.  */
  1,	/* scalar_load_cost.  */
  1,	/* scalar_store_cost.  */
  1,	/* vec_stmt_cost.  */
  1,	/* vec_to_scalar_cost.  */
  1,	/* scalar_to_vec_cost.  */
  1,	/* vec_align_load_cost.  */
  1,	/* vec_unalign_load_cost.  */
  1,	/* vec_unalign_store_cost.  */
  1,	/* vec_store_cost.  */
  3,	/* cond_taken_branch_cost.  */
  1,	/* cond_not_taken_branch_cost.  */
};
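
/* These unit costs feed arm_builtin_vectorization_cost; with everything
   at 1 (and taken branches at 3), the generic model treats scalar and
   vector statements as roughly equally cheap, so vectorization decisions
   are driven mainly by iteration counts and alignment.  */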

/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"

const struct cpu_cost_table cortexa9_extra_costs =

  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (2),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */

  true			/* non_exec_costs_exec.  */

  COSTS_N_INSNS (3),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */

  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (4),	/* extend.  */
  COSTS_N_INSNS (4),	/* extend_add.  */

  COSTS_N_INSNS (2),	/* load.  */
  COSTS_N_INSNS (2),	/* load_sign_extend.  */
  COSTS_N_INSNS (2),	/* ldrd.  */
  COSTS_N_INSNS (2),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (5),	/* loadf.  */
  COSTS_N_INSNS (5),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (2),	/* store.  */
  COSTS_N_INSNS (2),	/* strd.  */
  COSTS_N_INSNS (2),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* storef.  */
  COSTS_N_INSNS (1),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */

  COSTS_N_INSNS (14),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1),	/* fpconst.  */
  COSTS_N_INSNS (1),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */

  COSTS_N_INSNS (24),	/* div.  */
  COSTS_N_INSNS (5),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1),	/* fpconst.  */
  COSTS_N_INSNS (1),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */

  COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa8_extra_costs =

  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  0,			/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  0,			/* log_shift_reg.  */
  0,			/* extend_arith.  */

  true			/* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),	/* simple.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */

  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */

  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (1),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* loadf.  */
  COSTS_N_INSNS (1),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (1),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* storef.  */
  COSTS_N_INSNS (1),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */

  COSTS_N_INSNS (36),	/* div.  */
  COSTS_N_INSNS (11),	/* mult.  */
  COSTS_N_INSNS (20),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (9),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (6),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (8),	/* toint.  */
  COSTS_N_INSNS (8),	/* fromint.  */
  COSTS_N_INSNS (8)	/* roundint.  */

  COSTS_N_INSNS (64),	/* div.  */
  COSTS_N_INSNS (16),	/* mult.  */
  COSTS_N_INSNS (25),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (9),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (6),	/* compare.  */
  COSTS_N_INSNS (6),	/* widen.  */
  COSTS_N_INSNS (6),	/* narrow.  */
  COSTS_N_INSNS (8),	/* toint.  */
  COSTS_N_INSNS (8),	/* fromint.  */
  COSTS_N_INSNS (8)	/* roundint.  */

  COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa5_extra_costs =

  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */

  true			/* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (7)	/* idiv.  */

  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */

  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (6),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (4),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */

  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (3),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */

  COSTS_N_INSNS (30),	/* div.  */
  COSTS_N_INSNS (6),	/* mult.  */
  COSTS_N_INSNS (10),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */

  COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa7_extra_costs =

  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */

  true			/* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (7)	/* idiv.  */

  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */

  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (2),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */

  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (3),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */

  COSTS_N_INSNS (30),	/* div.  */
  COSTS_N_INSNS (6),	/* mult.  */
  COSTS_N_INSNS (10),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */

  COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa12_extra_costs =

  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */

  true			/* non_exec_costs_exec.  */

  COSTS_N_INSNS (2),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (18)	/* idiv.  */

  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (3),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */

  COSTS_N_INSNS (3),	/* load.  */
  COSTS_N_INSNS (3),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (3),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (3),	/* loadf.  */
  COSTS_N_INSNS (3),	/* loadd.  */
  0,			/* load_unaligned.  */

  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  0,			/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */

  COSTS_N_INSNS (17),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */

  COSTS_N_INSNS (31),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */

  COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa15_extra_costs =

  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */

  true			/* non_exec_costs_exec.  */

  COSTS_N_INSNS (2),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (2),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (18)	/* idiv.  */

  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (3),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */

  COSTS_N_INSNS (3),	/* load.  */
  COSTS_N_INSNS (3),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (4),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (4),	/* loadf.  */
  COSTS_N_INSNS (4),	/* loadd.  */
  0,			/* load_unaligned.  */

  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */

  0,			/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */

  COSTS_N_INSNS (17),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (5),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */

  COSTS_N_INSNS (31),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */

  COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table v7m_extra_costs =

  0,			/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */

  COSTS_N_INSNS (1),	/* non_exec.  */
  false			/* non_exec_costs_exec.  */

  COSTS_N_INSNS (1),	/* simple.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (8)	/* idiv.  */

  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */

  COSTS_N_INSNS (2),	/* load.  */
  0,			/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (2),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  1,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (3),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (2),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (2),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  1,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (3),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */

  COSTS_N_INSNS (7),	/* div.  */
  COSTS_N_INSNS (2),	/* mult.  */
  COSTS_N_INSNS (5),	/* mult_addsub.  */
  COSTS_N_INSNS (3),	/* fma.  */
  COSTS_N_INSNS (1),	/* addsub.  */

  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (5),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */

  COSTS_N_INSNS (1)	/* alu.  */

const struct tune_params arm_slowmul_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fastmul_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xscale_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  xscale_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  2,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_9e_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_marvell_pj4_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_v6t2_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_cortex_a35_tune =
{
  &cortexa53_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_exynosm1_tune =
{
  &exynosm1_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  32,					/* Memset max inline.  */
  4,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a73_tune =
{
  &cortexa57_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};

/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT each take a
   single cycle, so materialising a 32-bit constant inline costs two cycles.
   An LDR from the constant pool likewise takes two cycles to execute, but
   mildly increases pipelining opportunity (consecutive loads/stores can be
   pipelined together, saving one cycle), and may also improve icache
   utilisation.  Hence we prefer the constant pool for such processors.  */

const struct tune_params arm_v7m_tune =
{
  &v7m_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

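/* To illustrate the trade-off above (editorial sketch; the exact code
   depends on target and optimization level), a 32-bit constant load is
   either pulled from the literal pool,

	ldr	r0, .L2			@ one load

   or synthesized inline,

	movw	r0, #:lower16:value
	movt	r0, #:upper16:value

   Both forms cost about two cycles on these cores, but the LDR form can
   pipeline with a neighbouring load/store and shares pool entries between
   uses, which is why PREF_CONST_POOL_TRUE is selected here.  */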
/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  &v7m_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
   cortex-m1.small-multiply.  */
const struct tune_params arm_v6m_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fa726te_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Auto-generated CPU, FPU and architecture tables.  */
#include "arm-cpu-data.h"

/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */

char arm_arch_name[] = "__ARM_ARCH_PROFILE__";

/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}

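/* Worked example (illustrative): value = 0b101100.  Then value - 1 is
   0b101011, and value & (value - 1) gives 0b101000, which clears exactly
   the lowest set bit.  The loop therefore iterates once per set bit,
   three times for this value.  */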
/* Return the number of bits set in BMAP.  */
static unsigned
bitmap_popcount (const sbitmap bmap)
{
  unsigned int count = 0;
  unsigned int n = 0;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
    count++;
  return count;
}

typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;

/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}

static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}

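/* For example (illustrative): converting between two signed fract modes,
   say QQmode to HQmode, satisfies all three conditions above and yields
   "__gnu_fractqqhq2", while a fract-to-accum conversion such as SQmode to
   SAmode differs in ALL_FRACT_MODE_P and keeps the plain name
   "__gnu_fractsqsa".  */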
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     caller-saved registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);

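  /* Sketch of the resulting AEABI calling convention (for reference): for
     "int q = a / b; int r = a % b;" the compiler can emit a single

	bl	__aeabi_idivmod

     with a in r0 and b in r1; the quotient comes back in r0 and the
     remainder in r1, so the mod result is simply read out of r1.  */
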
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      set_conv_libfunc (trunc_optab, HFmode, DFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_d2h_ieee"
			 : "__gnu_d2h_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

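  /* Consequently (illustrative), with -mfp16-format=ieee and no FP16
     hardware, a conversion such as

	__fp16 h = f;		/* f is a float.  */

     lowers to a call to __gnu_f2h_ieee, while arithmetic on __fp16 values
     is performed by widening to SFmode first, since the HFmode optabs
     above are all NULL.  */
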
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  const arm_fixed_mode_set fixed_arith_modes[] =
    {
      { E_QQmode, "qq" },
      { E_UQQmode, "uqq" },
      { E_HQmode, "hq" },
      { E_UHQmode, "uhq" },
      { E_SQmode, "sq" },
      { E_USQmode, "usq" },
      { E_DQmode, "dq" },
      { E_UDQmode, "udq" },
      { E_TQmode, "tq" },
      { E_UTQmode, "utq" },
      { E_HAmode, "ha" },
      { E_UHAmode, "uha" },
      { E_SAmode, "sa" },
      { E_USAmode, "usa" },
      { E_DAmode, "da" },
      { E_UDAmode, "uda" },
      { E_TAmode, "ta" },
      { E_UTAmode, "uta" }
    };
  const arm_fixed_mode_set fixed_conv_modes[] =
    {
      { E_QQmode, "qq" },
      { E_UQQmode, "uqq" },
      { E_HQmode, "hq" },
      { E_UHQmode, "uhq" },
      { E_SQmode, "sq" },
      { E_USQmode, "usq" },
      { E_DQmode, "dq" },
      { E_UDQmode, "udq" },
      { E_TQmode, "tq" },
      { E_UTQmode, "utq" },
      { E_HAmode, "ha" },
      { E_UHAmode, "uha" },
      { E_SAmode, "sa" },
      { E_USAmode, "usa" },
      { E_DAmode, "da" },
      { E_UDAmode, "uda" },
      { E_TAmode, "ta" },
      { E_UTAmode, "uta" },
      { E_QImode, "qi" },
      { E_HImode, "hi" },
      { E_SImode, "si" },
      { E_DImode, "di" },
      { E_SFmode, "sf" },
      { E_DFmode, "df" }
    };
  unsigned int i, j;

  for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
    {
      arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				   "add", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				   "ssadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				   "usadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				   "sub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				   "sssub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				   "ussub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				   "mul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				   "ssmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				   "usmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				   "div", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				   "udiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				   "ssdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				   "usdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				   "neg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				   "ssneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				   "usneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				   "ashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				   "ashr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				   "lshr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				   "ssashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				   "usashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				   "cmp", fixed_arith_modes[i].name, 2);
    }

  for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
    for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
      {
	if (i == j
	    || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		&& !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	  continue;

	arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "fract",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (satfract_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "satfract",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (fractuns_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "fractuns",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (satfractuns_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "satfractuns",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
      }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}

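/* The ABI consequence (editorial note): because the tag "__va_list" is
   part of the mangled name, a C++ function such as "void f (va_list)"
   mangles as _Z1fSt9__va_list on AAPCS targets (the tag is treated as if
   declared in namespace std), so any deviation in the type built above
   would break link compatibility between compilers.  */
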
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     __va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}

/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT
      && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  /* We only support -mpure-code and -mslow-flash-data on M-profile targets
     with MOVT.  */
  if ((target_pure_code || target_slow_flash_data)
      && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
    {
      const char *flag = (target_pure_code ? "-mpure-code" :
			  "-mslow-flash-data");
      error ("%s only supports non-pic code on M-profile targets with the "
	     "MOVT instruction", flag);
    }
}

/* Recompute the global settings depending on target attribute options.  */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }

  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
	 are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;

      /* For THUMB2, we limit the conditional sequence to one IT block.  */
      if (TARGET_THUMB2)
	max_insns_skipped = arm_restrict_it ? 1 : 4;
    }
  else
    /* When -mrestrict-it is in use tone down the if-conversion.  */
    max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
      ? 1 : current_tune->max_insns_skipped;
}

/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode.  */
static GTY(()) bool thumb_flipper;

/* Options after initial target override.  */
static GTY(()) tree init_optimize;

static void
arm_override_options_after_change_1 (struct gcc_options *opts)
{
  if (opts->x_align_functions <= 0)
    opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
      && opts->x_optimize_size ? 2 : 4;
}

/* Implement targetm.override_options_after_change.  */

static void
arm_override_options_after_change (void)
{
  arm_configure_build_target (&arm_active_target,
			      TREE_TARGET_OPTION (target_option_default_node),
			      &global_options_set, false);

  arm_override_options_after_change_1 (&global_options);
}

/* Implement TARGET_OPTION_SAVE.  */
static void
arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
{
  ptr->x_arm_arch_string = opts->x_arm_arch_string;
  ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
  ptr->x_arm_tune_string = opts->x_arm_tune_string;
}

2920 arm_option_restore (struct gcc_options
*opts
, struct cl_target_option
*ptr
)
2922 opts
->x_arm_arch_string
= ptr
->x_arm_arch_string
;
2923 opts
->x_arm_cpu_string
= ptr
->x_arm_cpu_string
;
2924 opts
->x_arm_tune_string
= ptr
->x_arm_tune_string
;
2925 arm_configure_build_target (&arm_active_target
, ptr
, &global_options_set
,
/* Reset options between modes that the user has specified.  */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  arm_override_options_after_change_1 (opts);

  if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      /* The default is to enable interworking, so this warning message would
	 be confusing to users who have just compiled with, eg, -march=armv3.  */
      /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
      opts->x_target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB_P (opts->x_target_flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  /* We need to remember the initial values so that combinations of options
     like -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  /* ARM execution state and M profile don't have [restrict] IT.  */
  if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
    opts->x_arm_restrict_it = 0;

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors when compiling for a 32-bit ISA,
       i.e. Thumb2 and ARM state only;
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors;
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors;
     - ARMv6-M architecture-based processors;
     - ARMv8-M Baseline processors.  */

  if (! opts_set->x_unaligned_access)
    {
      opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
				  && arm_arch6 && (arm_arch_notm || arm_arch7));
    }
  else if (opts->x_unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      opts->x_unaligned_access = 0;
    }

  /* Don't warn since it's on by default in -O2.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);

#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
  SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
#endif
}

static sbitmap isa_all_fpubits;
static sbitmap isa_quirkbits;

/* Configure a build target TARGET from the user-specified options OPTS and
   OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
   architecture have been specified, but the two are not identical.  */
void
arm_configure_build_target (struct arm_build_target *target,
			    struct cl_target_option *opts,
			    struct gcc_options *opts_set,
			    bool warn_compatible)
{
  const cpu_option *arm_selected_tune = NULL;
  const arch_option *arm_selected_arch = NULL;
  const cpu_option *arm_selected_cpu = NULL;
  const arm_fpu_desc *arm_selected_fpu = NULL;
  const char *tune_opts = NULL;
  const char *arch_opts = NULL;
  const char *cpu_opts = NULL;

  bitmap_clear (target->isa);
  target->core_name = NULL;
  target->arch_name = NULL;

->x_arm_arch_string
)
3058 arm_selected_arch
= arm_parse_arch_option_name (all_architectures
,
3060 opts
->x_arm_arch_string
);
3061 arch_opts
= strchr (opts
->x_arm_arch_string
, '+');
3064 if (opts_set
->x_arm_cpu_string
)
3066 arm_selected_cpu
= arm_parse_cpu_option_name (all_cores
, "-mcpu",
3067 opts
->x_arm_cpu_string
);
3068 cpu_opts
= strchr (opts
->x_arm_cpu_string
, '+');
3069 arm_selected_tune
= arm_selected_cpu
;
3070 /* If taking the tuning from -mcpu, we don't need to rescan the
3071 options for tuning. */
3074 if (opts_set
->x_arm_tune_string
)
3076 arm_selected_tune
= arm_parse_cpu_option_name (all_cores
, "-mtune",
3077 opts
->x_arm_tune_string
);
3078 tune_opts
= strchr (opts
->x_arm_tune_string
, '+');
3081 if (arm_selected_arch
)
3083 arm_initialize_isa (target
->isa
, arm_selected_arch
->common
.isa_bits
);
3084 arm_parse_option_features (target
->isa
, &arm_selected_arch
->common
,
3087 if (arm_selected_cpu
)
3089 auto_sbitmap
cpu_isa (isa_num_bits
);
3090 auto_sbitmap
isa_delta (isa_num_bits
);
3092 arm_initialize_isa (cpu_isa
, arm_selected_cpu
->common
.isa_bits
);
3093 arm_parse_option_features (cpu_isa
, &arm_selected_cpu
->common
,
3095 bitmap_xor (isa_delta
, cpu_isa
, target
->isa
);
3096 /* Ignore any bits that are quirk bits. */
3097 bitmap_and_compl (isa_delta
, isa_delta
, isa_quirkbits
);
3098 /* Ignore (for now) any bits that might be set by -mfpu. */
3099 bitmap_and_compl (isa_delta
, isa_delta
, isa_all_fpubits
);
3101 if (!bitmap_empty_p (isa_delta
))
3103 if (warn_compatible
)
3104 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3105 arm_selected_cpu
->common
.name
,
3106 arm_selected_arch
->common
.name
);
3107 /* -march wins for code generation.
3108 -mcpu wins for default tuning. */
3109 if (!arm_selected_tune
)
3110 arm_selected_tune
= arm_selected_cpu
;
3112 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3113 target
->arch_name
= arm_selected_arch
->common
.name
;
3117 /* Architecture and CPU are essentially the same.
3118 Prefer the CPU setting. */
3119 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3120 target
->core_name
= arm_selected_cpu
->common
.name
;
3121 /* Copy the CPU's capabilities, so that we inherit the
3122 appropriate extensions and quirks. */
3123 bitmap_copy (target
->isa
, cpu_isa
);
3128 /* Pick a CPU based on the architecture. */
3129 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3130 target
->arch_name
= arm_selected_arch
->common
.name
;
3131 /* Note: target->core_name is left unset in this path. */
  else if (arm_selected_cpu)
    {
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }
  /* If the user did not specify a processor or architecture, choose
     one for them.  */
  else
    {
      const cpu_option *sel;
      auto_sbitmap sought_isa (isa_num_bits);
      bitmap_clear (sought_isa);
      auto_sbitmap default_isa (isa_num_bits);

      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
						    TARGET_CPU_DEFAULT);
      cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
      gcc_assert (arm_selected_cpu->common.name);

      /* RWE: All of the selection logic below (to the end of this
	 'if' clause) looks somewhat suspect.  It appears to be mostly
	 there to support forcing thumb support when the default CPU
	 does not have thumb (somewhat dubious in terms of what the
	 user might be expecting).  I think it should be removed once
	 support for the pre-thumb era cores is removed.  */
      sel = arm_selected_cpu;
      arm_initialize_isa (default_isa, sel->common.isa_bits);
      arm_parse_option_features (default_isa, &arm_selected_cpu->common,
				 cpu_opts);

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  bitmap_set_bit (sought_isa, isa_bit_thumb);
	  bitmap_set_bit (sought_isa, isa_bit_mode32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we forcibly remove MODE26 from
	     the isa features here (if it was set), so that the
	     search below will always be able to find a compatible
	     processor.  */
	  bitmap_clear_bit (default_isa, isa_bit_mode26);
	}

      /* If there are such requirements and the default CPU does not
	 satisfy them, we need to run over the complete list of
	 cores looking for one that is satisfactory.  */
      if (!bitmap_empty_p (sought_isa)
	  && !bitmap_subset_p (sought_isa, default_isa))
	{
	  auto_sbitmap candidate_isa (isa_num_bits);
	  /* We're only interested in a CPU with at least the
	     capabilities of the default CPU and the required
	     additional features.  */
	  bitmap_ior (default_isa, default_isa, sought_isa);

	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->common.name != NULL; sel++)
	    {
	      arm_initialize_isa (candidate_isa, sel->common.isa_bits);
	      /* An exact match?  */
	      if (bitmap_equal_p (default_isa, candidate_isa))
		break;
	    }

	  if (sel->common.name == NULL)
	    {
	      unsigned current_bit_count = isa_num_bits;
	      const cpu_option *best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a CPU that has exactly the
		 characteristics of the default CPU and the given
		 command line options we scan the array again looking
		 for a best match.  The best match must have at least
		 the capabilities of the perfect match.  */
	      for (sel = all_cores; sel->common.name != NULL; sel++)
		{
		  arm_initialize_isa (candidate_isa, sel->common.isa_bits);

		  if (bitmap_subset_p (default_isa, candidate_isa))
		    {
		      unsigned count;

		      bitmap_and_compl (candidate_isa, candidate_isa,
					default_isa);
		      count = bitmap_popcount (candidate_isa);

		      if (count < current_bit_count)
			{
			  best_fit = sel;
			  current_bit_count = count;
			}
		    }
		}

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}

      /* Now we know the CPU, we can finally initialize the target
	 structure.  */
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }

  gcc_assert (arm_selected_cpu);
  gcc_assert (arm_selected_arch);

  if (opts->x_arm_fpu_index != TARGET_FPU_auto)
    {
      arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
      auto_sbitmap fpu_bits (isa_num_bits);

      arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
      bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
      bitmap_ior (target->isa, target->isa, fpu_bits);
    }

  if (!arm_selected_tune)
    arm_selected_tune = arm_selected_cpu;
  else /* Validate the features passed to -mtune.  */
    arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);

  const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];

  /* Finish initializing the target structure.  */
  target->arch_pp_name = arm_selected_arch->arch;
  target->base_arch = arm_selected_arch->base_arch;
  target->profile = arm_selected_arch->profile;

  target->tune_flags = tune_data->tune_flags;
  target->tune = tune_data->tune;
  target->tune_core = tune_data->scheduler;
}

/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  static const enum isa_feature fpu_bitlist[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit };
  cl_target_option opts;

  isa_quirkbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_quirkbits, quirk_bitlist);

  isa_all_fpubits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_all_fpubits, fpu_bitlist);

  arm_active_target.isa = sbitmap_alloc (isa_num_bits);

  if (!global_options_set.x_arm_fpu_index)
    {
      bool ok;
      int fpu_index;

      ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
      arm_fpu_index = (enum fpu_type) fpu_index;
    }

  cl_target_option_save (&opts, &global_options);
  arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
			      true);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
  arm_base_arch = arm_active_target.base_arch;

  arm_tune = arm_active_target.tune_core;
  tune_flags = arm_active_target.tune_flags;
  current_tune = arm_active_target.tune;

  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK
      && !TARGET_BPABI
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file.  */
  arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
  arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
  arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
  arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
  arm_arch5te = arm_arch5e
    && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
  arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
  arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
  arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
  arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
  arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
  arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
  arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
  arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
  arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
  arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
  arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
  arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
  arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
  arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
  arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
  arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
  if (arm_fp16_inst)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
	error ("selected fp16 options are incompatible");
      arm_fp16_format = ARM_FP16_FORMAT_IEEE;
    }

  /* Set up some tuning parameters.  */
  arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
  arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
  arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
  arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
  arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;

  /* And finally, set up some quirks.  */
  arm_arch_no_volatile_ce
    = bitmap_bit_p (arm_active_target.isa, isa_quirk_no_volatile_ce);
  arm_arch6kz
    = arm_arch6k && bitmap_bit_p (arm_active_target.isa, isa_quirk_ARMv6kz);

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  else
    arm_fpu_attr = FPU_VFP;

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");
      else if (TARGET_CALLEE_INTERWORKING)
	error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (TARGET_HARD_FLOAT_ABI)
	{
	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_VFPv2))
	    error ("-mfloat-abi=hard: selected processor lacks an FPU");
	}
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD)
	sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }

  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }

  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      warning (0, "option %<-mstructure-size-boundary%> is deprecated");

      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }
  if (TARGET_VXWORKS_RTP)
    {
      if (!global_options_set.x_arm_pic_data_is_text_relative)
	arm_pic_data_is_text_relative = 0;
    }
  else if (flag_pic
	   && !arm_pic_data_is_text_relative
	   && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
    /* When text & data segments don't have a fixed displacement, the
       intended use is with a single, read only, pic base register.
       Unless the user explicitly requested not to do that, set
       it.  */
    target_flags |= MASK_SINGLE_PIC_BASE;
  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;
  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }
  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least(2))
    flag_strict_volatile_bitfields = 1;
  /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;
  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			   current_tune->prefetch.num_slots,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			   current_tune->prefetch.l1_cache_line_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			   current_tune->prefetch.l1_cache_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  /* Use Neon to perform 64-bits operations rather than core
     registers.  */
  prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
  if (use_neon_for_64bits == 1)
    prefer_neon_for_64bits = true;
  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int param_sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      param_sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      param_sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      param_sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
			 param_sched_autopref_queue_depth,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Currently, for slow flash data, we just disable literal pools.  We also
     disable it for pure-code.  */
  if (target_slow_flash_data || target_pure_code)
    arm_disable_literal_pool = true;

  if (use_cmse && !arm_arch_cmse)
    error ("target CPU does not support ARMv8-M Security Extensions");
  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;
  /* Need to remember initial options before they are overridden.  */
  init_optimize = build_optimization_node (&global_options);

  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal ();

  /* Create the default target_options structure.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Init initial mode for testing.  */
  thumb_flipper = TARGET_THUMB;
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char * arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
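
/* Illustrative example (not part of GCC itself): a user-level declaration
   such as the one below is what feeds the string "IRQ" into arm_isr_value
   via the attribute machinery; the handler name is hypothetical.  */
#if 0
void my_irq_handler (void) __attribute__ ((interrupt ("IRQ")));
#endif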
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  if (lookup_attribute ("cmse_nonsecure_entry", attr))
    type |= ARM_FT_CMSE_ENTRY;

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr 		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

static void
arm_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.syntax unified\n");

  if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
	 assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
	  && arm_compute_static_chain_stack_bytes() != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ... */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
     several instructions if anything needs to be popped.  */
  if (saved_int_regs && IS_CMSE_ENTRY (func_type))
    return 0;

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
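
/* Illustrative example: a leaf function that saves only {r4, lr} and needs
   no stack adjustment can return with a single "pop {r4, pc}", so the test
   above answers 1; if LR were not among the saved registers, the epilogue
   would need a separate load and branch, and the answer is 0.  */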
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  /* Note this function can be called before or after reload.  */
  if (!reload_completed)
    arm_compute_frame_layout ();

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }
  else if (TARGET_HAVE_MOVT)
    {
      /* Thumb-1 Targets with MOVT.  */
      if (i > 0xffff)
	return FALSE;
      else
	return TRUE;
    }

  return FALSE;
}
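
/* A minimal standalone sketch (illustration only, not used by GCC) of the
   ARM-mode rule const_ok_for_arm implements: an immediate is valid when it
   is an 8-bit value rotated right by an even amount.  E.g. 0x0000ff00 and
   0xff000000 are encodable, 0x00ff00ff is not.  The real test above also
   handles Thumb-2 and MOVT targets.  */
#if 0
static int
rotated_imm8_p (unsigned int x)
{
  int r;
  for (r = 0; r < 32; r += 2)
    {
      /* Rotate X left by R bits and see whether the result fits in 8 bits;
	 this is equivalent to X being an 8-bit value rotated right by R.  */
      unsigned int v = (x << r) | (r ? x >> (32 - r) : 0);
      if ((v & ~0xffu) == 0)
	return 1;
    }
  return 0;
}
#endif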
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* Otherwise, fall through to the comparison cases and try the
	 negated constant.  */
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
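
/* Worked example (illustrative): on Thumb-2, const_ok_for_op accepts
   i == 0xfff for PLUS via the addw/subw path above even though 0xfff is
   not an 8-bit rotated immediate; in ARM mode the same constant is
   rejected and must be synthesized from multiple instructions.  */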
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
	      && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);

    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are diadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));

	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((HOST_WIDE_INT_1U << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}
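
/* Worked example (illustrative): with code == SET and val == 0xe0000105,
   the ARM-mode sequence chosen here is

	mov	rD, #0xe0000000
	orr	rD, rD, #0x100
	orr	rD, rD, #0x05

   three rotated 8-bit immediates, with the small constant emitted last so
   that a following addressing insn has a chance to fold it, exactly as the
   comment above describes.  */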
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
			      struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
     wrapping around to the top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  loc = i;
	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				    | ((i < end) ? (0xff >> (32 - end))
						 : 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two. Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}

      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

	 We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 make it worse.

	 TODO: 16-bit constants that don't clear all the bits, but still win.
	 TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
	  b2 = (remainder & 0x00ff0000) >> 16;
	  b3 = (remainder & 0x0000ff00) >> 8;
	  b4 = remainder & 0xff;

	  if (loc > 24)
	    {
	      /* The 8-bit immediate already found clears b1 (and maybe b2),
		 but must leave b3 and b4 alone.  */

	      /* First try to find a 32-bit replicated constant that clears
		 almost everything.  We can assume that we can't do it in one,
		 or else we wouldn't be here.  */
	      unsigned int tmp = b1 & b2 & b3 & b4;
	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
				  + (tmp << 24);
	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
					    + (tmp == b3) + (tmp == b4);
	      if (tmp
		  && (matching_bytes >= 3
		      || (matching_bytes == 2
			  && const_ok_for_op (remainder & ~tmp2, code))))
		{
		  /* At least 3 of the bytes match, and the fourth has at
		     least as many bits set, or two of the bytes match
		     and it will only require one more insn to finish.  */
		  result = tmp2;
		  i = tmp != b1 ? 32
		      : tmp != b2 ? 24
		      : tmp != b3 ? 16
		      : 8;
		}

	      /* Second, try to find a 16-bit replicated constant that can
		 leave three of the bytes clear.  If b2 or b4 is already
		 zero, then we can.  If the 8-bit from above would not
		 clear b2 anyway, then we still win.  */
	      else if (b1 == b3 && (!b2 || !b4
				    || (remainder & 0x00ff0000 & ~result)))
		{
		  result = remainder & 0xff00ff00;
		  i = 24;
		}
	    }
	  else if (loc > 16)
	    {
	      /* The 8-bit immediate already found clears b2 (and maybe b3)
		 and we don't get here unless b1 is already clear, but it will
		 leave b4 unchanged.  */

	      /* If we can clear b2 and b4 at once, then we win, since the
		 8-bits couldn't possibly reach that far.  */
	      if (b2 == b4)
		{
		  result = remainder & 0x00ff00ff;
		  i = 16;
		}
	    }
	}

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
		  int subtargets, int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_MINUS (mode, GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }
  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}

      return 1;
    }
  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }

  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}

      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}

      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR (mode,
						 gen_rtx_ASHIFT (mode, source,
								 GEN_INT (i)),
						 source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR (mode,
						 gen_rtx_LSHIFTRT (mode, source,
								   GEN_INT (i)),
						 source)));
		  return insns + 1;
		}
	    }
	}
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0 */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (sub, GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (target,
						   gen_rtx_fmt_ee (code, mode,
								   source, sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /*  Convert.
	  x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
	                     and the remainder 0s for e.g. 0xfff00000)
	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

	  This can be done in 2 instructions by using shifts with mov or mvn.
	  e.g. for
	  x = x | 0xfff00000;
	  we generate.
	  mvn	r0, r0, asl #12
	  mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	  x = y | constant (which has set_zero_bit_copies number of trailing ones).
	   to
	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	  For eg. r0 = r0 | 0xfff
	       mvn	r0, r0, lsr #12
	       mvn	r0, r0, asl #12

	  */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction. This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).
      */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;

    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, SImode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }
  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
	{
	  rtx new_src, temp1_rtx;

	  temp1 = immediates->i[i];

	  if (code == SET || code == MINUS)
	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
	  else if ((final_invert || i < (insns - 1)) && subtargets)
	    new_src = gen_reg_rtx (mode);
	  else
	    new_src = target;

	  if (can_invert)
	    temp1 = ~temp1;
	  else if (can_negate)
	    temp1 = -temp1;

	  temp1 = trunc_int_for_mode (temp1, mode);
	  temp1_rtx = GEN_INT (temp1);

	  if (code == SET)
	    ;
	  else if (code == MINUS)
	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
	  else
	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
	  source = new_src;

	  if (code == SET)
	    {
	      can_negate = can_invert;
	      can_invert = 0;
	      code = PLUS;
	    }
	  else if (code == MINUS)
	    code = PLUS;
	}
    }

  if (final_invert)
    {
      if (generate)
	emit_constant_insn (cond, gen_rtx_SET (target,
					       gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (CONST_INT_P (*op1))
	    {
	      i = INTVAL (*op1);
	      switch (*code)
		{
		case GT:
		case LE:
		  if (i != maxval
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GT ? GE : LT;
		      return;
		    }
		  break;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0)
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GTU ? GEU : LTU;
		      return;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }

	  /* If that did not work, reverse the condition.  */
	  if (!op0_preserve_value)
	    {
	      std::swap (*op0, *op1);
	      *code = (int)swap_condition ((enum rtx_code)*code);
	    }
	}
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GT ? GE : LT;
	  return;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GE ? GT : LE;
	  return;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GTU ? GEU : LTU;
	  return;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GEU ? GTU : LEU;
	  return;
	}
      break;

    default:
      gcc_unreachable ();
    }
}
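
/* Worked example (illustrative): "x > 0xfffff" needs the awkward constant
   0x000fffff, but rewriting it as "x >= 0x100000" (GT -> GE with OP1 + 1)
   leaves a single rotated 8-bit immediate, which is exactly the adjustment
   performed above.  */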
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value (const_tree type, const_tree func,
		    bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  return arm_libcall_value_1 (mode);
}
/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table <libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, SFmode));

      /* Values from double-precision helper functions are returned in core
	 registers if the selected core only supports single-precision
	 arithmetic, even if we are using the hard-float ABI.  The same is
	 true for single-precision helpers, but we will never be using the
	 hard-float ABI on a CPU which doesn't support single-precision
	 operations in hardware.  */
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
							SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
							DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER (1));
    }

  return arm_libcall_value_1 (mode);
}
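
/* Illustrative example: on a single-precision-only FPU under the hard-float
   ABI, a DFmode addition lowers to a helper such as __aeabi_dadd, which the
   table above marks as returning its result in core registers rather than a
   VFP register pair.  */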
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}
/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (ie not including vectors and
	 complex) are always returned in a register (or registers).
	 We don't care about which register here, so we can short-cut
	 some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
	  && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;

      /* Check any available co-processors to see if they accept the
	 type as a register candidate (VFP, for example, can return
	 some aggregates in consecutive registers).  These aren't
	 available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;

      /* Vector values should be returned using ARM registers, not
	 memory (unless they're over 16 bytes, which will break since
	 we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
	 if the type is 'integer like' and every addressable element
	 has an offset of zero.  For practical purposes this means
	 that the structure can have at most one non bit-field element
	 and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
	 have been created by C++.  */
      for (field = TYPE_FIELDS (type);
	   field && TREE_CODE (field) != FIELD_DECL;
	   field = DECL_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
	return true;

      /* ... Aggregates that are not themselves valid for returning in
	 a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
	 since they are not addressable.  */
      for (field = DECL_CHAIN (field);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
	}

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
	 integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
	}

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
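
/* Illustrative examples of the APCS rules above: "struct { int i; }" is
   integer-like and is returned in a register, while "struct { float f; }"
   or any aggregate larger than a word is returned in memory.  */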
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  gcc_assert (type);

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
	  if (i && i->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
		    const_tree fntype ATTRIBUTE_UNUSED,
		    rtx libcall ATTRIBUTE_UNUSED,
		    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != HFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types, whether or not those modes are
	 supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
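/* Examples of the classification above (hypothetical types):

     struct hfa2 { double a, b; };        // returns 2, *modep == DFmode
     struct hfa4 { float v[4]; };         // returns 4, *modep == SFmode
     struct bad1 { float a; int b; };     // returns -1 (non-FP member)
     struct bad2 { float a; double b; };  // returns -1 (mixed base modes)

   Only a homogeneous aggregate of one FP or vector base type, with no
   padding, yields a positive count.  */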
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
	{
	  sorry ("Thumb-1 hard-float VFP ABI");
	  /* sorry() is not immediately fatal, so only display this once.  */
	  seen_thumb1_vfp = true;
	}

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_HARD_FLOAT &&
	  (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
				       machine_mode mode, const_tree type,
				       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
	*count = ag_count;
      else
	return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;
  return true;
}
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
			       machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
						&ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
			     const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;

  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
						&pcum->aapcs_vfp_rmode,
						&pcum->aapcs_vfp_rcount);
}
/* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
   for the behaviour of this function.  */

static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
		    const_tree type  ATTRIBUTE_UNUSED)
{
  int rmode_size
    = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
  int shift = rmode_size / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
	pcum->aapcs_vfp_reg_alloc = mask << regno;
	if (mode == BLKmode
	    || (mode == TImode && ! TARGET_NEON)
	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
	  {
	    int i;
	    int rcount = pcum->aapcs_vfp_rcount;
	    int rshift = shift;
	    machine_mode rmode = pcum->aapcs_vfp_rmode;
	    rtx par;
	    if (!TARGET_NEON)
	      {
		/* Avoid using unsupported vector modes.  */
		if (rmode == V2SImode)
		  rmode = DImode;
		else if (rmode == V4SImode)
		  {
		    rmode = DImode;
		    rcount *= 2;
		    rshift /= 2;
		  }
	      }
	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
	    for (i = 0; i < rcount; i++)
	      {
		rtx tmp = gen_rtx_REG (rmode,
				       FIRST_VFP_REGNUM + regno + i * rshift);
		tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
					 GEN_INT (i * GET_MODE_SIZE (rmode)));
		XVECEXP (par, 0, i) = tmp;
	      }

	    pcum->aapcs_reg = par;
	  }
	else
	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
	return true;
      }
  return false;
}
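/* Worked example for the allocator above (editorial sketch): for an
   aggregate of two doubles, aapcs_vfp_rmode is DFmode, so rmode_size is 8,
   shift is 2 (each element spans two SFmode slots) and mask is
   (1 << 4) - 1 == 0xf.  The loop therefore scans regno = 0, 2, 4, ...
   looking for four consecutive free single-precision slots starting at an
   even register, i.e. an aligned d<n>/d<n+1> pair.  */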
/* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
   comment there for the behaviour of this function.  */

static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
			       machine_mode mode,
			       const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode
      || (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
	  && !TARGET_NEON))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
					     &ag_mode, &count);

      if (!TARGET_NEON)
	{
	  if (ag_mode == V2SImode)
	    ag_mode = DImode;
	  else if (ag_mode == V4SImode)
	    {
	      ag_mode = DImode;
	      count *= 2;
	    }
	}

      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
	{
	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	  XVECEXP (par, 0, i) = tmp;
	}

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
		   machine_mode mode  ATTRIBUTE_UNUSED,
		   const_tree type  ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
}
#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
     be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a call.  This
     routine must not fail and will only be called if is_return_candidate
     returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
			  const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}
static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
							TYPE_MODE (type),
							type))
	  return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
			   const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
							type))
	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
							     mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}
static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
		  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
	 preparation phase, these are handled elsewhere in the
	 compiler.  */

      if (slot >= 0)
	{
	  /* A Co-processor register candidate goes either in its own
	     class of registers or on the stack.  */
	  if (!pcum->aapcs_cprc_failed[slot])
	    {
	      /* C1.cp - Try to allocate the argument to co-processor
		 registers.  */
	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
		return;

	      /* C2.cp - Put the argument on the stack and note that we
		 can't assign any more candidates in this slot.  We also
		 need to note that we have allocated stack space, so that
		 we won't later try to split a non-cprc candidate between
		 core registers and the stack.  */
	      pcum->aapcs_cprc_failed[slot] = true;
	      pcum->can_split = false;
	    }

	  /* We didn't get a register, so this argument goes on the
	     stack.  */
	  gcc_assert (pcum->can_split == false);
	  return;
	}
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if (ncrn & 1)
    {
      int res = arm_needs_doubleword_align (mode, type);
      /* Only warn during RTL expansion of call stmts, otherwise we would
	 warn e.g. during gimplification even on functions that will be
	 always inlined, and we'd warn multiple times.  Don't warn when
	 called in expand_function_start either, as we warn instead in
	 arm_function_arg_boundary in that case.  */
      if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", type);
      if (res > 0)
	ncrn++;
    }

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
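/* Worked examples for rules C3-C5 above (editorial sketch): for
   f (int a, long long b), a occupies r0 and NCRN is 1 when b is laid out;
   C3 rounds NCRN up to 2 (long long is doubleword aligned) and C4 assigns
   r2/r3.  For g (int a, int b, int c, struct { int x[3]; } d), d needs
   three core registers, C4 fails (3 + 3 > 4), and C5 passes the first four
   bytes in r3 (aapcs_partial == 4) with the remaining eight going to the
   stack.  */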
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname,
			  tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
	pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
	{
	  int i;

	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	    {
	      pcum->aapcs_cprc_failed[i] = false;
	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
	    }
	}
      return;
    }

  /* Legacy ABIs.  */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (! pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}
/* Return 1 if double word alignment is required for argument passing.
   Return -1 if double word alignment used to be required for argument
   passing before PR77728 ABI fix, but is not required anymore.
   Return 0 if double word alignment is not required and wasn't required
   before either.  */
static int
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  if (!type)
    return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;

  /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;

  /* Array types: Use member alignment of element type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;

  int ret = 0;
  /* Record/aggregate types: Use greatest member alignment of any member.  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (DECL_ALIGN (field) > PARM_BOUNDARY)
      {
	if (TREE_CODE (field) == FIELD_DECL)
	  return 1;
	else
	  /* Before PR77728 fix, we were incorrectly considering also
	     other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
	     Make sure we can warn about that with -Wpsabi.  */
	  ret = -1;
      }

  return ret;
}
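/* Examples (hypothetical types):

     long long ll;               // 64-bit mode alignment  -> returns 1
     struct s { int a, b; };     // 32-bit alignment       -> returns 0
     struct t { long long a; };  // 64-bit aligned member  -> returns 1

   An aggregate whose only over-aligned entry is not a FIELD_DECL (for
   instance a static data member in C++) returns -1: the pre-PR77728 ABI
   aligned for it, the fixed ABI does not, and callers warn under
   -Wpsabi.  */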
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
		  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (mode == VOIDmode)
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
	return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
	{
	  pcum->can_split = false;
	  return NULL_RTX;
	}
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
    {
      int res = arm_needs_doubleword_align (mode, type);
      if (res < 0 && warn_psabi)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", type);
      if (res > 0)
	pcum->nregs++;
    }

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (mode, pcum->nregs);
}
static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  if (!ARM_DOUBLEWORD_ALIGN)
    return PARM_BOUNDARY;

  int res = arm_needs_doubleword_align (mode, type);
  if (res < 0 && warn_psabi)
    inform (input_location, "parameter passing for argument of type %qT "
	    "changed in GCC 7.1", type);

  return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
}
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
		       tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
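/* Example (editorial sketch): with three words already allocated
   (nregs == 3), an 8-byte struct of two ints has ARM_NUM_REGS2 == 2;
   NUM_ARG_REGS (4) > 3 but 4 < 3 + 2, so the function returns 4: four
   bytes travel in r3 and the remainder of the argument on the stack.  */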
/* Update the data in PCUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
			  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);

      if (pcum->aapcs_cprc_slot >= 0)
	{
	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
							      type);
	  pcum->aapcs_cprc_slot = -1;
	}

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (mode)
	  && pcum->named_count > pcum->nargs
	  && TARGET_IWMMXT_ABI)
	pcum->iwmmxt_nregs += 1;
      else
	pcum->nregs += ARM_NUM_REGS2 (mode, type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
		       machine_mode mode ATTRIBUTE_UNUSED,
		       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
			  bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
	{
	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
		   name);
	  *no_add_attrs = true;
	}
      /* FIXME: the argument if any is checked for type attributes;
	 should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
	  || TREE_CODE (*node) == METHOD_TYPE)
	{
	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	      *no_add_attrs = true;
	    }
	}
      else if (TREE_CODE (*node) == POINTER_TYPE
	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
	{
	  *node = build_variant_type_copy (*node);
	  TREE_TYPE (*node) = build_type_attribute_variant
	    (TREE_TYPE (*node),
	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
	  *no_add_attrs = true;
	}
      else
	{
	  /* Possibly pass this attribute on from the type to a decl.  */
	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
		       | (int) ATTR_FLAG_FUNCTION_NEXT
		       | (int) ATTR_FLAG_ARRAY_NEXT))
	    {
	      *no_add_attrs = true;
	      return tree_cons (name, args, NULL_TREE);
	    }
	  else
	    warning (OPT_Wattributes, "%qE attribute ignored",
		     name);
	}
    }

  return NULL_TREE;
}
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* This function returns true if a function with declaration FNDECL and type
   FNTYPE uses the stack to pass arguments or return variables and false
   otherwise.  This is used for functions with the attributes
   'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
   diagnostic messages if the stack is used.  NAME is the name of the attribute
   used.  */

static bool
cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
{
  function_args_iterator args_iter;
  CUMULATIVE_ARGS args_so_far_v;
  cumulative_args_t args_so_far;
  bool first_param = true;
  tree arg_type, prev_arg_type = NULL_TREE, ret_type;

  /* Error out if any argument is passed on the stack.  */
  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
  args_so_far = pack_cumulative_args (&args_so_far_v);
  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
    {
      rtx arg_rtx;
      machine_mode arg_mode = TYPE_MODE (arg_type);

      prev_arg_type = arg_type;
      if (VOID_TYPE_P (arg_type))
	continue;

      if (!first_param)
	arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
      arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
      if (!arg_rtx
	  || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
	{
	  error ("%qE attribute not available to functions with arguments "
		 "passed on the stack", name);
	  return true;
	}
      first_param = false;
    }

  /* Error out for variadic functions since we cannot control how many
     arguments will be passed and thus stack could be used.  stdarg_p () is not
     used for the checking to avoid browsing arguments twice.  */
  if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
    {
      error ("%qE attribute not available to functions with variable number "
	     "of arguments", name);
      return true;
    }

  /* Error out if return value is passed on the stack.  */
  ret_type = TREE_TYPE (fntype);
  if (arm_return_in_memory (ret_type, fntype))
    {
      error ("%qE attribute not available to functions that return value on "
	     "the stack", name);
      return true;
    }
  return false;
}
/* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function declaration tree or otherwise issue a warning.  */

static tree
arm_handle_cmse_nonsecure_entry (tree *node, tree name,
				 tree /* args */,
				 int /* flags */,
				 bool *no_add_attrs)
{
  tree fndecl;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
	       name);
      return NULL_TREE;
    }

  /* Ignore attribute for function types.  */
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  fndecl = *node;

  /* Warn for static linkage functions.  */
  if (!TREE_PUBLIC (fndecl))
    {
      warning (OPT_Wattributes, "%qE attribute has no effect on functions "
	       "with static linkage", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
						      TREE_TYPE (fndecl));
  return NULL_TREE;
}
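/* Usage sketch (hypothetical user code): with -mcmse,

     int __attribute__ ((cmse_nonsecure_entry))
     ns_entry (int x) { return x + 1; }

   is accepted because both the argument and the return value fit in core
   registers.  A variant taking a large by-value structure would be
   rejected by cmse_func_args_or_return_in_stack above, since values must
   not cross the secure/non-secure boundary on the stack.  */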
/* Called upon detection of the use of the cmse_nonsecure_call attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function type tree or otherwise issue a diagnostic.  The
   reason we check this at declaration time is to only allow the use of the
   attribute with declarations of function pointers and not function
   declarations.  This function checks NODE is of the expected type and issues
   diagnostics otherwise using NAME.  If it is not of the expected type
   *NO_ADD_ATTRS will be set to true.  */

static tree
arm_handle_cmse_nonsecure_call (tree *node, tree name,
				tree /* args */,
				int /* flags */,
				bool *no_add_attrs)
{
  tree decl = NULL_TREE, fntype = NULL_TREE;
  tree type;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
	       name);
      return NULL_TREE;
    }

  if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
    {
      decl = *node;
      fntype = TREE_TYPE (decl);
    }

  while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
    fntype = TREE_TYPE (fntype);

  if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
	       "function pointer", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);

  if (*no_add_attrs)
    return NULL_TREE;

  /* Prevent trees being shared among function types with and without
     cmse_nonsecure_call attribute.  */
  type = TREE_TYPE (decl);

  type = build_distinct_type_copy (type);
  TREE_TYPE (decl) = type;
  fntype = type;

  while (TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      type = fntype;
      fntype = TREE_TYPE (fntype);
      fntype = build_distinct_type_copy (fntype);
      TREE_TYPE (type) = fntype;
    }

  /* Construct a type attribute and add it to the function type.  */
  tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
			  TYPE_ATTRIBUTES (fntype));
  TYPE_ATTRIBUTES (fntype) = attrs;
  return NULL_TREE;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  l1 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type2)) != NULL;

  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to a newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
	return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

	a.  has an __attribute__ ((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

	d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
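/* Usage sketch (hypothetical user code) for rules a-f above:

     #pragma long_calls
     void far_func (void);                               // rule b
     #pragma long_calls_off

     void near_func (void) __attribute__ ((short_call)); // rule d
     void far2 (void) __attribute__ ((long_call));       // rule a

   short_call (d) is tested first, then same-section locality (f), then
   long_call (a/b); -mlong-calls (c) is the fallback default.  */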
/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
     may be used both as target of the call and base register for restoring
     the VFP registers  */
  if (TARGET_APCS_FRAME && TARGET_ARM
      && TARGET_HARD_FLOAT
      && decl && arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  /* ARMv8-M non-secure entry functions need to return with bxns which is only
     generated for entry functions themselves.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    return false;

  /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
     this would complicate matters for later code generation.  */
  if (TREE_CODE (exp) == CALL_EXPR)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
	return false;
    }

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
	 example that we aren't returning a value from the sibling in
	 a VFP register but then need to transfer it to a core
	 register.  */
      rtx a, b;
      tree decl_or_type = decl;

      /* If it is an indirect function pointer, get the function type.  */
      if (!decl)
	decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));

      a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			      cfun->decl, false);
      if (!rtx_equal_p (a, b))
	return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* We cannot do a tailcall for an indirect call by descriptor if all the
     argument registers are used because the only register left to load the
     address is IP and it will already contain the static chain.  */
  if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      CUMULATIVE_ARGS cum;
      cumulative_args_t cum_v;

      arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
      cum_v = pack_cumulative_args (&cum);

      for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
	{
	  tree type = TREE_VALUE (t);
	  if (!VOID_TYPE_P (type))
	    arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
	}

      if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
	return false;
    }

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */

static void
require_pic_register (void)
{
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
    {
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM
	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
	{
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    crtl->uses_pic_offset_table = 1;
	}
      else
	{
	  rtx_insn *seq, *insn;

	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_reg_rtx (Pmode);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    {
	      crtl->uses_pic_offset_table = 1;
	      start_sequence ();

	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
		  && arm_pic_register > LAST_LO_REGNUM)
		emit_move_insn (cfun->machine->pic_reg,
				gen_rtx_REG (Pmode, arm_pic_register));
	      else
		arm_load_pic_register (0UL);

	      seq = get_insns ();
	      end_sequence ();

	      for (insn = seq; insn; insn = NEXT_INSN (insn))
		if (INSN_P (insn))
		  INSN_LOCATION (insn) = prologue_location;

	      /* We can be called during expansion of PHI nodes, where
		 we can't yet emit instructions directly in the final
		 insn stream.  Queue the insns on the entry edge, they will
		 be committed after everything else is expanded.  */
	      insert_insn_on_edge (seq,
				   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	    }
	}
    }
}
rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* VxWorks does not impose a fixed gap between segments; the run-time
	 gap can be different from the object-file gap.  We therefore can't
	 use GOTOFF unless we are absolutely sure that the symbol is in the
	 same segment as the GOT.  Unfortunately, the flexibility of linker
	 scripts means that we can't be sure of that in general, so assume
	 that GOTOFF is never valid on VxWorks.  */
      /* References to weak symbols cannot be resolved locally: they
	 may be overridden by a non-weak definition at link time.  */
      rtx_insn *insn;
      if ((GET_CODE (orig) == LABEL_REF
	   || (GET_CODE (orig) == SYMBOL_REF
	       && SYMBOL_REF_LOCAL_P (orig)
	       && (SYMBOL_REF_DECL (orig)
		   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
	  && NEED_GOT_RELOC
	  && arm_pic_data_is_text_relative)
	insn = arm_pic_static_addr (orig, reg);
      else
	{
	  rtx pat;
	  rtx mem;

	  /* If this function doesn't have a pic register, create one now.  */
	  require_pic_register ();

	  pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);

	  /* Make the MEM as close to a constant as possible.  */
	  mem = SET_SRC (pat);
	  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
	  MEM_READONLY_P (mem) = 1;
	  MEM_NOTRAP_P (mem) = 1;

	  insn = emit_insn (pat);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
	return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
	return orig;

      /* Handle the case where we have:
	 const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
	 CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
	{
	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
	  return orig;
	}

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				       base == reg ? 0 : reg);

      if (CONST_INT_P (offset))
	{
	  /* The base register doesn't really matter, we only want to
	     test the index for the appropriate mode.  */
	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      offset = force_reg (Pmode, offset);
	    }

	  if (CONST_INT_P (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	}

      if (GET_MODE_SIZE (mode) > 4
	  && (GET_MODE_CLASS (mode) == MODE_INT
	      || TARGET_SOFT_FLOAT))
	{
	  emit_insn (gen_addsi3 (reg, base, offset));
	  return reg;
	}

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
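/* Illustrative RTL for the GOT path above (editorial sketch, see
   calculate_pic_address in arm.md): for a non-local symbol `sym' the
   emitted pattern is roughly

     (set (reg Rn)
	  (mem (plus (reg pic) (unspec [sym] UNSPEC_PIC_SYM))))

   and the REG_EQUAL note of `sym' lets later passes treat the loaded
   value as a constant instead of spilling Rn.  */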
/* Find a spare register to use during the prolog of a function.  */

static int
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
    if (!df_regs_ever_live_p (reg))
      return reg;

  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have less than 4
     registers worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): ie crtl->args.pretend_args_size is greater than 0.  */
  if (cfun->machine->uses_anonymous_args
      && crtl->args.pretend_args_size > 0)
    return LAST_ARG_REGNUM;

  /* The other case is when we have fixed arguments but less than 4 registers
     worth.  In this case r3 might be used in the body of the function, but
     it is not being used to convey an argument into the function.  In theory
     we could just check crtl->args.size to see how many bytes are
     being passed in argument registers, but it seems that it is unreliable.
     Sometimes it will have the value 0 when in fact arguments are being
     passed.  (See testcase execute/20021111-1.c for an example).  So we also
     check the args_info.nregs field as well.  The problem with this field is
     that it makes no allowances for arguments that are passed to the
     function but which are not used.  Hence we could miss an opportunity
     when a function has an unused argument in r3.  But it is better to be
     safe than to be sorry.  */
  if (! cfun->machine->uses_anonymous_args
      && crtl->args.size >= 0
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
      && (TARGET_AAPCS_BASED
	  ? crtl->args.info.aapcs_ncrn < 4
	  : crtl->args.info.nregs < 4))
    return LAST_ARG_REGNUM;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
	if (pushed_regs_mask & (1 << reg))
	  return reg;
    }
  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}

static GTY(()) int pic_labelno;
/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
{
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
    return;

  gcc_assert (flag_pic);

  pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
				UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
	{
	  emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
      else /* TARGET_THUMB1 */
	{
	  if (arm_pic_register != INVALID_REGNUM
	      && REGNO (pic_reg) > LAST_LO_REGNUM)
	    {
	      /* We will have pushed the pic register, so we should always be
		 able to find a work register.  */
	      pic_tmp = gen_rtx_REG (SImode,
				     thumb_find_work_register (saved_regs));
	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
	    }
	  else if (arm_pic_register != INVALID_REGNUM
		   && arm_pic_register > LAST_LO_REGNUM
		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
	    {
	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
	    }
	  else
	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx_insn *
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx;

  gcc_assert (flag_pic);

  /* We use an UNSPEC rather than a LABEL_REF because this label
     never appears in the code stream.  */
  labelno = GEN_INT (pic_labelno++);
  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  l1 = gen_rtx_CONST (VOIDmode, l1);

  /* On the ARM the PC register contains 'dot + 8' at the time of the
     addition, on the Thumb it is 'dot + 4'.  */
  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
			       UNSPEC_SYMBOL_OFFSET);
  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

  return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
}
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
	  || regno >= FIRST_PSEUDO_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}

/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
				int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
	 to fixup invalid register choices.  */
      if (use_ldrd
	  && GET_CODE (x) == POST_MODIFY
	  && REG_P (addend))
	return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && ((CONST_INT_P (xop1)
		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return true if we can avoid creating a constant pool entry for x.  */
static bool
can_avoid_literal_pool_for_label_p (rtx x)
{
  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend a 8bit value to 32bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (x))
    return 1;
  return 0;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
	return 0;

      offset = INTVAL(addend);
      if (GET_MODE_SIZE (mode) <= 4)
	return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
	      && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  else if (can_avoid_literal_pool_for_label_p (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
			int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);

	  if (TARGET_LDRD)
	    return val > -256 && val < 256;
	  else
	    return val > -4096 && val < 4092;
	}

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
	    && (mode == HImode
		|| mode == HFmode
		|| (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
	{
	  rtx xiop0 = XEXP (index, 0);
	  rtx xiop1 = XEXP (index, 1);

	  return ((arm_address_register_rtx_p (xiop0, strict_p)
		   && power_of_two_operand (xiop1, SImode))
		  || (arm_address_register_rtx_p (xiop1, strict_p)
		      && power_of_two_operand (xiop0, SImode)));
	}
      else if (code == LSHIFTRT || code == ASHIFTRT
	       || code == ASHIFT || code == ROTATERT)
	{
	  rtx op = XEXP (index, 1);

	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
		  && CONST_INT_P (op)
		  && INTVAL (op) > 0
		  && INTVAL (op) <= 31);
	}
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
	  || mode == HFmode
	  || (outer == SIGN_EXTEND && mode == QImode))
	range = 256;
      else
	range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
	  && INTVAL (index) < range
	  && INTVAL (index) > -range);
}
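/* Illustrative sketch (hypothetical helper, not part of the original
   source): the quad-mode limit of 1016 above guarantees that both halves
   of the decomposed access are valid double-mode offsets, i.e. INDEX and
   INDEX + 8 both lie below 1024 and stay word aligned.  */
static bool
example_neon_qreg_offset_ok (HOST_WIDE_INT index)
{
  return (index > -1024 && index < 1016 && (index & 3) == 0
	  /* Redundant but explicit: the second (upper) half also fits.  */
	  && index + 8 < 1024);
}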
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand, i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    /* Thumb-2 allows only > -256 index range for its core register
	       load/stores.  Since we allow SF/DF in core registers, we have
	       to use the intersection between -256~4096 (core) and -1024~1024
	       (coprocessor).  */
	    && INTVAL (index) > -256
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
	 and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
	return (code == CONST_INT
		&& INTVAL (index) < 1024
		&& INTVAL (index) > -1024
		&& (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);
	  /* ??? Can we assume ldrd for thumb2?  */
	  /* Thumb-2 ldrd only has reg+const addressing modes.  */
	  /* ldrd supports offsets of +-1020.
	     However the ldr fallback does not.  */
	  return val > -256 && val < 256 && (val & 3) == 0;
	}
      else
	return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
	       && thumb2_index_mul_operand (xiop1))
	      || (arm_address_register_rtx_p (xiop1, strict_p)
		  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
	      && CONST_INT_P (op)
	      && INTVAL (op) > 0
	      && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
	  && INTVAL (index) < 4096
	  && INTVAL (index) > -256);
}
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
	  || regno > LAST_VIRTUAL_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || (GET_MODE_SIZE (mode) >= 4
	      && (regno == STACK_POINTER_REGNUM
		  || regno >= FIRST_PSEUDO_REGISTER
		  || x == hard_frame_pointer_rtx
		  || x == arg_pointer_rtx)));
}
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
    return 0;

  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
	  || reg_mentioned_p (arg_pointer_rtx, x)
	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
	   && reload_completed
	   && (GET_CODE (x) == LABEL_REF
	       || (GET_CODE (x) == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
	 will be replaced with STACK, and SP relative addressing only
	 permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
	  && XEXP (x, 0) != frame_pointer_rtx
	  && XEXP (x, 1) != frame_pointer_rtx
	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
	return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
		|| XEXP (x, 0) == arg_pointer_rtx)
	       && CONST_INT_P (XEXP (x, 1))
	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
	return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
	 larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
	 just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && INTVAL (XEXP (x, 1)) >= 0
	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;

      else if (REG_P (XEXP (x, 0))
	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
		       && REGNO (XEXP (x, 0))
			  <= LAST_VIRTUAL_POINTER_REGISTER))
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && GET_MODE_SIZE (mode) == 4
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}
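/* A small self-check sketch (illustrative only; nothing calls it): the
   ranges above give QImode offsets 0..31, even HImode offsets 0..62 and
   word-aligned SImode offsets 0..124 (since 124 + 4 <= 128).  */
static void
example_thumb_offsets (void)
{
  gcc_assert (thumb_legitimate_offset_p (QImode, 31));
  gcc_assert (!thumb_legitimate_offset_p (QImode, 32));
  gcc_assert (thumb_legitimate_offset_p (HImode, 62));
  gcc_assert (!thumb_legitimate_offset_p (HImode, 63));
  gcc_assert (thumb_legitimate_offset_p (SImode, 124));
  gcc_assert (!thumb_legitimate_offset_p (SImode, 126));
}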
bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */

static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
	return LO_REGS;
      else
	return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */
      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}
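/* For reference (illustrative, assumed sequences): the hard-TP variant
   reads the CP15 thread register directly, e.g.

	mrc	p15, 0, rN, c13, c0, 3	@ TPIDRURO

   while the soft variant is a call to the EABI helper __aeabi_read_tp,
   whose result is defined to come back in r0 -- hence the immediate copy
   into a pseudo above.  */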
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx_insn *
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
			gen_rtvec (4, x, GEN_INT (reloc), label,
				   GEN_INT (TARGET_ARM ? 8 : 4)),
			UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, 1, reg, Pmode);

  rtx_insn *insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
				       gen_rtx_CONST (VOIDmode, label),
				       GEN_INT (!TARGET_ARM)),
			    UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, ret, eqv, addend;
  rtx_insn *insns;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  /* Original scheme */
	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, x);
	}
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
	     share the LDM result with other LD model accesses.  */
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
				UNSPEC_TLS);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, eqv);

	  /* Load the addend.  */
	  addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
						     GEN_INT (TLS_LDO32)),
				   UNSPEC_TLS);
	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
	  dest = gen_rtx_PLUS (Pmode, dest, addend);
	}
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
				       GEN_INT (TARGET_ARM ? 8 : 4)),
			    UNSPEC_TLS);
      reg = load_tls_operand (sum, reg);

      if (TARGET_ARM)
	emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
	emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
      else
	{
	  emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
	  emit_move_insn (reg, gen_const_mem (SImode, reg));
	}

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
			    UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (x, 0), 1);
	  x = XEXP (XEXP (x, 0), 0);
	}

      if (GET_CODE (x) != SYMBOL_REF)
	return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
	{
	  x = gen_rtx_PLUS (SImode, x, addend);
	  orig_x = x;
	}
      else
	return x;
    }

  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
	return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
	  && !symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
	  && CONST_INT_P (xop1))
	{
	  HOST_WIDE_INT n, low_n;
	  rtx base_reg, val;
	  n = INTVAL (xop1);

	  /* VFP addressing modes actually allow greater offsets, but for
	     now we just stick with the lowest common denominator.  */
	  if (mode == DImode || mode == DFmode)
	    {
	      low_n = n & 0x0f;
	      n &= ~0x0f;
	      if (low_n > 4)
		{
		  n += 16;
		  low_n -= 16;
		}
	    }
	  else
	    {
	      low_n = ((mode) == TImode ? 0
		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
	      n -= low_n;
	    }

	  base_reg = gen_reg_rtx (SImode);
	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
	  emit_move_insn (base_reg, val);
	  x = plus_constant (Pmode, base_reg, low_n);
	}
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the mini pool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
	 use a 8-bit index.  So let's use a 12-bit index for SImode only and
	 hope that arm_gen_constant will enable ldrb to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
	{
	  /* It'll most probably be more efficient to generate the base
	     with more bits set and use a negative index instead.  */
	  base |= mask;
	  index -= mask;
	}
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
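/* A minimal sketch of the base/index split above (hypothetical helper,
   not used elsewhere; assumes the same 12-bit/8-bit index widths).  For
   SImode and an address constant 0x12345 it yields base = 0x12000 and
   index = 0x345.  */
static void
example_split_address_constant (HOST_WIDE_INT addr, int bits,
				HOST_WIDE_INT *base, HOST_WIDE_INT *index)
{
  HOST_WIDE_INT mask = (1 << bits) - 1;	/* 0xfff for SImode loads.  */
  *base = addr & ~mask;			/* Materialized once in a register.  */
  *index = addr & mask;			/* Folded into each load/store.  */
}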
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
	  || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
	 then offsetting that.  Don't do this when optimizing for space
	 since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
	{
	  HOST_WIDE_INT delta;

	  if (offset >= 256)
	    delta = offset - (256 - GET_MODE_SIZE (mode));
	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
	    delta = 31 * GET_MODE_SIZE (mode);
	  else
	    delta = offset & (~31 * GET_MODE_SIZE (mode));

	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
				NULL_RTX);
	  x = plus_constant (Pmode, xop0, delta);
	}
      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference, forcing these into a register normally takes two
	   instructions.  */
	x = force_operand (x, NULL_RTX);
      else
	{
	  /* For the remaining cases, force the constant into a register.  */
	  xop1 = force_reg (SImode, xop1);
	  x = gen_rtx_PLUS (SImode, xop0, xop1);
	}
    }
  else if (GET_CODE (x) == PLUS
	   && s_register_operand (XEXP (x, 1), SImode)
	   && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
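/* Worked example (illustrative): for an SImode access at offset 130,
   130 < 32 * 4 + 8, so delta = 31 * 4 = 124; the base is biased by
   130 - 124 = 6 and the final address then uses the maximum direct
   SImode offset of 124.  */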
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
	{
	  /* ARM currently does not provide relocations to encode TLS variables
	     into AArch32 instructions, only data, so there is no way to
	     currently implement these if a literal pool is disabled.  */
	  if (arm_disable_literal_pool)
	    sorry ("accessing thread-local storage is not currently supported "
		   "with -mpure-code or -mslow-flash-data");

	  return true;
	}

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
     RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
     for ARMv8-M Baseline or later the result is valid.  */
  if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
    return true;

  return (CONST_INT_P (x)
	  || CONST_DOUBLE_P (x)
	  || CONSTANT_ADDRESS_P (x)
	  || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
	  || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }
  return arm_tls_referenced_p (x);
}

#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (arm_arch6m && arm_m_profile_small_mul)
	return COSTS_N_INSNS (32);

      if (CONST_INT_P (XEXP (x, 1)))
	{
	  int cycles = 0;
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

	  while (i)
	    {
	      i >>= 2;
	      cycles++;
	    }
	  return COSTS_N_INSNS (2) + cycles;
	}
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
	      + 4 * ((MEM_P (SET_SRC (x)))
		     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256
	      /* 16-bit constant.  */
	      || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
	    return 0;
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
	return total;

      if (arm_arch6)
	return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
	 we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words, cost;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* Fall through.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Thumb1 mul instruction can't operate on const.  We must Load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  /* For the targets which have a very small and high-latency multiply
	     unit, we prefer to synthesize the mult with up to 5 instructions,
	     giving a good balance between size and performance.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    return COSTS_N_INSNS (5);
	  else
	    return COSTS_N_INSNS (1) + const_size;
	}
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      cost = COSTS_N_INSNS (words);
      if (satisfies_constraint_J (SET_SRC (x))
	  || satisfies_constraint_K (SET_SRC (x))
	     /* Too big an immediate for a 2-byte mov, using MOVT.  */
	  || (CONST_INT_P (SET_SRC (x))
	      && UINTVAL (SET_SRC (x)) >= 256
	      && TARGET_HAVE_MOVT
	      && satisfies_constraint_j (SET_SRC (x)))
	     /* thumb1_movdi_insn.  */
	  || ((words > 1) && MEM_P (SET_SRC (x))))
	cost += COSTS_N_INSNS (1);
      return cost;

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256)
	    return COSTS_N_INSNS (1);
	  /* movw is 4byte long.  */
	  if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
	      + COSTS_N_INSNS (1)
		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
	{
	case QImode:
	  return (1 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case HImode:
	  return (4 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case SImode:
	  return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	default:
	  return 99;
	}

    default:
      return 99;
    }
}
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
	   || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
	*shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
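/* Illustrative examples (not from the original source): for
   (mult r1 (const_int 8)), exact_log2 (8) == 3 > 0, so r1 is returned
   and the multiply is costed like (ashift r1 (const_int 3)) -- both map
   to an operand of the form "r1, lsl #3".  For (ashift r1 r2) the shift
   amount is not constant, so *SHIFT_REG is set to r2 and r1 is
   returned.  */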
static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  rtx_code code = GET_CODE (x);
  gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
	 use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
		  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
		  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      if (speed_p)
	*cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;

      return true;
    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return true;
}
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))

#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0);
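/* Worked example (illustrative): LIBCALL_COST (2) is COSTS_N_INSNS (20)
   when optimizing for speed (two argument moves plus an assumed 18-insn
   call overhead) and COSTS_N_INSNS (4) at -Os.  */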
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
			const struct cpu_cost_table *extra_cost,
			int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (TARGET_THUMB1)
    {
      if (speed_p)
	*cost = thumb1_rtx_costs (x, code, outer_code);
      else
	*cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }
  switch (code)
    {
    case SET:
      *cost = 0;
      /* SET RTXs don't have a mode so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));

      if (REG_P (SET_SRC (x))
	  && REG_P (SET_DEST (x)))
	{
	  /* Assume that most copies can be done with a single insn,
	     unless we don't have HW FP, in which case everything
	     larger than word mode will require two insns.  */
	  *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
				   && GET_MODE_SIZE (mode) > 4)
				  || mode == DImode)
				 ? 2 : 1);
	  /* Conditional register moves can be encoded
	     in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
	    *cost >>= 1;

	  return true;
	}

      if (CONST_INT_P (SET_SRC (x)))
	{
	  /* Handle CONST_INT here, since the value doesn't have a mode
	     and we would otherwise be unable to work out the true cost.  */
	  *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
			    0, speed_p);
	  outer_code = SET;
	  /* Slightly lower the cost of setting a core reg to a constant.
	     This helps break up chains and allows for better scheduling.  */
	  if (REG_P (SET_DEST (x))
	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
	    *cost -= 1;

	  x = SET_SRC (x);
	  /* Immediate moves with an immediate in the range [0, 255] can be
	     encoded in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
	    *cost >>= 1;

	  goto const_int_cost;
	}
      return false;

    case MEM:
      /* A memory access costs 1 insn if the mode is small, or the address is
	 a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
	*cost = COSTS_N_INSNS (1);
      else if (flag_pic
	       && GET_CODE (XEXP (x, 0)) == PLUS
	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
	/* This will be split into two instructions.
	   See arm.md:calculate_pic_address.  */
	*cost = COSTS_N_INSNS (2);
      else
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      /* For speed optimizations, add the costs of the address and
	 accessing memory.  */
      if (speed_p)
#ifdef NOT_YET
	*cost += (extra_cost->ldst.load
		  + arm_address_cost (XEXP (x, 0), mode,
				      ADDR_SPACE_GENERIC, speed_p));
#else
	*cost += extra_cost->ldst.load;
#endif
      return true;

    case PARALLEL:
    {
   /* Calculations of LDM costs are complex.  We assume an initial cost
   (ldm_1st) which will load the number of registers mentioned in
   ldm_regs_per_insn_1st registers; then each additional
   ldm_regs_per_insn_subsequent registers cost one more insn.  The
   formula for N regs is thus:

   ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
			     + ldm_regs_per_insn_subsequent - 1)
			    / ldm_regs_per_insn_subsequent).

   Additional costs may also be added for addressing.  A similar
   formula is used for STM.  */

      bool is_ldm = load_multiple_operation (x, SImode);
      bool is_stm = store_multiple_operation (x, SImode);

      if (is_ldm || is_stm)
	{
	  if (speed_p)
	    {
	      HOST_WIDE_INT nregs = XVECLEN (x, 0);
	      HOST_WIDE_INT regs_per_insn_1st = is_ldm
			      ? extra_cost->ldst.ldm_regs_per_insn_1st
			      : extra_cost->ldst.stm_regs_per_insn_1st;
	      HOST_WIDE_INT regs_per_insn_sub = is_ldm
			       ? extra_cost->ldst.ldm_regs_per_insn_subsequent
			       : extra_cost->ldst.stm_regs_per_insn_subsequent;

	      *cost += regs_per_insn_1st
		       + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
					 + regs_per_insn_sub - 1)
					/ regs_per_insn_sub);
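	      /* Worked example (illustrative): for an LDM of 5 registers
		 with ldm_regs_per_insn_1st == 2 and
		 ldm_regs_per_insn_subsequent == 2, the formula above adds
		 COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2)
		 == COSTS_N_INSNS (2) on top of the first-insn cost.  */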
	      return true;
	    }
	}
      return false;
    }

    case DIV:
    case UDIV:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*cost += COSTS_N_INSNS (speed_p
				? extra_cost->fp[mode != SFmode].div : 0);
      else if (mode == SImode && TARGET_IDIV)
	*cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
      else
	*cost = LIBCALL_COST (2);

      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
	 possible udiv is prefered.  */
      *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */

    case MOD:
      /* MOD by a power of 2 can be expanded as:
	 rsbs    r1, r0, #0
	 and     r0, r0, #(n - 1)
	 and     r1, r1, #(n - 1)
	 rsbpl   r0, r1, #0.  */
      if (CONST_INT_P (XEXP (x, 1))
	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
	  && mode == SImode)
	{
	  *cost += COSTS_N_INSNS (3);

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical
		     + extra_cost->alu.arith;
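	  /* Worked example (illustrative): for n == 8 and r0 == -13 the
	     sequence above yields r1 = 13, r0 = -13 & 7 = 3, r1 = 13 & 7 = 5,
	     and since the negation was non-negative (PL) the final insn sets
	     r0 = -r1 = -5, matching C's -13 % 8 == -5.  */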
	  return true;
	}

    /* Fall-through.  */
    case UMOD:
      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
	 possible udiv is prefered.  */
      *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */

    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (1)
		    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += extra_cost->alu.shift_reg;
	  return true;
	}
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (2)
		    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	  return true;
	}
      else if (mode == SImode)
	{
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  /* Slightly disparage register shifts at -Os, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *cost += (speed_p ? extra_cost->alu.shift_reg : 1
		      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_INT
	       && GET_MODE_SIZE (mode) < 4)
	{
	  if (code == ASHIFT)
	    {
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* Slightly disparage register shifts at -Os, but not by
		 much.  */
	      if (!CONST_INT_P (XEXP (x, 1)))
		*cost += (speed_p ? extra_cost->alu.shift_reg : 1
			  + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	    }
	  else if (code == LSHIFTRT || code == ASHIFTRT)
	    {
	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
		{
		  /* Can use SBFX/UBFX.  */
		  if (speed_p)
		    *cost += extra_cost->alu.bfx;
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		}
	      else
		{
		  *cost += COSTS_N_INSNS (1);
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  if (speed_p)
		    {
		      if (CONST_INT_P (XEXP (x, 1)))
			*cost += 2 * extra_cost->alu.shift;
		      else
			*cost += (extra_cost->alu.shift
				  + extra_cost->alu.shift_reg);
		    }
		  else
		    /* Slightly disparage register shifts.  */
		    *cost += !CONST_INT_P (XEXP (x, 1));
		}
	    }
	  else /* Rotates.  */
	    {
	      *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      if (speed_p)
		{
		  if (CONST_INT_P (XEXP (x, 1)))
		    *cost += (2 * extra_cost->alu.shift
			      + extra_cost->alu.log_shift);
		  else
		    *cost += (extra_cost->alu.shift
			      + extra_cost->alu.shift_reg
			      + extra_cost->alu.log_shift_reg);
		}
	    }
	  return true;
	}
      else
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

    case BSWAP:
      if (arm_arch6)
	{
	  if (mode == SImode)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.rev;

	      return false;
	    }
	}
      else
	{
	  /* No rev instruction available.  Look at arm_legacy_rev
	     and thumb_legacy_rev for the form of RTL used then.  */
	  if (TARGET_THUMB)
	    {
	      *cost += COSTS_N_INSNS (9);

	      if (speed_p)
		{
		  *cost += 6 * extra_cost->alu.shift;
		  *cost += 3 * extra_cost->alu.logical;
		}
	    }
	  else
	    {
	      *cost += COSTS_N_INSNS (4);

	      if (speed_p)
		{
		  *cost += 2 * extra_cost->alu.shift;
		  *cost += extra_cost->alu.arith_shift;
		  *cost += 2 * extra_cost->alu.logical;
		}
	    }
	  return true;
	}
      return false;

    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      || GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      rtx mul_op0, mul_op1, sub_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      if (GET_CODE (XEXP (x, 0)) == MULT)
		{
		  mul_op0 = XEXP (XEXP (x, 0), 0);
		  mul_op1 = XEXP (XEXP (x, 0), 1);
		  sub_op = XEXP (x, 1);
		}
	      else
		{
		  mul_op0 = XEXP (XEXP (x, 1), 0);
		  mul_op1 = XEXP (XEXP (x, 1), 1);
		  sub_op = XEXP (x, 0);
		}

	      /* The first operand of the multiply may be optionally
		 negated.  */
	      if (GET_CODE (mul_op0) == NEG)
		mul_op0 = XEXP (mul_op0, 0);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (sub_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_by_reg = NULL;
	  rtx shift_op;
	  rtx non_shift_op;

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
	  if (shift_op == NULL)
	    {
	      shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
	      non_shift_op = XEXP (x, 0);
	    }
	  else
	    non_shift_op = XEXP (x, 1);

	  if (shift_op != NULL)
	    {
	      if (shift_by_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
	      *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
	      return true;
	    }

	  if (arm_arch_thumb2
	      && GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      /* MLS.  */
	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 0)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  /* We check both sides of the MINUS for shifter operands since,
	     unlike PLUS, it's not commutative.  */

	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)

	  /* Slightly disparage, as we might need to widen the result.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }

	  return false;
	}

      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	    {
	      rtx op1 = XEXP (x, 1);

	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;

	      if (GET_CODE (op1) == ZERO_EXTEND)
		*cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
				   0, speed_p);
	      else
		*cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				 0, speed_p);
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));
	      *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    GET_CODE (XEXP (x, 1)), 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op0, mul_op1, add_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      mul_op0 = XEXP (XEXP (x, 0), 0);
	      mul_op1 = XEXP (XEXP (x, 0), 1);
	      add_op = XEXP (x, 1);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (add_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

	/* Narrow modes can be synthesized in SImode, but the range
	   of useful sub-operations is limited.  Check for shift operations
	   on one of the operands.  Only left shifts can be used in the
	   narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      /* Slightly penalize a narrow operation as the result may
		 need widening.  */
	      *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
	      return true;
	    }

	  /* Slightly penalize a narrow operation as the result may
	     need widening.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_op, shift_reg;

	  if (TARGET_INT_SIMD
	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	    {
	      /* UXTA[BH] or SXTA[BH].  */
	      if (speed_p)
		*cost += extra_cost->alu.extend_arith;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
	      return true;
	    }

	  shift_reg = NULL;
	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op = XEXP (x, 0);

	      if (TARGET_DSP_MULTIPLY
		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
				      == 16))))))
		{
		  /* SMLA[BT][BT].  */
		  if (speed_p)
		    *cost += extra_cost->mult[0].extend_add;
		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
				      SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
					SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
			+ rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (mode == DImode)
	{
	  if (arm_arch3m
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend_add;
	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
				    ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));

	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;

    case IOR:
      if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
	{
	  if (speed_p)
	    *cost += extra_cost->alu.rev;

	  return true;
	}
    /* Fall through.  */
    case AND: case XOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
	  rtx op0 = XEXP (x, 0);
	  rtx shift_op, shift_reg;

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);

	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.logical;
	      *cost += rtx_cost (op0, mode, code, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  *cost += (rtx_cost (op0, mode, code, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}

      if (mode == DImode)
	{
	  rtx op0 = XEXP (x, 0);
	  enum rtx_code subcode = GET_CODE (op0);

	  *cost += COSTS_N_INSNS (1);

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  if (GET_CODE (op0) == ZERO_EXTEND)
	    {
	      if (speed_p)
		*cost += 2 * extra_cost->alu.logical;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }
	  else if (GET_CODE (op0) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical;

	  return true;
	}
      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10001 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10003 rtx op0
= XEXP (x
, 0);
10005 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
10006 op0
= XEXP (op0
, 0);
10009 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10011 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
10012 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
10015 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10017 *cost
= LIBCALL_COST (2);
10021 if (mode
== SImode
)
10023 if (TARGET_DSP_MULTIPLY
10024 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10025 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10026 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10027 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10028 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10029 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10030 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10031 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10032 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10033 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10034 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10035 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10038 /* SMUL[TB][TB]. */
10040 *cost
+= extra_cost
->mult
[0].extend
;
10041 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
10042 SIGN_EXTEND
, 0, speed_p
);
10043 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
10044 SIGN_EXTEND
, 1, speed_p
);
10048 *cost
+= extra_cost
->mult
[0].simple
;
10052 if (mode
== DImode
)
10055 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10056 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10057 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10058 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10061 *cost
+= extra_cost
->mult
[1].extend
;
10062 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
10063 ZERO_EXTEND
, 0, speed_p
)
10064 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10065 ZERO_EXTEND
, 0, speed_p
));
10069 *cost
= LIBCALL_COST (2);
10074 *cost
= LIBCALL_COST (2);
      return false;

    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      /* VNMUL.  */
	      *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == ABS)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* Assume the non-flag-changing variant.  */
	      if (speed_p)
		*cost += (extra_cost->alu.log_shift
			  + extra_cost->alu.arith_shift);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
	      return true;
	    }

	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* No extra cost for MOV imm and MVN imm.  */
	      /* If the comparison op is using the flags, there's no further
		 cost, otherwise we need to add the cost of the comparison.  */
	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
		{
		  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
		  *cost += (COSTS_N_INSNS (1)
			    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
					0, speed_p)
			    + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
					1, speed_p));
		  if (speed_p)
		    *cost += extra_cost->alu.arith;
		}
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  /* Slightly disparage, as we might need an extend operation.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;

    case NOT:
      if (mode == SImode)
	{
	  rtx shift_op;
	  rtx shift_reg = NULL;

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

	  if (shift_op)
	    {
	      if (shift_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;
	      *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  return false;
	}
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  return false;
	}

      /* Vector mode?  */

      *cost += LIBCALL_COST (1);
      return false;

    case IF_THEN_ELSE:
      {
	if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	  {
	    *cost += COSTS_N_INSNS (3);
	    return true;
	  }
	int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
	int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);

	*cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
	/* Assume that if one arm of the if_then_else is a register,
	   that it will be tied with the result and eliminate the
	   conditional insn.  */
	if (REG_P (XEXP (x, 1)))
	  *cost += op2cost;
	else if (REG_P (XEXP (x, 2)))
	  *cost += op1cost;
	else
	  {
	    if (speed_p)
	      {
		if (extra_cost->alu.non_exec_costs_exec)
		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
		else
		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
	      }
	    else
	      *cost += op1cost + op2cost;
	  }
      }
      return true;

    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
	*cost = 0;
      else
	{
	  machine_mode op0mode;
	  /* We'll mostly assume that the cost of a compare is the cost of the
	     LHS.  However, there are some notable exceptions.  */

	  /* Floating point compares are never done as side-effects.  */
	  op0mode = GET_MODE (XEXP (x, 0));
	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
	    {
	      if (speed_p)
		*cost += extra_cost->fp[op0mode != SFmode].compare;

	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
	    {
	      *cost = LIBCALL_COST (2);
	      return false;
	    }

	  /* DImode compares normally take two insns.  */
	  if (op0mode == DImode)
	    {
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;
	      return false;
	    }

	  if (op0mode == SImode)
	    {
	      rtx shift_op;
	      rtx shift_reg;

	      if (XEXP (x, 1) == const0_rtx
		  && !(REG_P (XEXP (x, 0))
		       || (GET_CODE (XEXP (x, 0)) == SUBREG
			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
		{
		  *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);

		  /* Multiply operations that set the flags are often
		     significantly more expensive.  */
		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;

		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
							    0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;
		  return true;
		}

	      shift_reg = NULL;
	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	      if (shift_op != NULL)
		{
		  if (shift_reg != NULL)
		    {
		      *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
					 1, speed_p);
		      if (speed_p)
			*cost += extra_cost->alu.arith_shift_reg;
		    }
		  else if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
		  *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
		  return true;
		}
	      return false;
	    }

	  /* Vector mode?  */

	  *cost = LIBCALL_COST (2);
      if (outer_code == SET)
	{
	  /* Is it a store-flag operation?  */
	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	      && XEXP (x, 1) == const0_rtx)
	    {
	      /* Thumb also needs an IT insn.  */
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
	      return true;
	    }
	  if (XEXP (x, 1) == const0_rtx)
	    {
	      switch (code)
		{
		case LT:
		  /* LSR Rd, Rn, #31.  */
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		case EQ:
		case NE:
		  *cost += COSTS_N_INSNS (1);
		  break;

		case LE:
		  /* RSBS T1, Rn, Rn, LSR #31
		     ADC  Rd, Rn, T1.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  break;

		case GT:
		  /* RSB  Rd, Rn, Rn, ASR #1
		     LSR  Rd, Rd, #31.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += (extra_cost->alu.arith_shift
			      + extra_cost->alu.shift);
		  break;

		case GE:
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		default:
		  /* Remaining cases are either meaningless or would take
		     three insns anyway.  */
		  *cost = COSTS_N_INSNS (3);
		  break;
		}
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      return true;
	    }
	  else
	    {
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	}
      /* Not directly inside a set.  If it involves the condition code
	 register it must be the condition for a branch, cond_exec or
	 I_T_E operation.  Since the comparison is performed elsewhere
	 this is just the control part which has no additional
	 cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	       && XEXP (x, 1) == const0_rtx)
	return true;
      return false;
    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
	  return false;
	}
      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case SIGN_EXTEND:
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);

	  if (!speed_p)
	    return true;

	  if (GET_MODE (XEXP (x, 0)) == SImode)
	    *cost += extra_cost->ldst.load;
	  else
	    *cost += extra_cost->ldst.load_sign_extend;

	  if (mode == DImode)
	    *cost += extra_cost->alu.shift;

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have SXTB/SXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  */
	  *cost += COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.shift;
	}

      return true;
    case ZERO_EXTEND:
      if ((arm_arch4
	   || GET_MODE (XEXP (x, 0)) == SImode
	   || GET_MODE (XEXP (x, 0)) == QImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
	{
	  /* UXTB can be a shorter instruction in Thumb2, but it might
	     be slower than the AND Rd, Rn, #255 alternative.  When
	     optimizing for speed it should never be slower to use
	     AND, and we don't really model 16-bit vs 32-bit insns
	     here.  */
	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have UXTB/UXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  It's marginally preferable to use
	     shifts rather than two BIC instructions as the second
	     shift may merge with a subsequent insn as a shifter
	     op.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	*cost += COSTS_N_INSNS (1);	/* No speed penalty.  */

      return true;
    case CONST_INT:
      {
	/* CONST_INT has no mode, so we cannot tell for sure how many
	   insns are really going to be needed.  The best we can do is
	   look at the value passed.  If it fits in SImode, then assume
	   that's the mode it will be used for.  Otherwise assume it
	   will be used in DImode.  */
	if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
	  mode = SImode;
	else
	  mode = DImode;

	/* Avoid blowing up in arm_gen_constant ().  */
	if (!(outer_code == PLUS
	      || outer_code == AND
	      || outer_code == IOR
	      || outer_code == XOR
	      || outer_code == MINUS))
	  outer_code = SET;

	if (mode == SImode)
	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
						    INTVAL (x), NULL, NULL,
						    0, 0) - 1);
	else
	  *cost += COSTS_N_INSNS (arm_gen_constant
				  (outer_code, SImode, NULL,
				   trunc_int_for_mode (INTVAL (x), SImode),
				   NULL, NULL, 0, 0)
				  + arm_gen_constant (outer_code, SImode, NULL,
						      INTVAL (x) >> 32, NULL,
						      NULL, 0, 0) - 2);

	return true;
      }
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (speed_p)
	{
	  if (arm_arch_thumb2 && !flag_pic)
	    *cost += COSTS_N_INSNS (1);
	  else
	    *cost += extra_cost->ldst.load;
	}
      else
	*cost += COSTS_N_INSNS (1);

      if (flag_pic)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	}

      return true;

    case CONST_FIXED:
      *cost = COSTS_N_INSNS (4);
      return true;
    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (vfp3_const_double_rtx (x))
	    {
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].fpconst;
	      return true;
	    }

	  if (speed_p)
	    {
	      if (mode == DFmode)
		*cost += extra_cost->ldst.loadd;
	      else
		*cost += extra_cost->ldst.loadf;
	    }
	  else
	    *cost += COSTS_N_INSNS (1 + (mode == DFmode));

	  return true;
	}
      *cost = COSTS_N_INSNS (4);
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*cost = COSTS_N_INSNS (1);
      else
	*cost = COSTS_N_INSNS (4);
      return true;
    case HIGH:
    case LO_SUM:
      /* When optimizing for size, we prefer constant pool entries to
	 MOVW/MOVT pairs, so bump the cost of these slightly.  */
      if (!speed_p)
	*cost += 1;
      return true;

    case CLZ:
      if (speed_p)
	*cost += extra_cost->alu.clz;
      return false;
    case SMIN:
      if (XEXP (x, 1) == const0_rtx)
	{
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  return true;
	}
      /* Fall through.  */
    case SMAX:
    case UMIN:
    case UMAX:
      *cost += COSTS_N_INSNS (1);
      return false;
    case TRUNCATE:
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
		      == ZERO_EXTEND))))
	{
	  if (speed_p)
	    *cost += extra_cost->mult[1].extend;
	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
			      ZERO_EXTEND, 0, speed_p)
		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
				ZERO_EXTEND, 0, speed_p));
	  return true;
	}
      *cost = LIBCALL_COST (1);
      return false;
    case UNSPEC_VOLATILE:
    case UNSPEC:
      return arm_unspec_cost (x, outer_code, speed_p, cost);
    case PC:
      /* Reading the PC is like reading any other register.  Writing it
	 is more expensive, but we take that into account elsewhere.  */
      *cost = 0;
      return true;

    case ZERO_EXTRACT:
      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
    case SIGN_EXTRACT:
      if (arm_arch6
	  && mode == SImode
	  && CONST_INT_P (XEXP (x, 1))
	  && CONST_INT_P (XEXP (x, 2)))
	{
	  if (speed_p)
	    *cost += extra_cost->alu.bfx;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  return true;
	}
      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost += COSTS_N_INSNS (1);
      if (speed_p)
	*cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
      return true;
    case FLOAT_EXTEND:
      if (TARGET_HARD_FLOAT)
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].widen;
	  if (!TARGET_VFP5
	      && GET_MODE (XEXP (x, 0)) == HFmode)
	    {
	      /* Pre v8, widening HF->DF is a two-step process, first
		 widening to SFmode.  */
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[0].widen;
	    }
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  return true;
	}

      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].narrow;
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  return true;
	  /* Vector modes?  */
	}
      *cost = LIBCALL_COST (1);
      return false;
    case FMA:
      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  rtx op2 = XEXP (x, 2);

	  /* vfms or vfnma.  */
	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);

	  /* vfnms or vfnma.  */
	  if (GET_CODE (op2) == NEG)
	    op2 = XEXP (op2, 0);

	  *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
	  *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
	  *cost += rtx_cost (op2, mode, FMA, 2, speed_p);

	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].fma;

	  return true;
	}

      *cost = LIBCALL_COST (3);
      return false;
    case FIX:
    case UNSIGNED_FIX:
      if (TARGET_HARD_FLOAT)
	{
	  /* The *combine_vcvtf2i reduces a vmul+vcvt into
	     a vcvt fixed-point conversion.  */
	  if (code == FIX && mode == SImode
	      && GET_CODE (XEXP (x, 0)) == FIX
	      && GET_MODE (XEXP (x, 0)) == SFmode
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
		 > 0)
	    {
	      if (speed_p)
		*cost += extra_cost->fp[0].toint;

	      *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				 code, 0, speed_p);
	      return true;
	    }

	  if (GET_MODE_CLASS (mode) == MODE_INT)
	    {
	      mode = GET_MODE (XEXP (x, 0));
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].toint;
	      /* Strip off the 'cost' of rounding towards zero.  */
	      if (GET_CODE (XEXP (x, 0)) == FIX)
		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
				   0, speed_p);
	      else
		*cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);

	      /* ??? Increase the cost to deal with transferring from
		 FP -> CORE registers?  */
	      return true;
	    }
	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	    {
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].roundint;
	      return false;
	    }
	  /* Vector costs?  */
	}
      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT:
    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
	{
	  /* ??? Increase the cost to deal with transferring from CORE
	     -> FP registers?  */
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].fromint;
	  return false;
	}
      *cost = LIBCALL_COST (1);
      return false;
    case ASM_OPERANDS:
      {
	/* Just a guess.  Guess number of instructions in the asm
	   plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
	   though (see PR60663).  */
	int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
	int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);

	*cost = COSTS_N_INSNS (asm_length + num_operands);
	return true;
      }

    default:
      if (mode != VOIDmode)
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
	*cost = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}
#undef HANDLE_NARROW_SHIFT_ARITH

/* RTX costs entry point.  */

static bool
arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  bool result;
  int code = GET_CODE (x);
  gcc_assert (current_tune->insn_extra_cost);

  result = arm_rtx_costs_internal (x, (enum rtx_code) code,
				   (enum rtx_code) outer_code,
				   current_tune->insn_extra_cost,
				   total, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
	       *total, result ? "final" : "partial");
    }

  return result;
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (CONST_INT_P (XEXP (x, 1)))
	return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
	return 3;

      return 4;
    }

  return 6;
}

static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
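
/* For instance (an illustration of the weighting above, assuming the
   reconstructed return values are faithful; not part of the original
   sources): on a 32-bit target, (post_inc (reg)) weighs 0,
   (plus (reg) (const_int 4)) weighs 2, (plus (reg) (mult (reg)
   (const_int 4))) weighs 3, a bare (reg) falls through to 6, and a
   (symbol_ref) weighs 10, so more-preferred address forms receive
   smaller weights.  */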
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			  int *cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (dep_type == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT_IMM
	      || attr_type == TYPE_ALUS_SHIFT_IMM
	      || attr_type == TYPE_LOGIC_SHIFT_IMM
	      || attr_type == TYPE_LOGICS_SHIFT_IMM
	      || attr_type == TYPE_ALU_SHIFT_REG
	      || attr_type == TYPE_ALUS_SHIFT_REG
	      || attr_type == TYPE_LOGIC_SHIFT_REG
	      || attr_type == TYPE_LOGICS_SHIFT_REG
	      || attr_type == TYPE_MOV_SHIFT
	      || attr_type == TYPE_MVN_SHIFT
	      || attr_type == TYPE_MOV_SHIFT_REG
	      || attr_type == TYPE_MVN_SHIFT_REG))
	{
	  rtx shifted_operand;
	  int opno;

	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];

	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase
	     the cost of this dependency.  */
	  extract_insn (dep);
	  preprocess_constraints (dep);
	  for (opno = 0; opno < recog_data.n_operands; opno++)
	    {
	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)
		continue;

	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
					   shifted_operand))
		{
		  *cost = 2;
		  return false;
		}
	    }
	}
    }
  return true;
}
/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			     int *cost)
{
  switch (dep_type)
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
	  && recog_memoized (dep) >= 0)
	{
	  if (GET_CODE (PATTERN (insn)) == SET)
	    {
	      if (GET_MODE_CLASS
		  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
		  || GET_MODE_CLASS
		  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
		{
		  enum attr_type attr_type_insn = get_attr_type (insn);
		  enum attr_type attr_type_dep = get_attr_type (dep);

		  /* By default all dependencies of the form
		     s0 = s0 <op> s1
		     s0 = s0 <op> s2
		     have an extra latency of 1 cycle because
		     of the input and output dependency in this
		     case.  However this gets modeled as a true
		     dependency and hence all these checks.  */
		  if (REG_P (SET_DEST (PATTERN (insn)))
		      && reg_set_p (SET_DEST (PATTERN (insn)), dep))
		    {
		      /* FMACS is a special case where the dependent
			 instruction can be issued 3 cycles before
			 the normal latency in case of an output
			 dependency.  */
		      if ((attr_type_insn == TYPE_FMACS
			   || attr_type_insn == TYPE_FMACD)
			  && (attr_type_dep == TYPE_FMACS
			      || attr_type_dep == TYPE_FMACD))
			{
			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) - 3;
			  else
			    *cost = insn_default_latency (dep);
			  return false;
			}
		      else
			{
			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) + 1;
			  else
			    *cost = insn_default_latency (dep);
			}
		      return false;
		    }
		}
	    }
	}
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			   int *cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (dep_type == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
	  && get_attr_type (insn) != TYPE_BRANCH)
	{
	  *cost = 3;
	  return false;
	}

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
	  || get_attr_conds (insn) == CONDS_USE)
	{
	  *cost = 0;
	  return false;
	}
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   one is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
	return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
	return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
	return 20;
      else
	return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
	return 4;
      else
	return 2;
    }
}
/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
		      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
	return 8;
      else
	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
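
/* A worked illustration of the Thumb-1 branch above (ours; the numbers
   follow directly from the formula, assuming the reconstruction is
   faithful): an SImode value (4 bytes) moved via LO_REGS costs
   2 * 4 * 1 = 8, the same move via any other class costs 2 * 4 * 2 = 16,
   sub-word moves are a flat 8, and all 32-bit-target moves a flat 10.  */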
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return current_tune->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return current_tune->vec_costs->scalar_load_cost;

    case scalar_store:
      return current_tune->vec_costs->scalar_store_cost;

    case vector_stmt:
      return current_tune->vec_costs->vec_stmt_cost;

    case vector_load:
      return current_tune->vec_costs->vec_align_load_cost;

    case vector_store:
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
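
/* For example (illustrative only, following the vec_construct formula
   above): building a V4SI vector element by element is costed as
   4 / 2 + 1 = 3 units and a V2DI vector as 2 / 2 + 1 = 2 units, so wider
   element counts make vec_construct proportionally more expensive.  */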
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		   struct _stmt_vec_info *stmt_info, int misalign,
		   enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
/* Return true if and only if this insn can dual-issue only as older.  */
static bool
cortexa7_older_only (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADCS_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_FFARITHS:
    case TYPE_FFARITHD:
    case TYPE_F_STORES:
      return true;
    default:
      return false;
    }
}
/* Return true if and only if this insn can dual-issue as younger.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
	fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  switch (get_attr_type (insn))
    {
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
      return true;
    default:
      return false;
    }
}
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing the issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
			int *n_readyp, int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 5)
    fprintf (file,
	     ";; sched_reorder for cycle %d with %d insns in ready list\n",
	     clock, *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), and looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
	{
	  first_older_only = i;
	  if (verbose > 5)
	    fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
	  break;
	}
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
	first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
	     INSN_UID (ready[first_older_only]),
	     INSN_UID (ready[first_younger]));
  rtx_insn *first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];

  ready[i] = first_older_only_insn;
}
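
/* A small worked illustration (ours, not from the original sources):
   suppose the ready list would issue [younger1, older_only, younger2]
   in that order.  The scan from the head records younger1 as
   first_younger and stops at older_only as first_older_only; the
   rotation above then produces the issue order
   [older_only, younger1, younger2], so the inflexible insn goes first
   and younger1 remains available to pair with it in the same cycle.  */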
/* Implement TARGET_SCHED_REORDER.  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
		   int clock)
{
  switch (arm_tune)
    {
    case TARGET_CPU_cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
		 unsigned int)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && dep_type == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
	return cost;
    }

  /* XXX Is this strictly true?  */
  if (dep_type == REG_DEP_ANTI
      || dep_type == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (dep_type == 0
      && CALL_P (insn))
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */
      if ((GET_CODE (src_mem) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
	return 1;
    }

  return cost;
}
static int
arm_max_conditional_execute (void)
{
  return max_insns_skipped;
}

static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
  else
    return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);
}

static int
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!fp_consts_inited)
    init_fp_table ();

  r = CONST_DOUBLE_REAL_VALUE (x);
  if (REAL_VALUE_MINUS_ZERO (*r))
    return 0;

  if (real_equal (r, &value_fp0))
    return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     valid = (-1)^s * n * 2^-r

   where 's' is a sign bit (0/1), and 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

     - A (most-significant) is the sign bit.
     - BCD are the exponent (encoded as r XOR 3).
     - EFGH are the mantissa (encoded as n - 16).
*/
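
/* A worked example of the encoding above (ours, for illustration): the
   value 1.0 can be written as 16 * 2^-4, so s = 0, n = 16 and r = 4.
   That gives A = 0, BCD = (4 XOR 3) = 7 (binary 111) and
   EFGH = 16 - 16 = 0, i.e. the 8-bit pattern 0111 0000 = 0x70, which is
   the fconst encoding of +1.0.  */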
/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  bool fail;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  r = *CONST_DOUBLE_REAL_VALUE (x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table,
   or -1 if the given value doesn't match any of the listed patterns.
*/
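
/* For example (ours, for illustration): a V4SI constant with every
   element equal to 0x0000005A matches variant 0 (bytes 5A 00 00 00 per
   element in little-endian order) and can be emitted as
   "vmov.i32 Dd, #0x5a", while the all-elements 0xFFFFFFA5 constant is
   the bitwise inverse of that value and so matches the corresponding
   VMVN variant 6.  */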
static int
neon_valid_immediate (rtx op, machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    if (!(TEST))				\
      matches = 0;				\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    n_elts = CONST_VECTOR_NUNITS (op);
  else
    {
      n_elts = 1;
      if (mode == VOIDmode)
	mode = DImode;
    }

  innersize = GET_MODE_UNIT_SIZE (mode);

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
	return -1;

      /* FP16 vectors cannot be represented.  */
      if (GET_MODE_INNER (mode) == HFmode)
	return -1;

      /* All elements in the vector must be the same.  Note that 0.0 and -0.0
	 are distinct in this context.  */
      if (!const_vec_duplicate_p (op))
	return -1;

      if (modconst)
	*modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
	*elementwidth = 0;

      if (el0 == CONST0_RTX (GET_MODE (el0)))
	return 19;
      else
	return 18;
    }

  /* The tricks done in the code below apply for little-endian vector layout.
     For big-endian vectors only allow vectors of the form { a, a, a..., a }.
     FIXME: Implement logic for big-endian vectors.  */
  if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
    return -1;

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el);

      for (unsigned int byte = 0; byte < innersize; byte++)
	{
	  bytes[idx++] = (elpart & 0xff) ^ invmask;
	  elpart >>= BITS_PER_UNIT;
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
			&& bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);

  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  unsigned HOST_WIDE_INT imm = 0;

	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
      else
	{
	  unsigned HOST_WIDE_INT imm = 0;

	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
    }

  return immtype;
#undef CHECK
}
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */

int
neon_immediate_valid_for_move (rtx op, machine_mode mode,
			       rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for a description of INVERSE.  */

int
neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT determines left versus right shift,
   because they have different limitations.  */

int
neon_immediate_valid_for_shift (rtx op, machine_mode mode,
				rtx *modconst, int *elementwidth,
				bool isleftshift)
{
  unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
	elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
	return 0;
      else
	gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
	return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
	return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
	return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
			     int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}

/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     machine_mode mode, int quad,
			     bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width,
					     isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
{
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
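
/* Illustration (ours, not from the original sources): for a 4-element
   vector V = {a, b, c, d} and an add reduction, parts = 4, so the loop
   runs with i = 2 and then i = 1:

     step 1: t = vpadd (V, V)  ->  {a+b, c+d, a+b, c+d}
     step 2: r = vpadd (t, t)  ->  {a+b+c+d, a+b+c+d, ...}

   after which every element of the result holds the full sum, and only
   element 0 is actually consumed by the caller.  */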
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

static rtx
neon_vdup_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx x;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  if (!const_vec_duplicate_p (vals, &x))
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, x);
  return gen_rtx_VEC_DUPLICATE (mode, x);
}
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We cannot take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer.  */
    return NULL_RTX;
}
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      switch (mode)
	{
	case V8QImode:
	  emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
	  break;
	case V16QImode:
	  emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
	  break;
	case V4HImode:
	  emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
	  break;
	case V8HImode:
	  emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
	  break;
	case V2SImode:
	  emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
	  break;
	case V4SImode:
	  emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
	  break;
	case V2SFmode:
	  emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
	  break;
	case V4SFmode:
	  emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
	  break;
	case V2DImode:
	  emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
	  break;
	default:
	  gcc_unreachable ();
	}
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
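
/* Usage sketch (ours, for illustration): initializing {x, 1, 2, 3} in
   V4SImode takes the n_var == 1 path above: the constant vector
   {1, 1, 2, 3} is first loaded by recursing with the varying lane
   replaced by its neighbor, and then a single vset_lane insn writes the
   run-time value x into lane 0, avoiding a round trip through the
   stack.  */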
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  EXP indicates the source location, which includes the
   inlining history for intrinsics.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const_tree exp, const char *desc)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    {
      if (exp)
	error ("%K%s %wd out of range %wd - %wd",
	       exp, desc, lane, low, high - 1);
      else
	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
    }
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
		  const_tree exp)
{
  bounds_check (operand, low, high, exp, "lane");
}

/* Bounds-check constants.  */

void
arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, NULL_TREE, "constant");
}

HOST_WIDE_INT
neon_element_bits (machine_mode mode)
{
  return GET_MODE_UNIT_BITSIZE (mode);
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int
arm_coproc_mem_operand (rtx op, bool wb)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
	  && (GET_CODE (ind) == PRE_INC
	      || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
	   (const)).  */
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (strict && ! (reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn.  */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1)))
    return true;

  /* Match:
     (plus (reg)
	   (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
	 than what the instruction format permits.  We have no such constraint
	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
	  < (VALID_NEON_QREG_MODE (GET_MODE (op)) ? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
	return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (label_mentioned_p (XVECEXP (x, i, j)))
              return 1;
        }
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
        return 1;
    }

  return 0;
}
/* Return TRUE if X contains any TLS symbol references.  */
int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
        return 1;
      /* Fall through.  */

    default:
      return 0;
    }
}
/* Must not copy any rtx that uses a pc-relative address.
   Also, disallow copying of load-exclusive instructions that
   may appear after splitting of compare-and-swap-style operations
   so as to prevent those loops from being transformed away from their
   canonical forms (see PR 69904).  */

static bool
arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
          && (XINT (x, 1) == UNSPEC_PIC_BASE
              || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
        return true;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx src = SET_SRC (set);
      if (GET_CODE (src) == ZERO_EXTEND)
        src = XEXP (src, 0);

      /* Catch the load-exclusive and load-acquire operations.  */
      if (GET_CODE (src) == UNSPEC_VOLATILE
          && (XINT (src, 1) == VUNSPEC_LL
              || XINT (src, 1) == VUNSPEC_LAX))
        return true;
    }

  return false;
}
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}
/* Match pair of min/max operators that can be implemented via usat/ssat.  */
bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
                        int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
        *mask = log;
      if (signed_sat)
        *signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
        *mask = log + 1;
      if (signed_sat)
        *signed_sat = true;

      return true;
    }

  return false;
}
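
/* Worked example (illustrative): the signed clamp
     MIN (MAX (x, -128), 127)
   has HI_BOUND == 127, so LOG == 7 and the second case applies with
   *MASK == 8, matching "ssat rD, #8, rN"; the unsigned clamp
     MIN (MAX (x, 0), 255)
   takes the first case with *MASK == 8, matching "usat rD, #8, rN".  */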
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
           && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
          || (GET_CODE (XEXP (b, 0)) == PLUS
              && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
        {
          reg0 = XEXP (XEXP (a, 0), 0);
          val0 = INTVAL (XEXP (XEXP (a, 0), 1));
        }
      else
        reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
        {
          reg1 = XEXP (XEXP (b, 0), 0);
          val1 = INTVAL (XEXP (XEXP (b, 0), 1));
        }
      else
        reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
         instructions to handle, since this would cause the
         arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
        return 0;

      /* Don't allow an eliminable register: register elimination can make
         the offset too large.  */
      if (arm_eliminable_register (reg0))
        return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
        {
          /* If the target has load delay slots, then there's no benefit
             to using an ldm instruction unless the offset is zero and
             we are optimizing for size.  */
          return (optimize_size && (REGNO (reg0) == REGNO (reg1))
                  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
                  && (val_diff == 4 || val_diff == -4));
        }

      return ((REGNO (reg0) == REGNO (reg1))
              && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
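
/* Example (sketch): (mem:SI (reg:SI 4)) and
   (mem:SI (plus:SI (reg:SI 4) (const_int 4))) are adjacent under the rules
   above (same base register, offset difference of exactly 4), making the
   pair a candidate for the arith_adjacentmem pattern.  */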
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
     where
     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for kth register being loaded,
         REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
                     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
         popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
          || !REG_P (XEXP (SET_SRC (elt), 0))
          || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
          || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
          || INTVAL (XEXP (SET_SRC (elt), 1)) !=
              ((count - 1 - offset_adj) * reg_increment))
        return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is
     loaded, success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
        return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
        return false;

      if (load)
        {
          reg = SET_DEST (elt);
          mem = SET_SRC (elt);
        }
      else
        {
          reg = SET_SRC (elt);
          mem = SET_DEST (elt);
        }

      if (!REG_P (reg)
          || GET_MODE (reg) != mode
          || REGNO (reg) <= regno
          || (consecutive
              && (REGNO (reg) !=
                  (unsigned int) (first_regno + regs_per_val * (i - base))))
          /* Don't allow SP to be loaded unless it is also the base register.
             It guarantees that SP is reset correctly when an LDM instruction
             is interrupted.  Otherwise, we might end up with a corrupt
             stack.  */
          || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
          || !MEM_P (mem)
          || GET_MODE (mem) != mode
          || ((GET_CODE (XEXP (mem, 0)) != PLUS
               || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
               || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
               || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
                   offset + (i - base) * reg_increment))
              && (!REG_P (XEXP (mem, 0))
                  || offset + (i - base) * reg_increment != 0)))
        return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
        addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
        return false;

      /* For Thumb-1, address register is always modified - either by
         write-back or by explicit load.  If the pattern does not describe
         an update, then the address register must be in the list of loaded
         registers.  */
      if (TARGET_THUMB1)
        return update || addr_reg_in_reglist;
    }

  return true;
}
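
/* For reference (sketch only; the authoritative patterns live in the
   machine description), a write-back load such as "ldmia r0!, {r4, r5}"
   is a PARALLEL of the form
     [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
      (set (reg:SI r4) (mem:SI (reg:SI r0)))
      (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))]
   which passes the checks above with UPDATE set and ascending regnos.  */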
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
                                 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

        ldr     rd1, [rbase + offset]
        ldr     rd2, [rbase + offset + 4]

     to

        add     rd1, rbase, offset
        ldmia   rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

        NREGS           CYCLES
          1               3
          2               4
          3               5
          4               6

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

        NREGS           CYCLES
          1              1-3
          2              2-6
          3              3-9
          4              4-12

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
                      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
        if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
          {
            /* We must find exactly one offset that is higher than the
               previous one by 4.  */
            if (order[i] != order[i - 1])
              return false;
            order[i] = j;
          }
      if (order[i] == order[i - 1])
        return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
          && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
        return false;
    }
  return true;
}
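
/* Worked example (illustrative): UNSORTED_OFFSETS == {8, 0, 4} with
   ORDER[0] == 1 (the index of the lowest offset) yields
   ORDER == {1, 2, 0}, i.e. visit offsets 0, 4, 8.  Offsets {8, 0, 12}
   fail, because no element is exactly 4 above 0.  */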
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
                        int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  rtx base_reg_rtx = NULL;
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
        operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
         looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
        return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
           || (GET_CODE (reg) == SUBREG
               && REG_P (reg = SUBREG_REG (reg))))
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
              && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
                  || (GET_CODE (reg) == SUBREG
                      && REG_P (reg = SUBREG_REG (reg))))
              && (CONST_INT_P (offset
                  = XEXP (XEXP (operands[nops + i], 0), 1)))))
        {
          if (i == 0)
            {
              base_reg = REGNO (reg);
              base_reg_rtx = reg;
              if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
                return 0;
            }
          else if (base_reg != (int) REGNO (reg))
            /* Not addressed from the same base register.  */
            return 0;

          unsorted_regs[i] = (REG_P (operands[i])
                              ? REGNO (operands[i])
                              : REGNO (SUBREG_REG (operands[i])));

          /* If it isn't an integer register, or if it overwrites the
             base register but isn't the last insn in the list, then
             we can't do this.  */
          if (unsorted_regs[i] < 0
              || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
              || unsorted_regs[i] > 14
              || (i != nops - 1 && unsorted_regs[i] == base_reg))
            return 0;

          /* Don't allow SP to be loaded unless it is also the base
             register.  It guarantees that SP is reset correctly when
             an LDM instruction is interrupted.  Otherwise, we might
             end up with a corrupt stack.  */
          if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
            return 0;

          unsorted_offsets[i] = INTVAL (offset);
          if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
            order[0] = i;
        }
      else
        /* Not a suitable memory address.  */
        return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
                             check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
        regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
           || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
                                        ldm_case == 5
                                        ? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
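
/* Sketch of the returned case numbering: offsets {0,4,8} from the base
   give case 1 (ldmia); {4,8,12} case 2 (ldmib, ARM only); {-8,-4,0}
   case 3 (ldmda, ARM only); {-12,-8,-4} case 4 (ldmdb); any other start
   offset that is a legal add-immediate gives case 5, which needs an
   explicit add before the ldm.  */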
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
                         int *regs, rtx *reg_rtxs, int *saved_order, int *base,
                         HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
        operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
         looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
        return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
           || (GET_CODE (reg) == SUBREG
               && REG_P (reg = SUBREG_REG (reg))))
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
              && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
                  || (GET_CODE (reg) == SUBREG
                      && REG_P (reg = SUBREG_REG (reg))))
              && (CONST_INT_P (offset
                  = XEXP (XEXP (operands[nops + i], 0), 1)))))
        {
          unsorted_reg_rtxs[i] = (REG_P (operands[i])
                                  ? operands[i] : SUBREG_REG (operands[i]));
          unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

          if (i == 0)
            {
              base_reg = REGNO (reg);
              base_reg_rtx = reg;
              if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
                return 0;
            }
          else if (base_reg != (int) REGNO (reg))
            /* Not addressed from the same base register.  */
            return 0;

          /* If it isn't an integer register, then we can't do this.  */
          if (unsorted_regs[i] < 0
              || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
              /* The effects are unpredictable if the base register is
                 both updated and stored.  */
              || (base_writeback && unsorted_regs[i] == base_reg)
              || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
              || unsorted_regs[i] > 14)
            return 0;

          unsorted_offsets[i] = INTVAL (offset);
          if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
            order[0] = i;
        }
      else
        /* Not a suitable memory address.  */
        return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
                             check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
        {
          regs[i] = unsorted_regs[check_regs ? order[i] : i];
          if (reg_rtxs)
            reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
        }

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
                         HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
        emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
        emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
                             rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
        = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
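
/* Usage sketch: COUNT == 2, REGS == {4, 5}, MEMS addressing [rB] and
   [rB, #4], and WBACK_OFFSET == 8 build the PARALLEL corresponding to
   "ldmia rB!, {r4, r5}"; when the multiple operation is judged
   unprofitable the function instead returns a sequence of individual
   register moves.  */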
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
                          HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
        emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
        emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
                             rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
        = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */

static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
                     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
                                    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
                                     write_back ? 4 * count : 0);
}
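
/* For instance (sketch), a block-copy caller can emit a four-word
   write-back load with
     emit_insn (arm_gen_load_multiple (regnos, 4, src, TRUE, srcbase,
                                       &srcoffset));
   after which SRCOFFSET has advanced by 16; the two wrappers below are
   the exported entry points.  */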
rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
                       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
                              offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
                        rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
                              offsetp);
}
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */

bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
                                     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
        if (regs[i] > regs[j])
          std::swap (regs[i], regs[j]);

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
      gcc_assert (ldm_case == 1 || ldm_case == 5);
      write_back = TRUE;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
        base_reg_rtx = newbase;
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
                                      write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */

bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
                                      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
                                       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */

bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
                                      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
        if (regs[i] == regs[j])
          {
            rtx t = peep2_find_free_register (0, nops * 2,
                                              TARGET_THUMB1 ? "l" : "r",
                                              SImode, &allocated);
            if (t == NULL_RTX)
              return false;
            reg_rtxs[i] = t;
            regs[i] = REGNO (t);
          }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
        if (regs[j] > regs[reg_order[i - 1]]
            && (this_order == reg_order[i - 1]
                || regs[j] < regs[this_order]))
          this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
           || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
          && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
        return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
                                       write_back ? offset + i * 4 : 0));
  return true;
}
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */

static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
                                   HOST_WIDE_INT length,
                                   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = REGNO (regs[i]);

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
                                            TRUE, srcbase, &srcoffset));
          src_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
                                                 - src_autoinc));
              mem = adjust_automodify_address (srcbase, SImode, addr,
                                               srcoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_loadsi (regs[j], mem));
            }
          srcoffset += block_size_bytes;
        }

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
                                             TRUE, dstbase, &dstoffset));
          dst_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
                                                 - dst_autoinc));
              mem = adjust_automodify_address (dstbase, SImode, addr,
                                               dstoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_storesi (mem, regs[j]));
            }
          dstoffset += block_size_bytes;
        }

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
                                        &srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, src,
                                srcoffset + j * UNITS_PER_WORD - src_autoinc);
          mem = adjust_automodify_address (srcbase, SImode, addr,
                                           srcoffset + j * UNITS_PER_WORD);
          if (src_aligned)
            emit_move_insn (regs[j], mem);
          else
            emit_insn (gen_unaligned_loadsi (regs[j], mem));
        }
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
                                         &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, dst,
                                dstoffset + j * UNITS_PER_WORD - dst_autoinc);
          mem = adjust_automodify_address (dstbase, SImode, addr,
                                           dstoffset + j * UNITS_PER_WORD);
          if (dst_aligned)
            emit_move_insn (mem, regs[j]);
          else
            emit_insn (gen_unaligned_storesi (mem, regs[j]));
        }
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
         byte, depending on interleave factor.  */
      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
          emit_insn (gen_unaligned_storehi (mem,
                       gen_lowpart (HImode, halfword_tmp)));
          halfword_tmp = NULL;
          dstoffset += 2;
        }

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
          byte_tmp = NULL;
          dstoffset++;
        }

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
                   gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
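
/* Worked example (illustrative): LENGTH == 11 with INTERLEAVE_FACTOR == 2
   copies one 8-byte block, leaves no whole word (11 - 8 == 3), then moves
   a halfword and a byte; because the interleave factor is greater than 1,
   the trailing halfword/byte stores are deferred so both loads can issue
   before either store.  */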
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
                      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
                               unsigned int interleave_factor,
                               HOST_WIDE_INT bytes_per_iter)
{
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
                                   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
                                     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
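
/* E.g. (sketch) LENGTH == 100 with BYTES_PER_ITER == 16 runs the loop six
   times (96 bytes) and then mops up the remaining 4 bytes with a
   straight-line copy.  */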
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_movmemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
         size of code if optimizing for size.  We'll use ldm/stm if src_aligned
         or dst_aligned though: allow more interleaving in those cases since the
         resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
        arm_block_move_unaligned_loop (operands[0], operands[1], length,
                                       interleave_factor, bytes_per_iter);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length,
                                           interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
         subject to loop unrolling, which makes tuning this condition a little
         awkward.  */
      if (length > 32)
        arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_movmemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_movmemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  while (in_words_to_go >= 2)
    {
      if (in_words_to_go > 4)
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
                                          TRUE, srcbase, &srcoffset));
      else
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
                                          src, FALSE, srcbase,
                                          &srcoffset));

      if (out_words_to_go)
        {
          if (out_words_to_go > 4)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
                                               TRUE, dstbase, &dstoffset));
          else if (out_words_to_go != 1)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
                                               out_words_to_go, dst,
                                               (last_bytes == 0
                                                ? FALSE : TRUE),
                                               dstbase, &dstoffset));
          else
            {
              mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
              emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
              if (last_bytes != 0)
                {
                  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
                  dstoffset += 4;
                }
            }
        }

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);     /* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
                              GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode,
                                           plus_constant (Pmode, dst,
                                                          last_bytes - 1),
                                           dstoffset + last_bytes - 1);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

          if (--last_bytes)
            {
              tmp = gen_reg_rtx (SImode);
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
              part_bytes_reg = tmp;
            }
        }
    }
  else
    {
      if (last_bytes > 1)
        {
          mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
          last_bytes -= 2;
          if (last_bytes)
            {
              rtx tmp = gen_reg_rtx (SImode);
              emit_insn (gen_addsi3 (dst, dst, const2_rtx));
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
              part_bytes_reg = tmp;
              dstoffset += 2;
            }
        }

      if (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
        }
    }

  return 1;
}
/* Helper for gen_movmem_ldrd_strd.  Increase the address of memory rtx
   MEM by the size of its mode.  */
static rtx
next_consecutive_mem (rtx mem)
{
  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
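
/* Example (illustrative): applied to (mem:SI (reg:SI 4)) this returns
   (mem:SI (plus:SI (reg:SI 4) (const_int 4))) with MEM_OFFSET adjusted to
   match, so alias information stays consistent across the copy loop.  */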
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
bool
gen_movmem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_movmemqi (operands);

  /* If the either src or dst is unaligned we'll be accessing it as pairs
     of unaligned SImode accesses.  Otherwise we can generate DImode
     ldrd/strd instructions.  */
  src = adjust_address (src, src_aligned ? DImode : SImode, 0);
  dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);

  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      rtx low_reg = NULL_RTX;
      rtx hi_reg = NULL_RTX;

      if (!src_aligned || !dst_aligned)
        {
          low_reg = gen_lowpart (SImode, reg0);
          hi_reg = gen_highpart_mode (SImode, DImode, reg0);
        }
      if (src_aligned)
        emit_move_insn (reg0, src);
      else
        {
          emit_insn (gen_unaligned_loadsi (low_reg, src));
          src = next_consecutive_mem (src);
          emit_insn (gen_unaligned_loadsi (hi_reg, src));
        }

      if (dst_aligned)
        emit_move_insn (dst, reg0);
      else
        {
          emit_insn (gen_unaligned_storesi (dst, low_reg));
          dst = next_consecutive_mem (dst);
          emit_insn (gen_unaligned_storesi (dst, hi_reg));
        }

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
        emit_move_insn (reg0, src);
      else
        emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
        emit_move_insn (dst, reg0);
      else
        emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  if (len == 0)
    return true;

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
        emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
        emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
        emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
      else
        emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 2;
    }

  if (len == 0)
    return true;

  dst = adjust_address (dst, QImode, 0);
  src = adjust_address (src, QImode, 0);
  reg0 = gen_reg_rtx (QImode);
  emit_move_insn (reg0, src);
  emit_move_insn (dst, reg0);
  return true;
}
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */
machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
          != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    std::swap (cond1, cond2);

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DEQmode;

      switch (cond2)
        {
        case EQ: return CC_DEQmode;
        case LE: return CC_DLEmode;
        case LEU: return CC_DLEUmode;
        case GE: return CC_DGEmode;
        case GEU: return CC_DGEUmode;
        default: gcc_unreachable ();
        }

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTmode;

      switch (cond2)
        {
        case LT:
          return CC_DLTmode;
        case LE:
          return CC_DLEmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTmode;

      switch (cond2)
        {
        case GT:
          return CC_DGTmode;
        case GE:
          return CC_DGEmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTUmode;

      switch (cond2)
        {
        case LTU:
          return CC_DLTUmode;
        case LEU:
          return CC_DLEUmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTUmode;

      switch (cond2)
        {
        case GTU:
          return CC_DGTUmode;
        case GEU:
          return CC_DGEUmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
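
/* Dominance example (sketch): for (x < y) || (x <= y) we get COND1 == LT
   and COND2 == LE; LT dominates LE (x < y implies x <= y), so the switch
   above returns CC_DLEmode and the combined test behaves as x <= y.  */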
machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
        {
        case EQ:
        case NE:
        case UNORDERED:
        case ORDERED:
        case UNLT:
        case UNLE:
        case UNGT:
        case UNGE:
        case UNEQ:
        case LTGT:
          return CCFPmode;

        case LT:
        case LE:
        case GT:
        case GE:
          return CCFPEmode;

        default:
          gcc_unreachable ();
        }
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
          || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
          || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
          || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
          || GET_CODE (x) == AND || GET_CODE (x) == IOR
          || GET_CODE (x) == XOR || GET_CODE (x) == MULT
          || GET_CODE (x) == NOT || GET_CODE (x) == NEG
          || GET_CODE (x) == LSHIFTRT
          || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == ROTATERT
          || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      switch (op)
        {
        case EQ:
        case NE:
          /* A DImode comparison against zero can be implemented by
             or'ing the two halves together.  */
          if (y == const0_rtx)
            return CC_Zmode;

          /* We can do an equality test in three Thumb instructions.  */
          if (!TARGET_32BIT)
            return CC_Zmode;

          /* FALLTHROUGH */

        case LTU:
        case LEU:
        case GTU:
        case GEU:
          /* DImode unsigned comparisons can be implemented by cmp +
             cmpeq without a scratch register.  Not worth doing in
             Thumb-2.  */
          if (TARGET_32BIT)
            return CC_CZmode;

          /* FALLTHROUGH */

        case LT:
        case LE:
        case GT:
        case GE:
          /* DImode signed and unsigned comparisons can be implemented
             by cmp + sbcs with a scratch register, but that does not
             set the Z flag - we must reverse GT/LE/GTU/LEU.  */
          gcc_assert (op != EQ && op != NE);
          return CC_NCVmode;

        default:
          gcc_unreachable ();
        }
    }

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
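/* A worked instance of the selection rules above: for the RTL
   (ltu (plus:SI r0 r1) r1), the PLUS case near the end applies because
   an LTU/GEU comparison of an addition with one of its own operands is
   a test of the carry flag, so CC_Cmode is chosen.  */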
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
         then compare against zero.  Not used for ARM mode; there
         CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
        {
          gcc_assert (!reload_completed);
          x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
          y = const0_rtx;
        }

      /* A scratch register is required.  */
      if (reload_completed)
        gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
        scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
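/* Note the idiom used above for DImode equality outside ARM state: two
   non-zero values are compared by XORing them and testing the result
   against zero, so that only the Z flag is needed; in ARM state the
   cmp + cmpeq sequence implied by CC_CZmode is cheaper.  */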
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
        {
          ref = reg_equiv_mem (REGNO (ref));
          base = find_replacement (&XEXP (ref, 0));
        }
      else
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address (REGNO (ref));

      /* PR 62554: If there is no equivalent memory location then just move
         the value as an SImode register move.  This happens when the target
         architecture variant does not have an HImode register move.  */
      if (base == NULL)
        {
          gcc_assert (REG_P (operands[0]));
          emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
                                gen_rtx_SUBREG (SImode, ref, 0)));
          return;
        }
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
        {
          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

          /* Get the base address; addsi3 knows how to handle constants
             that require more than one insn.  */
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
          base = base_plus;
          offset = lo;
        }
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
                                   gen_rtx_MEM (QImode,
                                                plus_constant (Pmode, base,
                                                               offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
                                   gen_rtx_MEM (QImode,
                                                plus_constant (Pmode, base,
                                                               offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT
                                (SImode,
                                 gen_rtx_SUBREG (SImode, operands[0], 0),
                                 GEN_INT (8)),
                                scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT (SImode, scratch,
                                                GEN_INT (8)),
                                gen_rtx_SUBREG (SImode, operands[0], 0)));
}
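/* A worked example of the offset reworking above: for offset 0x1005,
   lo = 0x1005 & 0xfff = 5 and hi = 0x1000, so the base is first advanced
   by 0x1000 via addsi3 and the two byte loads then use offsets 5 and 6.
   For the corner case offset == 4095, lo is clipped to 0x7ff so that
   lo + 1 remains addressable.  */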
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
        {
          ref = reg_equiv_mem (REGNO (ref));
          base = find_replacement (&XEXP (ref, 0));
        }
      else
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address (REGNO (ref));

      /* PR 62254: If there is no equivalent memory location then just move
         the value as an SImode register move.  This happens when the target
         architecture variant does not have an HImode register move.  */
      if (base == NULL)
        {
          gcc_assert (REG_P (outval) || SUBREG_P (outval));

          if (REG_P (outval))
            {
              emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
                                    gen_rtx_SUBREG (SImode, outval, 0)));
            }
          else /* SUBREG_P (outval)  */
            {
              if (GET_MODE (SUBREG_REG (outval)) == SImode)
                emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
                                      SUBREG_REG (outval)));
              else
                /* FIXME: Handle other cases ?  */
                gcc_unreachable ();
            }
          return;
        }
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
        {
          /* Updating base_plus might destroy outval, see if we can
             swap the scratch and base_plus.  */
          if (!reg_overlap_mentioned_p (scratch, outval))
            std::swap (scratch, base_plus);
          else
            {
              rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

              /* Be conservative and copy OUTVAL into the scratch now,
                 this should only be necessary if outval is a subreg
                 of something larger than a word.  */
              /* XXX Might this clobber base?  I can't see how it can,
                 since scratch is known to overlap with OUTVAL, and
                 must be wider than a word.  */
              emit_insn (gen_movhi (scratch_hi, outval));
              outval = scratch_hi;
            }
        }

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
        {
          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

          /* Be careful not to destroy OUTVAL.  */
          if (reg_overlap_mentioned_p (base_plus, outval))
            {
              /* Updating base_plus might destroy outval, see if we
                 can swap the scratch and base_plus.  */
              if (!reg_overlap_mentioned_p (scratch, outval))
                std::swap (scratch, base_plus);
              else
                {
                  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

                  /* Be conservative and copy outval into scratch now,
                     this should only be necessary if outval is a
                     subreg of something larger than a word.  */
                  /* XXX Might this clobber base?  I can't see how it
                     can, since scratch is known to overlap with
                     outval.  */
                  emit_insn (gen_movhi (scratch_hi, outval));
                  outval = scratch_hi;
                }
            }

          /* Get the base address; addsi3 knows how to handle constants
             that require more than one insn.  */
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
          base = base_plus;
          offset = lo;
        }
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (Pmode, base,
                                                        offset + 1)),
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
                              GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
                                                                offset)),
                            gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
                                                                offset)),
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
                              GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (Pmode, base,
                                                        offset + 1)),
                            gen_lowpart (QImode, scratch)));
    }
}
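/* Note the byte ordering chosen above: on a big-endian target the least
   significant byte of OUTVAL belongs at BASE + OFFSET + 1 and is stored
   first, after which the value is shifted right by 8 and the remaining
   byte stored at BASE + OFFSET; little-endian targets store in the
   opposite order.  */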
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */
bool
arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;

  /* Small aggregate types are placed in the lowest memory address.  */
  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
bool
arm_pad_reg_upward (machine_mode mode,
                    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
         and small complex types are always padded upwards.  */
      if (type)
        {
          if ((AGGREGATE_TYPE_P (type)
               || TREE_CODE (type) == COMPLEX_TYPE
               || FIXED_POINT_TYPE_P (type))
              && int_size_in_bytes (type) <= 4)
            return true;
        }
      else
        {
          if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
              && GET_MODE_SIZE (mode) <= 4)
            return true;
        }
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;

  if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
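/* For example, offset_ok_for_ldrd_strd (252) holds in both ARM and
   Thumb-2 state, offset_ok_for_ldrd_strd (256) holds only in Thumb-2
   state (max_offset 1020 rather than 255), and an offset of 2 fails in
   Thumb-2 state because it is not a multiple of 4.  */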
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  Pattern
   guarantees that both memory accesses use the same base register, the
   offsets are constants within the range, and the gap between the offsets
   is 4.  If reload is complete, then check that the registers are legal.
   WBACK indicates whether the address is updated.  LOAD indicates whether
   the memory access is a load or a store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
                       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
          || (t == SP_REGNUM)
          || (t == PC_REGNUM)
          || (t2 == SP_REGNUM)
          || (t2 == PC_REGNUM)
          || (!load && (n == PC_REGNUM))
          || (load && (t == t2))
          /* Triggers Cortex-M3 LDRD errata.  */
          || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
          || (t2 == PC_REGNUM)
          || (t % 2 != 0)   /* First destination register is not even.  */
          || (t2 != t + 1)
          /* PC can be used as base register (for offset addressing only),
             but it is deprecated.  */
          || (n == PC_REGNUM)))
    return false;

  return true;
}
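/* So, in ARM state, ldrd r0, r1, [r2] is accepted (first destination
   register even and t2 == t + 1), whereas ldrd r1, r2, [r3] is rejected
   because the first destination register is odd; Thumb-2 imposes no
   even/odd pairing constraint.  */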
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE and
   OFFSET accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (GET_CODE (mem) == SUBREG)
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
                                 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insns, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
                        bool const_store, bool commute)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset;
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  int i, gap;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
        return false;

      if (i == 0)
        base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
        return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
        {
          tmp = SUBREG_REG (operands[i]);
          gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
          operands[i] = tmp;
        }
    }

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
        mov r0, 0
        str r0, [r2]
        mov r0, 1
        str r0, [r2, #4]
     can be transformed into
        mov r1, 0
        mov r0, 1
        strd r1, r0, [r2]
     in Thumb mode assuming that r1 is free.
     For ARM mode do the same but only if the starting register
     can be made to be even.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))
    {
      if (TARGET_THUMB2)
        {
          CLEAR_HARD_REG_SET (regset);
          tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
          if (tmp == NULL_RTX)
            return false;

          /* Use the new register in the first load to ensure that
             if the original input register is not dead after peephole,
             then it will have the correct constant value.  */
          operands[0] = tmp;
        }
      else if (TARGET_ARM)
        {
          int regno = REGNO (operands[0]);
          if (!peep2_reg_dead_p (4, operands[0]))
            {
              /* When the input register is even and is not dead after the
                 pattern, it has to hold the second constant but we cannot
                 form a legal STRD in ARM mode with this register as the second
                 register.  */
              if (regno % 2 == 0)
                return false;

              /* Is regno-1 free? */
              SET_HARD_REG_SET (regset);
              CLEAR_HARD_REG_BIT(regset, regno - 1);
              tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
              if (tmp == NULL_RTX)
                return false;

              operands[0] = tmp;
            }
          else
            {
              /* Find a DImode register.  */
              CLEAR_HARD_REG_SET (regset);
              tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
              if (tmp != NULL_RTX)
                {
                  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
                  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
                }
              else
                {
                  /* Can we use the input register to form a DI register?  */
                  SET_HARD_REG_SET (regset);
                  CLEAR_HARD_REG_BIT(regset,
                                     regno % 2 == 0 ? regno + 1 : regno - 1);
                  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
                  if (tmp == NULL_RTX)
                    return false;
                  operands[regno % 2 == 1 ? 0 : 1] = tmp;
                }
            }

          gcc_assert (operands[0] != NULL_RTX);
          gcc_assert (operands[1] != NULL_RTX);
          gcc_assert (REGNO (operands[0]) % 2 == 0);
          gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
        }
    }

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
    {
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);
      if (const_store)
        std::swap (operands[4], operands[5]);
    }
  else
    {
      gap = offsets[1] - offsets[0];
      offset = offsets[0];
    }

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != 4)
    return false;

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
                             false, load))
    return true;

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */
  if (TARGET_THUMB2)
    return false;

  if (load && commute)
    {
      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
                                 false, load))
        return true;
    }

  if (const_store)
    {
      /* If input registers are dead after this pattern, they can be
         reordered or replaced by other registers that are free in the
         current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
          || !peep2_reg_dead_p (4, operands[1]))
        return false;

      /* Try to reorder the input registers.  */
      /* For example, the code
           mov r0, 0
           mov r1, 1
           str r1, [r2]
           str r0, [r2, #4]
         can be transformed into
           mov r1, 0
           mov r0, 1
           strd r0, [r2]
      */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
                                 false, false))
        {
          std::swap (operands[0], operands[1]);
          return true;
        }

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
      while (true)
        {
          tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
          if (tmp == NULL_RTX)
            return false;

          /* DREG must be an even-numbered register in DImode.
             Split it into SI registers.  */
          operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
          operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
          gcc_assert (operands[0] != NULL_RTX);
          gcc_assert (operands[1] != NULL_RTX);
          gcc_assert (REGNO (operands[0]) % 2 == 0);
          gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

          return (operands_ok_ldrd_strd (operands[0], operands[1],
                                         base, offset,
                                         false, load));
        }
    }

  return false;
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
        int i;

        fprintf (f, "<");
        for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
          {
            fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
            if (i < (CONST_VECTOR_NUNITS (x) - 1))
              fprintf (f, ", ");
          }
        fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

        ldr     rn, L1
        ...
        b       L2
        align
        L1:     .long value
        L2:
        ...

        ldr     rn, L3
        ...
        b       L4
        align
        L3:     .long value
        L4:
        ...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */
struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode *       next;
  Mnode *       prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *            next;
  rtx_insn *        insn;
  HOST_WIDE_INT     address;
  rtx *             loc;
  machine_mode      mode;
  int               fix_size;
  rtx               value;
  Mnode *           minipool;
  HOST_WIDE_INT     forwards;
  HOST_WIDE_INT     backwards;
};
/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
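/* Hence MINIPOOL_FIX_SIZE (HImode) and MINIPOOL_FIX_SIZE (SImode) are
   both 4, while MINIPOOL_FIX_SIZE (DImode) is 8.  */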
static Mnode *          minipool_vector_head;
static Mnode *          minipool_vector_tail;
static rtx_code_label  *minipool_vector_label;
static int              minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *                  minipool_fix_head;
Mfix *                  minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *                  minipool_barrier;

#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
        {
        case 1:
          /* Round up size of TBB table to a halfword boundary.  */
          size = (size + 1) & ~HOST_WIDE_INT_1;
          break;
        case 2:
          /* No padding necessary for TBH.  */
          break;
        case 4:
          /* Add two bytes for alignment on Thumb.  */
          if (TARGET_THUMB)
            size += 2;
          break;
        default:
          gcc_unreachable ();
        }
      return size;
    }

  return 0;
}
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */
static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
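/* For example, a label aligned to an 8-byte boundary in Thumb state may
   be preceded by up to 8 - 2 = 6 bytes of padding, the smallest Thumb
   insn being 2 bytes.  */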
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
                               HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
        mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
        mp->max_address = max_mp->max_address - mp->fix_size;
      else
        mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
         mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
        mp->next->prev = mp->prev;
      else
        minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
        mp->prev->next = mp;
      else
        minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
         && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     can not be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
          && fix->mode == mp->mode
          && (!LABEL_P (fix->value)
              || (CODE_LABEL_NUMBER (fix->value)
                  == CODE_LABEL_NUMBER (mp->value)))
          && rtx_equal_p (fix->value, mp->value))
        {
          /* More than one fix references this entry.  */
          mp->refcount++;
          return move_minipool_fix_forward_ref (mp, max_mp, max_address);
        }

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
          && mp->max_address > max_address)
        max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
         we have not already found an insertion point, then
         make sure that all such 8-byte aligned quantities are
         placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
          && max_mp == NULL
          && fix->fix_size >= 8
          && mp->fix_size < 8)
        {
          max_mp = mp;
          max_address = mp->max_address;
        }
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
        {
          minipool_vector_head = mp;
          minipool_vector_label = gen_label_rtx ();
        }
      else
        mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
        mp->max_address = max_mp->max_address - mp->fix_size;
      else
        mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
        mp->prev->next = mp;
      else
        minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
         && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
                                HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
        mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
         mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
        mp->prev->next = mp->next;
      else
        minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
        mp->next->prev = mp;
      else
        minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
        offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
        mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
          >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
          && fix->mode == mp->mode
          && (!LABEL_P (fix->value)
              || (CODE_LABEL_NUMBER (fix->value)
                  == CODE_LABEL_NUMBER (mp->value)))
          && rtx_equal_p (fix->value, mp->value)
          /* Check that there is enough slack to move this entry to the
             end of the table (this is conservative).  */
          && (mp->max_address
              > (minipool_barrier->address
                 + minipool_vector_tail->offset
                 + minipool_vector_tail->fix_size)))
        {
          mp->refcount++;
          return move_minipool_fix_backward_ref (mp, min_mp, min_address);
        }

      if (min_mp != NULL)
        mp->min_address += fix->fix_size;
      else
        {
          /* Note the insertion point if necessary.  */
          if (mp->min_address < min_address)
            {
              /* For now, we do not allow the insertion of 8-byte alignment
                 requiring nodes anywhere but at the start of the pool.  */
              if (ARM_DOUBLEWORD_ALIGN
                  && fix->fix_size >= 8 && mp->fix_size < 8)
                return NULL;
              else
                min_mp = mp;
            }
          else if (mp->max_address
                   < minipool_barrier->address + mp->offset + fix->fix_size)
            {
              /* Inserting before this entry would push the fix beyond
                 its maximum address (which can happen if we have
                 re-located a forwards fix); force the new fix to come
                 after it.  */
              if (ARM_DOUBLEWORD_ALIGN
                  && fix->fix_size >= 8 && mp->fix_size < 8)
                return NULL;
              else
                {
                  min_mp = mp;
                  min_address = mp->min_address + fix->fix_size;
                }
            }
          /* Do not insert a non-8-byte aligned quantity before 8-byte
             aligned quantities.  */
          else if (ARM_DOUBLEWORD_ALIGN
                   && fix->fix_size < 8
                   && mp->fix_size >= 8)
            {
              min_mp = mp;
              min_address = mp->min_address + fix->fix_size;
            }
        }
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
        {
          minipool_vector_tail = mp;
          minipool_vector_label = gen_label_rtx ();
        }
      else
        mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
        mp->next->prev = mp;
      else
        minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
        mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
        mp->next->offset = mp->offset + mp->fix_size;
      else
        mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
        offset += mp->fix_size;
    }
}
/* Output the literal table.  */
static void
dump_minipool (rtx_insn *scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
        {
          align64 = 1;
          break;
        }

  if (dump_file)
    fprintf (dump_file,
             ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
             INSN_UID (scan), (unsigned long) minipool_barrier->address,
             align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
        {
          if (dump_file)
            {
              fprintf (dump_file,
                       ";;  Offset %u, min %ld, max %ld ",
                       (unsigned) mp->offset, (unsigned long) mp->min_address,
                       (unsigned long) mp->max_address);
              arm_print_value (dump_file, mp->value);
              fputc ('\n', dump_file);
            }

          rtx val = copy_rtx (mp->value);

          switch (GET_MODE_SIZE (mp->mode))
            {
#ifdef HAVE_consttable_1
            case 1:
              scan = emit_insn_after (gen_consttable_1 (val), scan);
              break;
#endif
#ifdef HAVE_consttable_2
            case 2:
              scan = emit_insn_after (gen_consttable_2 (val), scan);
              break;
#endif
#ifdef HAVE_consttable_4
            case 4:
              scan = emit_insn_after (gen_consttable_4 (val), scan);
              break;
#endif
#ifdef HAVE_consttable_8
            case 8:
              scan = emit_insn_after (gen_consttable_8 (val), scan);
              break;
#endif
#ifdef HAVE_consttable_16
            case 16:
              scan = emit_insn_after (gen_consttable_16 (val), scan);
              break;
#endif
            default:
              gcc_unreachable ();
            }
        }

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx_insn *insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx_insn *next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
         than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
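/* Thus a JUMP_INSN immediately followed by a label costs
   (50 - 20) - 10 = 20 and is a strongly preferred insertion point,
   while splitting straight-line code costs the full base cost of 50.  */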
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx_jump_table_data *tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
         within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
         code that pushes minipool fixes.  */
      if (LABEL_P (from))
        count += get_label_padding (from);
      else
        count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
        {
          count += get_jump_table_size (tmp);

          /* Jump tables aren't in a basic block, so base the cost on
             the dispatch insn.  If we select this location, we will
             still put the pool after the table.  */
          new_cost = arm_barrier_cost (from);

          if (count < max_count
              && (!selected || new_cost <= selected_cost))
            {
              selected = tmp;
              selected_cost = new_cost;
              selected_address = fix->address + count;
            }

          /* Continue after the dispatch table.  */
          from = NEXT_INSN (tmp);
          continue;
        }

      new_cost = arm_barrier_cost (from);

      if (count < max_count
          && (!selected || new_cost <= selected_cost))
        {
          selected = from;
          selected_cost = new_cost;
          selected_address = fix->address + count;
        }

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Make sure we do not split a call and its corresponding
     CALL_ARG_LOCATION note.  */
  if (CALL_P (selected))
    {
      rtx_insn *next = NEXT_INSN (selected);
      if (next && NOTE_P (next)
          && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
        selected = next;
    }

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
                   machine_mode mode, rtx value)
{
  gcc_assert (!arm_disable_literal_pool);
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
               ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
               GET_MODE_NAME (mode),
               INSN_UID (insn), (unsigned long) address,
               -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   it.  */
int
arm_max_const_double_inline_cost ()
{
  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}
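/* In other words, when optimizing for size, or on cores with load
   scheduling (arm_ld_sched), a 64-bit constant is synthesized inline
   only if it needs at most 3 insns; otherwise up to 4 insns are
   tolerated before the constant goes to the literal pool.  */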
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
                            NULL_RTX, NULL_RTX, 0, 0)
          + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
                              NULL_RTX, NULL_RTX, 0, 0));
}
/* Cost of loading a SImode constant.  */
static inline int
arm_const_inline_cost (enum rtx_code code, rtx val)
{
  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
                           NULL_RTX, NULL_RTX, 1, 0);
}
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
        continue;

      /* If this alternative is a memory reference, then any mention
         of constants in this alternative is really to fool reload
         into allowing us to accept one there.  We need to fix them up
         now so that we output the right code.  */
      if (op_alt[opno].memory_ok)
        {
          rtx op = recog_data.operand[opno];

          if (CONSTANT_P (op))
            {
              if (do_pushes)
                push_minipool_fix (insn, address, recog_data.operand_loc[opno],
                                   recog_data.operand_mode[opno], op);
            }
          else if (MEM_P (op)
                   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
                   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
            {
              if (do_pushes)
                {
                  rtx cop = avoid_constant_pool_reference (op);

                  /* Casting the address of something to a mode narrower
                     than a word can cause avoid_constant_pool_reference()
                     to return the pool reference itself.  That's no good to
                     us here.  Let's just hope that we can use the
                     constant pool value directly.  */
                  if (op == cop)
                    cop = get_pool_constant (XEXP (op, 0));

                  push_minipool_fix (insn, address,
                                     recog_data.operand_loc[opno],
                                     recog_data.operand_mode[opno], cop);
                }
            }
        }
    }

  return;
}
16657 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16658 and unions in the context of ARMv8-M Security Extensions. It is used as a
16659 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16660 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16661 or four masks, depending on whether it is being computed for a
16662 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16663 respectively. The tree for the type of the argument or a field within an
16664 argument is passed in ARG_TYPE, the current register this argument or field
16665 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16666 argument or field starts at is passed in STARTING_BIT and the last used bit
16667 is kept in LAST_USED_BIT which is also updated accordingly. */
16669 static unsigned HOST_WIDE_INT
16670 comp_not_to_clear_mask_str_un (tree arg_type
, int * regno
,
16671 uint32_t * padding_bits_to_clear
,
16672 unsigned starting_bit
, int * last_used_bit
)
16675 unsigned HOST_WIDE_INT not_to_clear_reg_mask
= 0;
16677 if (TREE_CODE (arg_type
) == RECORD_TYPE
)
16679 unsigned current_bit
= starting_bit
;
16681 long int offset
, size
;
16684 field
= TYPE_FIELDS (arg_type
);
16687 /* The offset within a structure is always an offset from
16688 the start of that structure. Make sure we take that into the
16689 calculation of the register based offset that we use here. */
16690 offset
= starting_bit
;
16691 offset
+= TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field
), 0);
      /* This is the actual size of the field, for bitfields this is the
         bitfield width and not the container size.  */
      size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);

      if (*last_used_bit != offset)
        {
          if (offset < *last_used_bit)
            {
              /* This field's offset is before the 'last_used_bit', that
                 means this field goes on the next register.  So we need to
                 pad the rest of the current register and increase the
                 register number.  */
              uint32_t mask;
              mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
              mask++;

              padding_bits_to_clear[*regno] |= mask;
              not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
              (*regno)++;
            }
          else
            {
              /* Otherwise we pad the bits between the last field's end and
                 the start of the new field.  */
              uint32_t mask;

              mask = ((uint32_t)-1) >> (32 - offset);
              mask -= ((uint32_t) 1 << *last_used_bit) - 1;
              padding_bits_to_clear[*regno] |= mask;
            }
          current_bit = offset;
        }

      /* Calculate further padding bits for inner structs/unions too.  */
      if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
        {
          *last_used_bit = current_bit;
          not_to_clear_reg_mask
            |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
                                              padding_bits_to_clear, offset,
                                              last_used_bit);
        }
      else
        {
          /* Update 'current_bit' with this field's size.  If the
             'current_bit' lies in a subsequent register, update 'regno' and
             reset 'current_bit' to point to the current bit in that new
             register.  */
          current_bit += size;
          while (current_bit >= 32)
            {
              current_bit -= 32;
              not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
              (*regno)++;
            }
          *last_used_bit = current_bit;
        }

      field = TREE_CHAIN (field);
    }
      not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
    }
  else if (TREE_CODE (arg_type) == UNION_TYPE)
    {
      tree field, field_t;
      int i, regno_t, field_size;
      int max_reg = -1;
      int max_bit = -1;
      uint32_t mask;
      uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
        = {-1, -1, -1, -1};

      /* To compute the padding bits in a union we only consider bits as
         padding bits if they are always either a padding bit or fall outside a
         field's size for all fields in the union.  */
      field = TYPE_FIELDS (arg_type);
      while (field)
        {
          uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
            = {0U, 0U, 0U, 0U};
          int last_used_bit_t = *last_used_bit;
          regno_t = *regno;
          field_t = TREE_TYPE (field);

          /* If the field's type is either a record or a union make sure to
             compute their padding bits too.  */
          if (RECORD_OR_UNION_TYPE_P (field_t))
            not_to_clear_reg_mask
              |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
                                                &padding_bits_to_clear_t[0],
                                                starting_bit, &last_used_bit_t);
          else
            {
              field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
              regno_t = (field_size / 32) + *regno;
              last_used_bit_t = (starting_bit + field_size) % 32;
            }

          for (i = *regno; i < regno_t; i++)
            {
              /* For all but the last register used by this field only keep the
                 padding bits that were padding bits in this field.  */
              padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
            }

          /* For the last register, keep all padding bits that were padding
             bits in this field and any padding bits that are still valid
             as padding bits but fall outside of this field's size.  */
          mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
          padding_bits_to_clear_res[regno_t]
            &= padding_bits_to_clear_t[regno_t] | mask;

          /* Update the maximum size of the fields in terms of registers used
             ('max_reg') and the 'last_used_bit' in said register.  */
          if (max_reg < regno_t)
            {
              max_reg = regno_t;
              max_bit = last_used_bit_t;
            }
          else if (max_reg == regno_t && max_bit < last_used_bit_t)
            max_bit = last_used_bit_t;

          field = TREE_CHAIN (field);
        }

      /* Update the current padding_bits_to_clear using the intersection of the
         padding bits of all the fields.  */
      for (i = *regno; i < max_reg; i++)
        padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];

      /* Do not keep trailing padding bits, we do not know yet whether this
         is the end of the argument.  */
      mask = ((uint32_t) 1 << max_bit) - 1;
      padding_bits_to_clear[max_reg]
        |= padding_bits_to_clear_res[max_reg] & mask;

      *regno = max_reg;
      *last_used_bit = max_bit;
    }
  else
    /* This function should only be used for structs and unions.  */
    gcc_unreachable ();

  return not_to_clear_reg_mask;
}
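
/* A rough illustration (hypothetical layout, not from the original sources):
   for

     struct { char c; short s; } arg;

   passed in r0, the char occupies bits 0-7 and, after alignment, the short
   occupies bits 16-31, so bits 8-15 are padding.  The function would then
   record 0xff00 in padding_bits_to_clear[0] and set bit 0 of the returned
   register mask to mark r0 as live.  */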
/* In the context of ARMv8-M Security Extensions, this function is used for both
   'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
   registers are used when returning or passing arguments, which is then
   returned as a mask.  It will also compute a mask to indicate padding/unused
   bits for each of these registers, and passes this through the
   PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
   ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
   the starting register used to pass this argument or return value is passed
   in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
   for struct and union types.  */

static unsigned HOST_WIDE_INT
compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
                           uint32_t * padding_bits_to_clear)
{
  int last_used_bit = 0;
  unsigned HOST_WIDE_INT not_to_clear_mask;

  if (RECORD_OR_UNION_TYPE_P (arg_type))
    {
      not_to_clear_mask
        = comp_not_to_clear_mask_str_un (arg_type, &regno,
                                         padding_bits_to_clear, 0,
                                         &last_used_bit);

      /* If the 'last_used_bit' is not zero, that means we are still using a
         part of the last 'regno'.  In such cases we must clear the trailing
         bits.  Otherwise we are not using regno and we should mark it as to
         clear.  */
      if (last_used_bit != 0)
        padding_bits_to_clear[regno]
          |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
      else
        not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
    }
  else
    {
      not_to_clear_mask = 0;
      /* We are not dealing with structs nor unions.  So these arguments may be
         passed in floating point registers too.  In some cases a BLKmode is
         used when returning or passing arguments in multiple VFP registers.  */
      if (GET_MODE (arg_rtx) == BLKmode)
        {
          int i, arg_regs;
          rtx reg;

          /* This should really only occur when dealing with the hard-float
             ABI.  */
          gcc_assert (TARGET_HARD_FLOAT_ABI);

          for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
            {
              reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
              gcc_assert (REG_P (reg));

              not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);

              /* If we are dealing with DF mode, make sure we don't
                 clear either of the registers it addresses.  */
              arg_regs = ARM_NUM_REGS (GET_MODE (reg));
              if (arg_regs > 1)
                {
                  unsigned HOST_WIDE_INT mask;
                  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
                  mask -= HOST_WIDE_INT_1U << REGNO (reg);
                  not_to_clear_mask |= mask;
                }
            }
        }
      else
        {
          /* Otherwise we can rely on the MODE to determine how many registers
             are being used by this argument.  */
          int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
          not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
          if (arg_regs > 1)
            {
              unsigned HOST_WIDE_INT
              mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
              mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
              not_to_clear_mask |= mask;
            }
        }
    }

  return not_to_clear_mask;
}
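
/* For a non-aggregate example (a sketch, not taken from the sources): a
   DImode argument starting in r0 occupies ARM_NUM_REGS == 2 registers, so
   the returned mask would have bits 0 and 1 set and no padding bits would
   be flagged for it.  */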
/* Clears caller saved registers not used to pass arguments before a
   cmse_nonsecure_call.  Saving, clearing and restoring of callee saved
   registers is done in the __gnu_cmse_nonsecure_call libcall.
   See libgcc/config/arm/cmse_nonsecure_call.S.  */

static void
cmse_nonsecure_call_clear_caller_saved (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
        {
          uint64_t to_clear_mask, float_mask;
          rtx_insn *seq;
          rtx pat, call, unspec, reg, cleared_reg, tmp;
          unsigned int regno, maxregno;
          rtx address;
          CUMULATIVE_ARGS args_so_far_v;
          cumulative_args_t args_so_far;
          tree arg_type, fntype;
          bool using_r4, first_param = true;
          function_args_iterator args_iter;
          uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
          uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];

          if (!NONDEBUG_INSN_P (insn))
            continue;

          if (!CALL_P (insn))
            continue;

          pat = PATTERN (insn);
          gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
          call = XVECEXP (pat, 0, 0);

          /* Get the real call RTX if the insn sets a value, i.e. returns.  */
          if (GET_CODE (call) == SET)
            call = SET_SRC (call);

          /* Check if it is a cmse_nonsecure_call.  */
          unspec = XEXP (call, 0);
          if (GET_CODE (unspec) != UNSPEC
              || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
            continue;

          /* Determine the caller-saved registers we need to clear.  */
          to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
          maxregno = NUM_ARG_REGS - 1;
          /* Only look at the caller-saved floating point registers in case of
             -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
             lazy store and loads which clear both caller- and callee-saved
             registers.  */
          if (TARGET_HARD_FLOAT_ABI)
            {
              float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
              float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
              to_clear_mask |= float_mask;
              maxregno = D7_VFP_REGNUM;
            }

          /* Make sure the register used to hold the function address is not
             cleared.  */
          address = RTVEC_ELT (XVEC (unspec, 0), 0);
          gcc_assert (MEM_P (address));
          gcc_assert (REG_P (XEXP (address, 0)));
          to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));

          /* Set basic block of call insn so that df rescan is performed on
             insns inserted here.  */
          set_block_for_insn (insn, bb);
          df_set_flags (DF_DEFER_INSN_RESCAN);
          start_sequence ();

          /* Make sure the scheduler doesn't schedule other insns beyond
             here.  */
          emit_insn (gen_blockage ());

          /* Walk through all arguments and clear registers appropriately.
          */
          fntype = TREE_TYPE (MEM_EXPR (address));
          arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
                                    NULL_TREE);
          args_so_far = pack_cumulative_args (&args_so_far_v);
          FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
            {
              rtx arg_rtx;
              machine_mode arg_mode = TYPE_MODE (arg_type);

              if (VOID_TYPE_P (arg_type))
                continue;

              if (!first_param)
                arm_function_arg_advance (args_so_far, arg_mode, arg_type,
                                          true);

              arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
                                          true);
              gcc_assert (REG_P (arg_rtx));
              to_clear_mask
                &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
                                               REGNO (arg_rtx),
                                               padding_bits_to_clear_ptr);

              first_param = false;
            }

          /* Clear padding bits where needed.  */
          cleared_reg = XEXP (address, 0);
          reg = gen_rtx_REG (SImode, IP_REGNUM);
          using_r4 = false;
          for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
            {
              if (padding_bits_to_clear[regno] == 0)
                continue;

              /* If this is a Thumb-1 target copy the address of the function
                 we are calling from 'r4' into 'ip' such that we can use r4 to
                 clear the unused bits in the arguments.  */
              if (TARGET_THUMB1 && !using_r4)
                {
                  using_r4 = true;
                  reg = cleared_reg;
                  emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
                                  reg);
                }

              tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
              emit_move_insn (reg, tmp);
              /* Also fill the top half of the negated
                 padding_bits_to_clear.  */
              if (((~padding_bits_to_clear[regno]) >> 16) > 0)
                {
                  tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
                  emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
                                                                GEN_INT (16),
                                                                GEN_INT (16)),
                                          tmp));
                }

              emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
                                     gen_rtx_REG (SImode, regno),
                                     reg));
            }
          if (using_r4)
            emit_move_insn (cleared_reg,
                            gen_rtx_REG (SImode, IP_REGNUM));

          /* We use right shift and left shift to clear the LSB of the address
             we jump to instead of using bic, to avoid having to use an extra
             register on Thumb-1.  */
          tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
          emit_insn (gen_rtx_SET (cleared_reg, tmp));
          tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
          emit_insn (gen_rtx_SET (cleared_reg, tmp));

          /* Clear all registers that leak before doing a non-secure
             call.  */
          for (regno = R0_REGNUM; regno <= maxregno; regno++)
            {
              if (!(to_clear_mask & (1LL << regno)))
                continue;

              /* If regno is an even vfp register and its successor is also to
                 be cleared, use vmov.  */
              if (IS_VFP_REGNUM (regno))
                {
                  if (TARGET_VFP_DOUBLE
                      && VFP_REGNO_OK_FOR_DOUBLE (regno)
                      && to_clear_mask & (1LL << (regno + 1)))
                    emit_move_insn (gen_rtx_REG (DFmode, regno++),
                                    CONST0_RTX (DFmode));
                  else
                    emit_move_insn (gen_rtx_REG (SFmode, regno),
                                    CONST0_RTX (SFmode));
                }
              else
                emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
            }

          seq = get_insns ();
          end_sequence ();
          emit_insn_before (seq, insn);
        }
    }
}
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */

static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx cmp, op0, op1, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
        insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
          || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
        continue;

      /* Get the register with which we are comparing.  */
      cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
      op0 = XEXP (cmp, 0);
      op1 = XEXP (cmp, 1);

      /* Check that comparison is against ZERO.  */
      if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
        continue;

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
           (!insn_clobbered
            && prev != BB_HEAD (bb)
            && (NOTE_P (prev)
                || DEBUG_INSN_P (prev)
                || ((set = single_set (prev)) != NULL
                    && get_attr_conds (prev) == CONDS_NOCOND)));
           prev = PREV_INSN (prev))
        {
          if (reg_set_p (op0, prev))
            insn_clobbered = true;
        }

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)
        continue;

      if (!set)
        continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
          || !low_register_operand (src, SImode))
        continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
         in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
        {
          dest = copy_rtx (dest);
          src = copy_rtx (src);
          src = gen_rtx_MINUS (SImode, src, const0_rtx);
          PATTERN (prev) = gen_rtx_SET (dest, src);
          INSN_CODE (prev) = -1;
          /* Set test register in INSN to dest.  */
          XEXP (cmp, 0) = copy_rtx (dest);
          INSN_CODE (insn) = -1;
        }
    }
}
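
/* A sketch of the rewrite above (hypothetical registers):

     movs  r1, r2            subs  r1, r2, #0
     ...               -->   ...
     cmp   r1, #0            cmp   r1, #0
     beq   .L1               beq   .L1

   The subtract of zero sets the condition codes the same way the compare
   against zero does, which lets later passes and the cbranch pattern drop
   the now-redundant comparison.  */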
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */

static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  enum Convert_Action {SKIP, CONV, SWAP_CONV};

  FOR_EACH_BB_FN (bb, cfun)
    {
      if ((current_tune->disparage_flag_setting_t16_encodings
           == tune_params::DISPARAGE_FLAGS_ALL)
          && optimize_bb_for_speed_p (bb))
        continue;

      rtx_insn *insn;
      Convert_Action action = SKIP;
      Convert_Action action_for_partial_flag_setting
        = ((current_tune->disparage_flag_setting_t16_encodings
            != tune_params::DISPARAGE_FLAGS_NEITHER)
           && optimize_bb_for_speed_p (bb))
          ? SKIP : CONV;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
        {
          if (NONJUMP_INSN_P (insn)
              && !REGNO_REG_SET_P (&live, CC_REGNUM)
              && GET_CODE (PATTERN (insn)) == SET)
            {
              action = SKIP;
              rtx pat = PATTERN (insn);
              rtx dst = XEXP (pat, 0);
              rtx src = XEXP (pat, 1);
              rtx op0 = NULL_RTX, op1 = NULL_RTX;

              if (UNARY_P (src) || BINARY_P (src))
                op0 = XEXP (src, 0);

              if (BINARY_P (src))
                op1 = XEXP (src, 1);

              if (low_register_operand (dst, SImode))
                {
                  switch (GET_CODE (src))
                    {
                    case PLUS:
                      /* Adding two registers and storing the result
                         in the first source is already a 16-bit
                         operation.  */
                      if (rtx_equal_p (dst, op0)
                          && register_operand (op1, SImode))
                        break;

                      if (low_register_operand (op0, SImode))
                        {
                          /* ADDS <Rd>,<Rn>,<Rm> */
                          if (low_register_operand (op1, SImode))
                            action = CONV;
                          /* ADDS <Rdn>,#<imm8> */
                          /* SUBS <Rdn>,#<imm8> */
                          else if (rtx_equal_p (dst, op0)
                                   && CONST_INT_P (op1)
                                   && IN_RANGE (INTVAL (op1), -255, 255))
                            action = CONV;
                          /* ADDS <Rd>,<Rn>,#<imm3> */
                          /* SUBS <Rd>,<Rn>,#<imm3> */
                          else if (CONST_INT_P (op1)
                                   && IN_RANGE (INTVAL (op1), -7, 7))
                            action = CONV;
                        }
                      /* ADCS <Rd>, <Rn> */
                      else if (GET_CODE (XEXP (src, 0)) == PLUS
                               && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
                               && low_register_operand (XEXP (XEXP (src, 0), 1),
                                                        SImode)
                               && COMPARISON_P (op1)
                               && cc_register (XEXP (op1, 0), VOIDmode)
                               && maybe_get_arm_condition_code (op1) == ARM_CS
                               && XEXP (op1, 1) == const0_rtx)
                        action = CONV;
                      break;

                    case MINUS:
                      /* RSBS <Rd>,<Rn>,#0
                         Not handled here: see NEG below.  */
                      /* SUBS <Rd>,<Rn>,#<imm3>
                         SUBS <Rdn>,#<imm8>
                         Not handled here: see PLUS above.  */
                      /* SUBS <Rd>,<Rn>,<Rm> */
                      if (low_register_operand (op0, SImode)
                          && low_register_operand (op1, SImode))
                        action = CONV;
                      break;

                    case MULT:
                      /* MULS <Rdm>,<Rn>,<Rdm>
                         As an exception to the rule, this is only used
                         when optimizing for size since MULS is slow on all
                         known implementations.  We do not even want to use
                         MULS in cold code, if optimizing for speed, so we
                         test the global flag here.  */
                      if (!optimize_size)
                        break;
                      /* Fall through.  */
                    case AND:
                    case IOR:
                    case XOR:
                      /* ANDS <Rdn>,<Rm> */
                      if (rtx_equal_p (dst, op0)
                          && low_register_operand (op1, SImode))
                        action = action_for_partial_flag_setting;
                      else if (rtx_equal_p (dst, op1)
                               && low_register_operand (op0, SImode))
                        action = action_for_partial_flag_setting == SKIP
                                 ? SKIP : SWAP_CONV;
                      break;

                    case ASHIFTRT:
                    case ASHIFT:
                    case LSHIFTRT:
                      /* ASRS <Rdn>,<Rm> */
                      /* LSRS <Rdn>,<Rm> */
                      /* LSLS <Rdn>,<Rm> */
                      if (rtx_equal_p (dst, op0)
                          && low_register_operand (op1, SImode))
                        action = action_for_partial_flag_setting;
                      /* ASRS <Rd>,<Rm>,#<imm5> */
                      /* LSRS <Rd>,<Rm>,#<imm5> */
                      /* LSLS <Rd>,<Rm>,#<imm5> */
                      else if (low_register_operand (op0, SImode)
                               && CONST_INT_P (op1)
                               && IN_RANGE (INTVAL (op1), 0, 31))
                        action = action_for_partial_flag_setting;
                      break;

                    case ROTATERT:
                      /* RORS <Rdn>,<Rm> */
                      if (rtx_equal_p (dst, op0)
                          && low_register_operand (op1, SImode))
                        action = action_for_partial_flag_setting;
                      break;

                    case NOT:
                      /* MVNS <Rd>,<Rm> */
                      if (low_register_operand (op0, SImode))
                        action = action_for_partial_flag_setting;
                      break;

                    case NEG:
                      /* NEGS <Rd>,<Rm>  (a.k.a RSBS) */
                      if (low_register_operand (op0, SImode))
                        action = CONV;
                      break;

                    case CONST_INT:
                      /* MOVS <Rd>,#<imm8> */
                      if (CONST_INT_P (src)
                          && IN_RANGE (INTVAL (src), 0, 255))
                        action = action_for_partial_flag_setting;
                      break;

                    case REG:
                      /* MOVS and MOV<c> with registers have different
                         encodings, so are not relevant here.  */
                      break;

                    default:
                      break;
                    }
                }

              if (action != SKIP)
                {
                  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
                  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
                  rtvec vec;

                  if (action == SWAP_CONV)
                    {
                      src = copy_rtx (src);
                      XEXP (src, 0) = op1;
                      XEXP (src, 1) = op0;
                      pat = gen_rtx_SET (dst, src);
                      vec = gen_rtvec (2, pat, clobber);
                    }
                  else /* action == CONV */
                    vec = gen_rtvec (2, pat, clobber);

                  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
                  INSN_CODE (insn) = -1;
                }
            }

          if (NONDEBUG_INSN_P (insn))
            df_simulate_one_insn_backwards (bb, insn, &live);
        }
    }

  CLEAR_REG_SET (&live);
}
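
/* Illustrative sketch of the conversion above.  A plain SET such as

     (set (reg:SI 0) (plus:SI (reg:SI 0) (reg:SI 1)))

   is wrapped into

     (parallel [(set (reg:SI 0) (plus:SI (reg:SI 0) (reg:SI 1)))
                (clobber (reg:CC CC_REGNUM))])

   which matches the flag-setting "adds r0, r0, r1" and is therefore
   eligible for a 16-bit encoding.  */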
/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx_insn *insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (use_cmse)
    cmse_nonsecure_call_clear_caller_saved ();
  if (TARGET_THUMB1)
    thumb1_reorg ();
  else if (TARGET_THUMB2)
    thumb2_reorg ();

  /* Ensure all insns that must be split have been split at this point.
     Otherwise, the pool placement code below may compute incorrect
     insn lengths.  Note that when optimizing, all insns have already
     been split at this point.  */
  if (!optimize)
    split_all_insns_noflow ();

  /* Make sure we do not attempt to create a literal pool even though it should
     no longer be necessary to create any.  */
  if (arm_disable_literal_pool)
    return;

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (NOTE_P (insn));
  minipool_pad = 0;

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
        push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
        {
          rtx_jump_table_data *table;

          note_invalid_constants (insn, address, true);
          address += get_attr_length (insn);

          /* If the insn is a vector jump, add the size of the table
             and skip the table.  */
          if (tablejump_p (insn, NULL, &table))
            {
              address += get_jump_table_size (table);
              insn = table;
            }
        }
      else if (LABEL_P (insn))
        /* Add the worst-case padding due to alignment.  We don't add
           the _current_ padding because the minipool insertions
           themselves might change it.  */
        address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix * last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
        fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
        break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
        {
          if (BARRIER_P (ftmp->insn))
            {
              if (ftmp->address >= minipool_vector_head->max_address)
                break;

              last_barrier = ftmp;
            }
          else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
            break;

          last_added_fix = ftmp;  /* Keep track of the last fix added.  */
        }

      /* If we found a barrier, drop back to that; any fixes that we
         could have reached but come after the barrier will now go in
         the next mini-pool.  */
      if (last_barrier != NULL)
        {
          /* Reduce the refcount for those fixes that won't go into this
             pool after all.  */
          for (fdel = last_barrier->next;
               fdel && fdel != ftmp;
               fdel = fdel->next)
            {
              fdel->minipool->refcount--;
              fdel->minipool = NULL;
            }

          ftmp = last_barrier;
        }
      else
        {
          /* ftmp is the first fix that we can't fit into this pool and
             there are no natural barriers that we could use.  Insert a
             new barrier in the code somewhere between the previous
             fix and this one, and arrange to jump around it.  */
          HOST_WIDE_INT max_address;

          /* The last item on the list of fixes must be a barrier, so
             we can never run off the end of the list of fixes without
             last_barrier being set.  */
          gcc_assert (ftmp);

          max_address = minipool_vector_head->max_address;
          /* Check that there isn't another fix that is in range that
             we couldn't fit into this pool because the pool was
             already too large: we need to put the pool before such an
             instruction.  The pool itself may come just after the
             fix because create_fix_barrier also allows space for a
             jump instruction.  */
          if (ftmp->address < max_address)
            max_address = ftmp->address + 1;

          last_barrier = create_fix_barrier (last_added_fix, max_address);
        }

      assign_minipool_offsets (last_barrier);

      while (ftmp)
        {
          if (!BARRIER_P (ftmp->insn)
              && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
                  == NULL))
            break;

          ftmp = ftmp->next;
        }

      /* Scan over the fixes we have identified for this pool, fixing them
         up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
           this_fix = this_fix->next)
        if (!BARRIER_P (this_fix->insn))
          {
            rtx addr
              = plus_constant (Pmode,
                               gen_rtx_LABEL_REF (VOIDmode,
                                                  minipool_vector_label),
                               this_fix->minipool->offset);
            *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
          }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  cfun->machine->after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
/* Routines to output assembly language.  */

/* Return string representation of passed in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (real_equal (r, &value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
                         bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);
  bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());

  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
        gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  /* Can't use POP if returning from an interrupt.  */
  if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
    sprintf (pattern, "pop%s\t{", conditional);
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
         It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
        sprintf (pattern, "ldmfd%s\t", conditional);
      else if (update)
        sprintf (pattern, "ldmia%s\t", conditional);
      else
        sprintf (pattern, "ldm%s\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
        strcat (pattern, "!, {");
      else
        strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
          reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
              reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (interrupt_p && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
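
/* For example (hypothetical operands): popping {r4, r5, pc} with SP as the
   base register and an explicit update emits "pop {r4, r5, pc}", whereas
   the same register list with base register r7 and no update emits
   "ldm r7, {r4, r5, pc}".  */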
/* Output the assembly for a store multiple.  */

const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
                   ? XEXP (operands[0], 0)
                   : XEXP (XEXP (operands[0], 0), 0);
  bool push_p = REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }
  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
        base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
         counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (gen_frame_mem
                   (BLKmode,
                    gen_rtx_PRE_MODIFY (Pmode,
                                        stack_pointer_rtx,
                                        plus_constant
                                        (Pmode, stack_pointer_rtx,
                                         - (count * 8)))
                    ),
                   gen_rtx_UNSPEC (BLKmode,
                                   gen_rtvec (1, reg),
                                   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (gen_frame_mem (DFmode,
                                        plus_constant (Pmode,
                                                       stack_pointer_rtx,
                                                       i * 8)),
                         reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
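
/* As a sketch: a call with COUNT == 3 emits a single store-multiple that
   pushes three register pairs and returns 24 bytes.  Note the ARM10 VFPr1
   workaround above: when !arm_arch6, a two-pair push is widened to three
   pairs before the instruction is built.  */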
/* Returns true if -mcmse has been passed and the function pointed to by 'addr'
   has the cmse_nonsecure_call attribute and returns false otherwise.  */

bool
detect_cmse_nonsecure_call (tree addr)
{
  if (!addr)
    return FALSE;

  tree fntype = TREE_TYPE (addr);
  if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
                                    TYPE_ATTRIBUTES (fntype)))
    return TRUE;
  return FALSE;
}
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
          ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
          : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
         linker.  We need to add an IP clobber to allow setting
         TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
         is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
        {
          ops[0] = gen_rtx_REG (SImode, dest_start + i);
          ops[1] = gen_rtx_REG (SImode, src_start + i);
          output_asm_insn ("mov%?\t%0, %1", ops);
        }
    }
  else
    {
      for (i = 2; i >= 0; i--)
        {
          ops[0] = gen_rtx_REG (SImode, dest_start + i);
          ops[1] = gen_rtx_REG (SImode, src_start + i);
          output_asm_insn ("mov%?\t%0, %1", ops);
        }
    }

  return "";
}
/* Emit a MOVW/MOVT pair.  */
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
        {
          emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
                                               GEN_INT (16)),
                         GEN_INT ((val >> 16) & 0x0000ffff));
          rtx_insn *insn = get_last_insn ();
          set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
        }
      return;
    }
  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
  rtx_insn *insn = get_last_insn ();
  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
}
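
/* E.g. (hypothetical constant): arm_emit_movpair (dest, GEN_INT (0x12345678))
   first sets DEST to 0x5678 (a movw) and then writes 0x1234 into the top
   halfword via a ZERO_EXTRACT (a movt), with a REG_EQUAL note recording the
   full constant.  */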
/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
const char *
output_move_double (rtx *operands, bool emit, int *count)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];
  if (count)
    *count = 1;

  /* The only case when this might happen is when
     you are looking at the length of a DImode instruction
     that has an invalid constant in it.  */
  if (code0 == REG && code1 != MEM)
    {
      gcc_assert (!emit);
      *count = 2;
      return "";
    }

  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
        {
        case REG:

          if (emit)
            {
              if (TARGET_LDRD
                  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
                output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
              else
                output_asm_insn ("ldmia%?\t%m1, %M0", operands);
            }
          break;

        case PRE_INC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
          break;

        case PRE_DEC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
              else
                output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
            }
          break;

        case POST_INC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
              else
                output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
            }
          break;

        case POST_DEC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
          break;

        case PRE_MODIFY:
        case POST_MODIFY:
          /* Autoincrement addressing modes should never have overlapping
             base and destination registers, and overlapping index registers
             are already prohibited, so this doesn't need to worry about
             fix_cm3_ldrd.  */
          otherops[0] = operands[0];
          otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
          otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

          if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
            {
              if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
                {
                  /* Registers overlap so split out the increment.  */
                  if (emit)
                    {
                      output_asm_insn ("add%?\t%1, %1, %2", otherops);
                      output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  /* Use a single insn if we can.
                     FIXME: IWMMXT allows offsets larger than ldrd can
                     handle, fix these up with a pair of ldr.  */
                  if (TARGET_THUMB2
                      || !CONST_INT_P (otherops[2])
                      || (INTVAL (otherops[2]) > -256
                          && INTVAL (otherops[2]) < 256))
                    {
                      if (emit)
                        output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
                    }
                  else
                    {
                      if (emit)
                        {
                          output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
                          output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
                        }
                      if (count)
                        *count = 2;
                    }
                }
            }
          else
            {
              /* Use a single insn if we can.
                 FIXME: IWMMXT allows offsets larger than ldrd can handle,
                 fix these up with a pair of ldr.  */
              if (TARGET_THUMB2
                  || !CONST_INT_P (otherops[2])
                  || (INTVAL (otherops[2]) > -256
                      && INTVAL (otherops[2]) < 256))
                {
                  if (emit)
                    output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
                      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
          break;

        case LABEL_REF:
        case CONST:
          /* We might be able to use ldrd %0, %1 here.  However the range is
             different to ldr/adr, and it is broken on some ARMv7-M
             implementations.  */
          /* Use the second register of the pair to avoid problematic
             conditionals.  */
          otherops[1] = operands[1];
          if (emit)
            output_asm_insn ("adr%?\t%0, %1", otherops);
          operands[1] = otherops[0];
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("ldrd%?\t%0, [%1]", operands);
              else
                output_asm_insn ("ldmia%?\t%1, %M0", operands);
            }

          if (count)
            *count = 2;
          break;

          /* ??? This needs checking for thumb2.  */
        default:
          if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
                               GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
            {
              otherops[0] = operands[0];
              otherops[1] = XEXP (XEXP (operands[1], 0), 0);
              otherops[2] = XEXP (XEXP (operands[1], 0), 1);

              if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
                {
                  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
                    {
                      switch ((int) INTVAL (otherops[2]))
                        {
                        case -8:
                          if (emit)
                            output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
                          return "";
                        case -4:
                          if (TARGET_THUMB2)
                            break;
                          if (emit)
                            output_asm_insn ("ldmda%?\t%1, %M0", otherops);
                          return "";
                        case 4:
                          if (TARGET_THUMB2)
                            break;
                          if (emit)
                            output_asm_insn ("ldmib%?\t%1, %M0", otherops);
                          return "";
                        }
                    }
                  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
                  operands[1] = otherops[0];
                  if (TARGET_LDRD
                      && (REG_P (otherops[2])
                          || TARGET_THUMB2
                          || (CONST_INT_P (otherops[2])
                              && INTVAL (otherops[2]) > -256
                              && INTVAL (otherops[2]) < 256)))
                    {
                      if (reg_overlap_mentioned_p (operands[0],
                                                   otherops[2]))
                        {
                          /* Swap base and index registers over to
                             avoid a conflict.  */
                          std::swap (otherops[1], otherops[2]);
                        }
                      /* If both registers conflict, it will usually
                         have been fixed by a splitter.  */
                      if (reg_overlap_mentioned_p (operands[0], otherops[2])
                          || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
                        {
                          if (emit)
                            {
                              output_asm_insn ("add%?\t%0, %1, %2", otherops);
                              output_asm_insn ("ldrd%?\t%0, [%1]", operands);
                            }
                          if (count)
                            *count = 2;
                        }
                      else
                        {
                          otherops[0] = operands[0];
                          if (emit)
                            output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
                        }
                      return "";
                    }

                  if (CONST_INT_P (otherops[2]))
                    {
                      if (emit)
                        {
                          if (!(const_ok_for_arm (INTVAL (otherops[2]))))
                            output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
                          else
                            output_asm_insn ("add%?\t%0, %1, %2", otherops);
                        }
                    }
                  else if (emit)
                    output_asm_insn ("add%?\t%0, %1, %2", otherops);
                }
              else if (emit)
                output_asm_insn ("sub%?\t%0, %1, %2", otherops);

              if (count)
                *count = 2;

              if (TARGET_LDRD)
                return "ldrd%?\t%0, [%1]";

              return "ldmia%?\t%1, %M0";
            }
          else
            {
              otherops[1] = adjust_address (operands[1], SImode, 4);
              /* Take care of overlapping base/data reg.  */
              if (reg_mentioned_p (operands[0], operands[1]))
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%0, %1", otherops);
                      output_asm_insn ("ldr%?\t%0, %1", operands);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("ldr%?\t%0, %1", operands);
                      output_asm_insn ("ldr%?\t%0, %1", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
        }
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
                  || (TARGET_ARM && TARGET_LDRD));

      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("strd%?\t%1, [%m0]", operands);
              else
                output_asm_insn ("stm%?\t%m0, %M1", operands);
            }
          break;

        case PRE_INC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
          break;

        case PRE_DEC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
              else
                output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
            }
          break;

        case POST_INC:
          if (emit)
            {
              if (TARGET_LDRD)
                output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
              else
                output_asm_insn ("stm%?\t%m0!, %M1", operands);
            }
          break;

        case POST_DEC:
          gcc_assert (TARGET_LDRD);
          if (emit)
            output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
          break;

        case PRE_MODIFY:
        case POST_MODIFY:
          otherops[0] = operands[1];
          otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
          otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

          /* IWMMXT allows offsets larger than strd can handle,
             fix these up with a pair of str.  */
          if (!TARGET_THUMB2
              && CONST_INT_P (otherops[2])
              && (INTVAL(otherops[2]) <= -256
                  || INTVAL(otherops[2]) >= 256))
            {
              if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
                {
                  if (emit)
                    {
                      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
                      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                    }
                  if (count)
                    *count = 2;
                }
              else
                {
                  if (emit)
                    {
                      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
                    }
                  if (count)
                    *count = 2;
                }
            }
          else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
            {
              if (emit)
                output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
            }
          else
            {
              if (emit)
                output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
            }
          break;

        case PLUS:
          otherops[2] = XEXP (XEXP (operands[0], 0), 1);
          if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
            {
              switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
                {
                case -8:
                  if (emit)
                    output_asm_insn ("stmdb%?\t%m0, %M1", operands);
                  return "";

                case -4:
                  if (TARGET_THUMB2)
                    break;
                  if (emit)
                    output_asm_insn ("stmda%?\t%m0, %M1", operands);
                  return "";

                case 4:
                  if (TARGET_THUMB2)
                    break;
                  if (emit)
                    output_asm_insn ("stmib%?\t%m0, %M1", operands);
                  return "";
                }
            }
          if (TARGET_LDRD
              && (REG_P (otherops[2])
                  || TARGET_THUMB2
                  || (CONST_INT_P (otherops[2])
                      && INTVAL (otherops[2]) > -256
                      && INTVAL (otherops[2]) < 256)))
            {
              otherops[0] = operands[1];
              otherops[1] = XEXP (XEXP (operands[0], 0), 0);
              if (emit)
                output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
              return "";
            }
          /* Fall through */

        default:
          otherops[0] = adjust_address (operands[0], SImode, 4);
          otherops[1] = operands[1];
          if (emit)
            {
              output_asm_insn ("str%?\t%1, %0", operands);
              output_asm_insn ("str%?\t%H1, %0", otherops);
            }
          if (count)
            *count = 2;
        }
    }

  return "";
}
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */

      if (MEM_P (operands[1]))
        {
          switch (GET_CODE (XEXP (operands[1], 0)))
            {
            case REG:
              output_asm_insn ("ldmia%?\t%m1, %M0", operands);
              break;

            case LABEL_REF:
            case CONST:
              output_asm_insn ("adr%?\t%0, %1", operands);
              output_asm_insn ("ldmia%?\t%0, %M0", operands);
              break;

            default:
              gcc_unreachable ();
            }
        }
      else
        {
          rtx ops[2];
          int dest, src, i;

          gcc_assert (REG_P (operands[1]));

          dest = REGNO (operands[0]);
          src = REGNO (operands[1]);

          /* This seems pretty dumb, but hopefully GCC won't try to do it
             very often.  */
          if (dest < src)
            for (i = 0; i < 4; i++)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
          else
            for (i = 3; i >= 0; i--)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
        }
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          output_asm_insn ("stm%?\t%m0, %M1", operands);
          break;

        default:
          gcc_unreachable ();
        }
    }

  return "";
}
/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int sp = (!TARGET_VFP_FP16INST
            || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
              || mode == SFmode
              || mode == DFmode
              || mode == HImode
              || mode == SImode
              || mode == DImode
              || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "v%sr%%?.%s\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
           load ? "ld" : "st",
           dp ? "64" : sp ? "32" : "16",
           dp ? "P" : "",
           integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM.
   GCC RTL defines element ordering based on in-memory order.
   This can be different from the architectural ordering of elements
   within a NEON register.  The intrinsics defined in arm_neon.h use the
   NEON register element ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */

const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = HARD_REGNO_NREGS (regno, mode) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
              || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
              || VALID_NEON_QREG_MODE (mode)
              || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4)
        {
          templ = "v%smia%%?\t%%0!, %%h1";
          ops[0] = XEXP (addr, 0);
        }
      else
        {
          templ = "v%s1.64\t%%h1, %%A0";
          ops[0] = mem;
        }
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
         pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case REG:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 1)
        {
          if (nregs > 4)
            templ = "v%smia%%?\t%%m0, %%h1";
          else
            templ = "v%s1.64\t%%h1, %%A0";

          ops[0] = mem;
          ops[1] = reg;
          break;
        }
      /* Fall through.  */
    case LABEL_REF:
    case PLUS:
      {
        int i;
        int overlap = -1;
        for (i = 0; i < nregs; i++)
          {
            /* We're only using DImode here because it's a convenient size.  */
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
            ops[1] = adjust_address (mem, DImode, 8 * i);
            if (reg_overlap_mentioned_p (ops[0], mem))
              {
                gcc_assert (overlap == -1);
                overlap = i;
              }
            else
              {
                sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
                output_asm_insn (buff, ops);
              }
          }
        if (overlap != -1)
          {
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
            ops[1] = adjust_address (mem, SImode, 8 * overlap);
            sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
            output_asm_insn (buff, ops);
          }

        return "";
      }

    default:
      gcc_unreachable ();
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx_insn *insn)
{
  rtx reg, mem, addr;
  int load;
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
        {
        case EImode:
        case OImode:
          return 8;
        case CImode:
          return 12;
        case XImode:
          return 16;
        default:
          gcc_unreachable ();
        }
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  mode = GET_MODE (reg);
  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
      return insns * 4;
    }
  else
    return 4;
}
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */

int
arm_address_offset_is_imm (rtx_insn *insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
          && REG_P (XEXP (addr, 0))
          && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
        output_multi_immediate (operands,
                                "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
                                -n);
      else
        output_multi_immediate (operands,
                                "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
                                n);
    }

  return "";
}

/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
                        int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
        {
          if (n & (3 << i))
            {
              operands[immed_op] = GEN_INT (n & (255 << i));
              output_asm_insn (instr, operands);
              instr = instr2;
              i += 6;
            }
        }
    }

  return "";
}
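
/* For instance (hypothetical operands): adding #257, which is not a valid
   ARM immediate, is emitted as two instructions,

     add r0, r1, #1
     add r0, r0, #256

   one 8-bit chunk of N at a time, with INSTR2 used for every instruction
   after the first.  */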
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem(enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      gcc_unreachable ();
    }
}

/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem(GET_CODE(op));

    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem(code);
      if (CONST_INT_P (XEXP (op, 1)))
        {
          *amountp = INTVAL (XEXP (op, 1));
        }
      else if (REG_P (XEXP (op, 1)))
        {
          *amountp = -1;
          return mnem;
        }
      else
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
         power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
        {
          output_operand_lossage ("invalid shift operand");
          return NULL;
        }

      *amountp = exact_log2 (*amountp);
      gcc_assert (IN_RANGE (*amountp, 0, 31));
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
        mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}
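
/* Sketch: for (mult (reg) (const_int 8)) shift_op returns "lsl" with
   *AMOUNTP set to 3, so a multiplication by a power of two is printed as
   a left shift.  */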
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
        {
          fputs ("\"\n\t.ascii\t\"", stream);
          len_so_far = 0;
        }

      if (ISPRINT (c))
        {
          if (c == '\\' || c == '\"')
            {
              putc ('\\', stream);
              len_so_far++;
            }
          putc (c, stream);
          len_so_far++;
        }
      else
        {
          fprintf (stream, "\\%03o", c);
          len_so_far += 4;
        }
    }

  fputs ("\"\n", stream);
}
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_regs[reg] \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))

/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_core_reg_mask ().  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
         even call clobbered ones.  If this is a leaf function
         we can just examine the registers used by the RTL, but
         otherwise we have to assume that whatever function is
         called might clobber anything, and so we have to save
         all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
        /* FIQ handlers have registers r8 - r12 banked, so
           we only need to check r0 - r7.  Normal ISRs only
           bank r14 and r15, so we must check up to r12.
           r13 is the stack pointer which is always preserved,
           so we do not need to consider it here.  */
        max_reg = 7;
      else
        max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
        if (df_regs_ever_live_p (reg)
            || (! crtl->is_leaf && call_used_regs[reg]))
          save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (flag_pic
          && !TARGET_SINGLE_PIC_BASE
          && arm_pic_register != INVALID_REGNUM
          && crtl->uses_pic_offset_table)
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE(func_type))
    {
      /* For noreturn functions we historically omitted register saves
         altogether.  However this really messes up debugging.  As a
         compromise save just the frame pointers.  Combined with the link
         register saved elsewhere this should be sufficient to get
         a backtrace.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
         which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
        if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
          save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
         don't stack it even though it may be live.  */
      if (flag_pic
          && !TARGET_SINGLE_PIC_BASE
          && arm_pic_register != INVALID_REGNUM
          && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
              || crtl->uses_pic_offset_table))
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
        save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
        {
          reg = EH_RETURN_DATA_REGNO (i);
          if (reg == INVALID_REGNUM)
            break;
          save_reg_mask |= 1 << reg;
        }
    }

  return save_reg_mask;
}
/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}
/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* See the defining assertion in arm_expand_prologue.  */
  if (IS_NESTED (arm_current_func_type ())
      && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
          || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
              && !df_regs_ever_live_p (LR_REGNUM)))
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.
   This is used by arm_compute_frame_layout, which may add extra registers.  */

static unsigned long
arm_compute_save_core_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
	  && optimize_size
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->tail_call_emit
	  && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
			   arm_compute_static_chain_stack_bytes())
	   ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
	 onto the stack is odd.  We need to ensure that the stack
	 is 64-bit aligned before we start to save iWMMXt registers,
	 and also before we start to create locals.  (A local variable
	 might be a double or long long which we will load/store using
	 an iWMMXt instruction).  Therefore we need to push another
	 ARM register, so that the stack will be 64-bit aligned.  We
	 try to avoid using the arg registers (r0 -r3) as they might be
	 used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
	if ((save_reg_mask & (1 << reg)) == 0)
	  break;

      if (reg <= 12)
	save_reg_mask |= (1 << reg);
      else
	{
	  cfun->machine->sibcall_blocked = 1;
	  save_reg_mask |= (1 << 3);
	}
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
	save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
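
/* Illustrative sketch (not part of GCC, hence guarded out): the iWMMXt
   test above reduces to a parity check -- if the number of words pushed
   so far (saved core registers, plus pretend args and any static chain
   word, converted to words) is odd, one more core register is pushed
   purely to keep the stack 64-bit aligned.  A standalone model of that
   decision, with a hypothetical popcount standing in for bit_count:  */
#if 0
#include <stdio.h>

static unsigned
bit_count_model (unsigned long mask)
{
  unsigned n = 0;
  while (mask)
    {
      mask &= mask - 1;		/* Clear the lowest set bit.  */
      n++;
    }
  return n;
}

int
main (void)
{
  unsigned long save_reg_mask = (1 << 4) | (1 << 5) | (1 << 14); /* r4, r5, lr */
  unsigned pretend_words = 0, chain_words = 0;

  if ((bit_count_model (save_reg_mask) + pretend_words + chain_words) % 2)
    printf ("odd number of words: push one extra register for alignment\n");
  else
    printf ("already 64-bit aligned\n");
  return 0;
}
#endif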
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_core_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg ++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  /* Handle the frame pointer as a special case.  */
  if (frame_pointer_needed)
    mask |= 1 << HARD_FRAME_POINTER_REGNUM;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
	 we will use.  If the register is live then we will
	 have to push it.  Use LAST_LO_REGNUM as our fallback
	 choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
	 not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
	reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
	mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
	 determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
	if (mask & (1 << reg))
	  break;

      if (reg > LAST_LO_REGNUM)
	{
	  /* Make sure we have a register available for stack decrement.  */
	  mask |= 1 << LAST_LO_REGNUM;
	}
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
	   regno < LAST_VFP_REGNUM;
	   regno += 2)
	{
	  if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
	      && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
	    {
	      if (count > 0)
		{
		  /* Workaround ARM10 VFPr1 bug.  */
		  if (count == 2 && !arm_arch6)
		    count++;
		  saved += count * 8;
		}
	      count = 0;
	    }
	  else
	    count++;
	}
      if (count > 0)
	{
	  if (count == 2 && !arm_arch6)
	    count++;
	  saved += count * 8;
	}
    }
  return saved;
}
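
/* Illustrative sketch (not part of GCC, hence guarded out): the ARM10
   VFPr1 workaround above means a contiguous run of exactly two D-registers
   is accounted as three on pre-v6 cores, so a run such as {d8, d9} costs
   24 bytes of save area instead of 16.  A standalone model of the per-run
   accounting, where arm_arch6 is a stand-in for the real target flag:  */
#if 0
#include <stdio.h>

static int
run_bytes (int count, int arm_arch6)
{
  if (count == 2 && !arm_arch6)
    count++;			/* ARM10 VFPr1 bug workaround.  */
  return count * 8;
}

int
main (void)
{
  printf ("run of 2 D-regs, pre-v6: %d bytes\n", run_bytes (2, 0)); /* 24 */
  printf ("run of 2 D-regs, v6+:    %d bytes\n", run_bytes (2, 1)); /* 16 */
  return 0;
}
#endif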
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
			   bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
	 found a tail call, then we have to trust that the called
	 function won't return.  */
      if (really_return)
	{
	  rtx ops[2];

	  /* Otherwise, trap an attempted return by aborting.  */
	  ops[0] = operand;
	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
				       : "abort");
	  assemble_external_libcall (ops[1]);
	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
	}

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
	 (e.g. interworking) then we can load the return address
	 directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
	  && !IS_CMSE_ENTRY (func_type)
	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
	return_reg = reg_names[PC_REGNUM];
      else
	return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
	{
	  /* There are three possible reasons for the IP register
	     being saved.  1) a stack frame was created, in which case
	     IP contains the old stack pointer, or 2) an ISR routine
	     corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     restore IP.  */
	  if (frame_pointer_needed)
	    {
	      live_regs_mask &= ~ (1 << IP_REGNUM);
	      live_regs_mask |=   (1 << SP_REGNUM);
	    }
	  else
	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
	}

      /* On some ARM architectures it is faster to use LDR rather than
	 LDM to load a single register.  On other architectures, the
	 cost is the same.  In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
	if (live_regs_mask == (1U << reg))
	  break;

      if (reg <= LAST_ARM_REGNUM
	  && (reg != LR_REGNUM
	      || ! really_return
	      || ! IS_INTERRUPT (func_type)))
	{
	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
	}
      else
	{
	  char *p;
	  int first = 1;

	  /* Generate the load multiple instruction to restore the
	     registers.  Note we can get here, even if
	     frame_pointer_needed is true, but only if sp already
	     points to the base of the saved core registers.  */
	  if (live_regs_mask & (1 << SP_REGNUM))
	    {
	      unsigned HOST_WIDE_INT stack_adjust;

	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);

	      if (stack_adjust && arm_arch5 && TARGET_ARM)
		sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
	      else
		{
		  /* If we can't use ldmib (SA110 bug),
		     then try to pop r3 instead.  */
		  if (stack_adjust)
		    live_regs_mask |= 1 << 3;

		  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
		}
	    }
	  /* For interrupt returns we have to use an LDM rather than
	     a POP so that we can use the exception return variant.  */
	  else if (IS_INTERRUPT (func_type))
	    sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
	  else
	    sprintf (instr, "pop%s\t{", conditional);

	  p = instr + strlen (instr);

	  for (reg = 0; reg <= SP_REGNUM; reg++)
	    if (live_regs_mask & (1 << reg))
	      {
		int l = strlen (reg_names[reg]);

		if (first)
		  first = 0;
		else
		  {
		    memcpy (p, ", ", 2);
		    p += 2;
		  }

		memcpy (p, "%|", 2);
		memcpy (p + 2, reg_names[reg], l);
		p += l + 2;
	      }

	  if (live_regs_mask & (1 << LR_REGNUM))
	    {
	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
	      /* If returning from an interrupt, restore the CPSR.  */
	      if (IS_INTERRUPT (func_type))
		strcat (p, "^");
	    }
	  else
	    strcpy (p, "}");
	}

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
	 perform the actual function return.  */
      if (really_return
	  && func_type != ARM_FT_INTERWORKED
	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
	{
	  /* The return has already been handled
	     by loading the LR into the PC.  */
	  return "";
	}
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
	{
	case ARM_FT_ISR:
	case ARM_FT_FIQ:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
	  break;

	case ARM_FT_INTERWORKED:
	  gcc_assert (arm_arch5 || arm_arch4t);
	  sprintf (instr, "bx%s\t%%|lr", conditional);
	  break;

	case ARM_FT_EXCEPTION:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
	  break;

	default:
	  if (IS_CMSE_ENTRY (func_type))
	    {
	      /* Check if we have to clear the 'GE bits' which is only used if
		 parallel add and subtraction instructions are available.  */
	      if (TARGET_INT_SIMD)
		snprintf (instr, sizeof (instr),
			  "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
	      else
		snprintf (instr, sizeof (instr),
			  "msr%s\tAPSR_nzcvq, %%|lr", conditional);

	      output_asm_insn (instr, & operand);
	      if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
		{
		  /* Clear the cumulative exception-status bits (0-4,7) and the
		     condition code bits (28-31) of the FPSCR.  We need to
		     remember to clear the first scratch register used (IP) and
		     save and restore the second (r4).  */
		  snprintf (instr, sizeof (instr), "push\t{%%|r4}");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
		  output_asm_insn (instr, & operand);
		}
	      snprintf (instr, sizeof (instr), "bxns\t%%|lr");
	    }
	  /* Use bx if it's available.  */
	  else if (arm_arch5 || arm_arch4t)
	    sprintf (instr, "bx%s\t%%|lr", conditional);
	  else
	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
	  break;
	}

      output_asm_insn (instr, & operand);
    }

  return "";
}
/* Output in FILE asm statements needed to declare the NAME of the function
   defined by its DECL node.  */

void
arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
{
  size_t cmse_name_len;
  char *cmse_name = 0;
  char cmse_prefix[] = "__acle_se_";

  /* When compiling with ARMv8-M Security Extensions enabled, we should print an
     extra function label for each function with the 'cmse_nonsecure_entry'
     attribute.  This extra function label should be prepended with
     '__acle_se_', telling the linker that it needs to create secure gateway
     veneers for this function.  */
  if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
				    DECL_ATTRIBUTES (decl)))
    {
      cmse_name_len = sizeof (cmse_prefix) + strlen (name);
      cmse_name = XALLOCAVEC (char, cmse_name_len);
      snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
      targetm.asm_out.globalize_label (file, cmse_name);

      ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
      ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
    }

  ARM_DECLARE_FUNCTION_NAME (file, name, decl);
  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
  ASM_OUTPUT_LABEL (file, name);

  if (cmse_name)
    ASM_OUTPUT_LABEL (file, cmse_name);

  ARM_OUTPUT_FN_UNWIND (file, TRUE);
}
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location, whose length is ((pc[-3]) & 0x00ffffff).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */

void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f)
{
  unsigned long func_type;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  if (IS_CMSE_ENTRY (func_type))
    asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
	       crtl->args.size,
	       crtl->args.pretend_args_size,
	       (HOST_WIDE_INT) get_frame_size ());

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
	       frame_pointer_needed,
	       cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
	 of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
	{
	  rtx label = cfun->machine->call_via[regno];

	  if (label != NULL)
	    {
	      switch_to_section (function_section (current_function_decl));
	      targetm.asm_out.internal_label (asm_out_file, "L",
					      CODE_LABEL_NUMBER (label));
	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	    }
	}

      /* ??? Probably not safe to set this here, since it assumes that a
	 function will be emitted as assembly immediately after we generate
	 RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
		  || (cfun->machine->return_used_this_function != 0)
		  || offsets->saved_regs == offsets->outgoing_args
		  || frame_pointer_needed);
    }
}
/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i = 0;
  unsigned regno;
  rtx tmp;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  /* If there's an odd number of registers to push, start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
	mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
						     stack_pointer_rtx));
      else
	mem = gen_frame_mem (Pmode,
			     gen_rtx_PRE_MODIFY
			     (Pmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     -4 * num_regs)));

      tmp = gen_rtx_SET (mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
	rtx reg1, reg2, mem1, mem2;
	rtx tmp0, tmp1, tmp2;
	int regno2;

	/* Find the register to pair with this one.  */
	for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
	     regno2++)
	  ;

	reg1 = gen_rtx_REG (SImode, regno);
	reg2 = gen_rtx_REG (SImode, regno2);

	if (i == 0)
	  {
	    rtx insn;

	    /* The first store also allocates the stack space.  */
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * num_regs));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * (num_regs - 1)));
	    tmp0 = gen_rtx_SET (stack_pointer_rtx,
				plus_constant (Pmode, stack_pointer_rtx,
					       -4 * num_regs));
	    tmp1 = gen_rtx_SET (mem1, reg1);
	    tmp2 = gen_rtx_SET (mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp0) = 1;
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
	    XVECEXP (par, 0, 0) = tmp0;
	    XVECEXP (par, 0, 1) = tmp1;
	    XVECEXP (par, 0, 2) = tmp2;
	    insn = emit_insn (par);
	    RTX_FRAME_RELATED_P (insn) = 1;
	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  }
	else
	  {
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * i));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * (i + 1)));
	    tmp1 = gen_rtx_SET (mem1, reg1);
	    tmp2 = gen_rtx_SET (mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    XVECEXP (par, 0, 0) = tmp1;
	    XVECEXP (par, 0, 1) = tmp2;
	    emit_insn (par);
	  }

	/* Create unwind information.  This is an approximation.  */
	tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * i)),
			    reg1);
	tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * (i + 1))),
			    reg2);

	RTX_FRAME_RELATED_P (tmp1) = 1;
	RTX_FRAME_RELATED_P (tmp2) = 1;
	XVECEXP (dwarf, 0, i + 1) = tmp1;
	XVECEXP (dwarf, 0, i + 2) = tmp2;
	i += 2;
	regno = regno2 + 1;
      }
    else
      regno++;
}
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more freedom
   scheduling and can be turned into an STM by peephole optimizations.  */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: A more efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2 == 0)
	    && (saved_regs_mask & (1 << (j + 1))))
	  {
	    /* Current register and previous register form register pair for
	       which STRD can be generated.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (DImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset + 4));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 8;
	    j += 2;
	  }
	else
	  {
	    /* Emit a single word store.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (SImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 4;
	    j += 1;
	  }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	num_regs++;
      if (dwarf_regs_mask & (1 << i))
	num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
           (set (mem:BLK (pre_modify:SI (reg:SI sp)
	                                (const_int:SI <num>)))
	        (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI XX))
           (use (reg:SI YY))
	   ...
        ])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
	   ...
        ])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, 0)
	    = gen_rtx_SET (gen_frame_mem
			   (BLKmode,
			    gen_rtx_PRE_MODIFY (Pmode,
						stack_pointer_rtx,
						plus_constant
						(Pmode, stack_pointer_rtx,
						 -4 * num_regs))
			    ),
			   gen_rtx_UNSPEC (BLKmode,
					   gen_rtvec (1, reg),
					   UNSPEC_PUSH_MULT));

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
				 reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  break;
	}
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp
		= gen_rtx_SET (gen_frame_mem
			       (SImode,
				plus_constant (Pmode, stack_pointer_rtx,
					       4 * j)),
			       reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  j++;
	}
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
			  rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
	 num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
	reg = gen_rtx_REG (SImode, i);
	if ((num_regs == 1) && emit_update && !return_in_pc)
	  {
	    /* Emit single load with writeback.  */
	    tmp = gen_frame_mem (SImode,
				 gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));
	    tmp = emit_insn (gen_rtx_SET (reg, tmp));
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
	    return;
	  }

	tmp = gen_rtx_SET (reg,
			   gen_frame_mem
			   (SImode,
			    plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
	RTX_FRAME_RELATED_P (tmp) = 1;
	XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

	/* We need to maintain a sequence for DWARF info too.  As dwarf info
	   should not have PC, skip PC.  */
	if (i != PC_REGNUM)
	  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
				 stack_pointer_rtx, stack_pointer_rtx);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
	first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
			 gen_frame_mem
			 (DFmode,
			  plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
				 base_reg, base_reg);
}
/* Generate and emit a pattern that will be recognized as LDRD pattern.  If even
   number of registers are being popped, multiple LDRD patterns are created for
   all register pairs.  If odd number of registers are popped, last register is
   loaded by using LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
	/* Create RTX for memory load.  */
	reg = gen_rtx_REG (SImode, j);
	tmp = gen_rtx_SET (reg,
			   gen_frame_mem (SImode,
			       plus_constant (Pmode,
					      stack_pointer_rtx, 4 * i)));
	RTX_FRAME_RELATED_P (tmp) = 1;

	if (i % 2 == 0)
	  {
	    /* When saved-register index (i) is even, the RTX to be emitted is
	       yet to be created.  Hence create it first.  The LDRD pattern we
	       are generating is :
		 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
		   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
	       where target registers need not be consecutive.  */
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    dwarf = NULL_RTX;
	  }

	/* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
	   added as 0th element and if i is odd, reg_i is added as 1st element
	   of LDRD pattern shown above.  */
	XVECEXP (par, 0, (i % 2)) = tmp;
	dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	if ((i % 2) == 1)
	  {
	    /* When saved-register index (i) is odd, RTXs for both the registers
	       to be loaded are generated in above given LDRD pattern, and the
	       pattern can be emitted now.  */
	    par = emit_insn (par);
	    REG_NOTES (par) = dwarf;
	    RTX_FRAME_RELATED_P (par) = 1;
	  }

	i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
     then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    {
      arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
	 register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
			  gen_rtx_POST_INC (SImode,
					    stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
	{
	  /* If return_in_pc, j must be PC_REGNUM.  */
	  gcc_assert (j == PC_REGNUM);
	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = tmp;
	  par = emit_jump_insn (par);
	}
      else
	{
	  par = emit_insn (tmp);
	  REG_NOTES (par) = dwarf;
	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
	 pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }
}
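
/* Illustrative sketch (not part of GCC, hence guarded out): for a mask
   such as {r4, r5, r7, r10} the loop above pairs saved registers by stack
   slot, not by register number -- LDRD (r4, r5) from [sp] and
   LDRD (r7, r10) from [sp, #8] -- and a leftover register (possibly PC)
   is popped last by a single LDR.  A standalone model of the slot walk:  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned long mask = (1UL << 4) | (1UL << 5) | (1UL << 7) | (1UL << 10);
  int i = 0, j;

  /* Even slot i and the following odd slot form one LDRD pair.  */
  for (j = 0; j <= 15; j++)
    if (mask & (1UL << j))
      {
	printf ("slot %d at [sp, #%d] -> r%d\n", i, 4 * i, j);
	i++;
      }
  return 0;
}
#endif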
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This provides
   more scheduling freedom, compared to writeback on every load.  However,
   if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
   peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2) == 0
	    && (saved_regs_mask & (1 << (j + 1)))
	    && (j + 1) != PC_REGNUM)
	  {
	    /* Current register and next register form register pair for which
	       LDRD can be generated.  PC is always the last register popped, and
	       we handle it separately.  */
	    if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j),
				    NULL_RTX);
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j + 1),
				    dwarf);

	    REG_NOTES (tmp) = dwarf;

	    offset += 8;
	    j += 2;
	  }
	else if (j != PC_REGNUM)
	  {
	    /* Emit a single word load.  */
	    if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
					      gen_rtx_REG (SImode, j),
					      NULL_RTX);

	    offset += 4;
	    j += 1;
	  }
	else /* j == PC_REGNUM */
	  j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
				   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
			 gen_frame_mem (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
			      gen_rtx_REG (SImode, PC_REGNUM),
			      NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				   stack_pointer_rtx, stack_pointer_rtx);
    }
}
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!crtl->is_leaf
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know if r3 will be available because
   we do have an indirect tailcall happening in this
   particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
	rtx_insn *call = BB_END (e->src);
	if (!CALL_P (call))
	  call = prev_nonnote_nondebug_insn (call);
	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
	if (find_regno_fusage (call, USE, 3)
	    || is_indirect_tailcall_p (call))
	  return true;
      }

  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_core_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
20804 static arm_stack_offsets
*
20805 arm_get_frame_offsets (void)
20807 struct arm_stack_offsets
*offsets
;
20809 offsets
= &cfun
->machine
->stack_offsets
;
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static void
arm_compute_frame_layout (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* Initially this is the size of the local variables.  It will be translated
     into an offset once we have determined the size of preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
	 preserve that condition at any subroutine call.  We also require the
	 soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
	{
	  /* Check for the call-saved iWMMXt registers.  */
	  for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 8;
	}

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
	  && TARGET_HARD_FLOAT)
	saved += arm_get_vfp_saved_size ();
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (crtl->is_leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
	 when there is a stack frame as the alignment will be rolled into
	 the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* Register r3 is caller-saved.  Normally it does not need to be
	     saved on entry by the prologue.  However if we choose to save
	     it for padding then we may confuse the compiler into thinking
	     a prologue sequence is required when in fact it is not.  This
	     will occur when shrink-wrapping if r3 is used as a scratch
	     register and there are no other callee-saved writes.

	     This situation can be avoided when other callee-saved registers
	     are available and r3 is not mandatory if we choose a callee-saved
	     register for padding.  */
	  bool prefer_callee_reg_p = false;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (! any_sibcall_could_use_r3 ()
	      && arm_size_return_regs () <= 12
	      && (offsets->saved_regs_mask & (1 << 3)) == 0
	      && (TARGET_THUMB2
		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
	    {
	      reg = 3;
	      if (!TARGET_THUMB2)
		prefer_callee_reg_p = true;
	    }
	  if (reg == -1
	      || prefer_callee_reg_p)
	    {
	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
		{
		  /* Avoid fixed registers; they may be changed at
		     arbitrary times so it's unsafe to restore them
		     during the epilogue.  */
		  if (!fixed_regs[i]
		      && (offsets->saved_regs_mask & (1 << i)) == 0)
		    {
		      reg = i;
		      break;
		    }
		}
	    }

	  if (reg != -1)
	    {
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
			    + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
	offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }
}
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

HOST_WIDE_INT
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case FRAME_POINTER_REGNUM:
	  /* This is the reverse of the soft frame pointer
	     to hard frame pointer elimination below.  */
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* This is only non-zero in the case where the static chain register
	     is stored above the frame.  */
	  return offsets->frame - offsets->saved_args - 4;

	case STACK_POINTER_REGNUM:
	  /* If nothing has been pushed on the stack at all
	     then this will return -4.  This *is* correct!  */
	  return offsets->outgoing_args - (offsets->saved_args + 4);

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer to the bottom entry
	     in the stack frame.  If there is no stack frame at all,
	     then they are identical.  */
	  return offsets->frame - offsets->soft_frame;

	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
	 In theory you could eliminate from the hard frame
	 pointer to the stack pointer, but this will never
	 happen, since if a stack frame is not needed the
	 hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
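
/* Illustrative sketch (not part of GCC, hence guarded out): for a
   hypothetical ARM frame with 8 bytes of pretend args, 20 bytes of saved
   core registers (r4-r7 + lr), 16 bytes of locals and no outgoing args,
   and ignoring doubleword-alignment adjustments, the arithmetic of the
   ARG_POINTER eliminations above works out as follows:  */
#if 0
#include <stdio.h>

int
main (void)
{
  int saved_args = 8;			/* pretend args */
  int saved_regs = saved_args + 20;	/* r4-r7 + lr */
  int soft_frame = saved_regs;		/* no interworking slot */
  int locals_base = soft_frame + 16;	/* 16 bytes of locals */
  int outgoing_args = locals_base;	/* no outgoing args */

  printf ("ARG_POINTER -> FRAME_POINTER: %d\n",
	  soft_frame - saved_args);			/* 20 */
  printf ("ARG_POINTER -> STACK_POINTER: %d\n",
	  outgoing_args - (saved_args + 4));		/* 32 */
  return 0;
}
#endif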
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */
bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
	  (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
	  (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
	  (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
	   true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	insn = gen_rtx_MEM (V2SImode, insn);
	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	RTX_FRAME_RELATED_P (insn) = 1;
	saved_size += 8;
      }

  if (TARGET_HARD_FLOAT)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	{
	  if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
	      && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
	    {
	      if (start_reg != reg)
		saved_size += vfp_emit_fstmd (start_reg,
					      (reg - start_reg) / 2);
	      start_reg = reg + 2;
	    }
	}
      if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }
  return saved_size;
}
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
	 expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx,
					hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					hard_frame_pointer_rtx,
					stack_pointer_rtx));
	}
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
{
21154 /* Return a short-lived scratch register for use as a 2nd scratch register on
21155 function entry after the registers are saved in the prologue. This register
21156 must be released by means of release_scratch_register_on_entry. IP is not
21157 considered since it is always used as the 1st scratch register if available.
21159 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21160 mask of live registers. */
21163 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
21164 unsigned long live_regs
)
21170 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
21176 for (i
= 4; i
< 11; i
++)
21177 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
21185 /* If IP is used as the 1st scratch register for a nested function,
21186 then either r3 wasn't available or is used to preserve IP. */
21187 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
21189 regno
= (regno1
== 3 ? 2 : 3);
21191 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
21196 sr
->reg
= gen_rtx_REG (SImode
, regno
);
21199 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21200 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
21201 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21202 plus_constant (Pmode
, stack_pointer_rtx
, -4));
21203 RTX_FRAME_RELATED_P (insn
) = 1;
21204 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21208 /* Release a scratch register obtained from the preceding function. */
21211 release_scratch_register_on_entry (struct scratch_reg
*sr
)
21215 rtx addr
= gen_rtx_POST_INC (Pmode
, stack_pointer_rtx
);
21216 rtx insn
= emit_set_insn (sr
->reg
, gen_frame_mem (SImode
, addr
));
21217 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21218 plus_constant (Pmode
, stack_pointer_rtx
, 4));
21219 RTX_FRAME_RELATED_P (insn
) = 1;
21220 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif
21230 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21231 inclusive. These are offsets from the current stack pointer. REGNO1
21232 is the index number of the 1st scratch register and LIVE_REGS is the
21233 mask of live registers. */
static void
arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
			    unsigned int regno1, unsigned long live_regs)
{
  rtx reg1 = gen_rtx_REG (Pmode, regno1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
    }

  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, rem;

      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (reg1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
	 it exceeds SIZE.  If only two probes are needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
	  emit_stack_probe (reg1);
	}

      rem = size - (i - PROBE_INTERVAL);
      if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
	{
	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
	  emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
	}
      else
	emit_stack_probe (plus_constant (Pmode, reg1, -rem));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr, regno1, live_regs);

      emit_move_insn (reg1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (sr.reg, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));


      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  HOST_WIDE_INT rem = size - rounded_size;

	  if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
	    {
	      emit_set_insn (sr.reg,
			     plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
	      emit_stack_probe (plus_constant (Pmode, sr.reg,
					       PROBE_INTERVAL - rem));
	    }
	  else
	    emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
	}

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
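/* Worked example (illustrative, not part of the original source): with
   PROBE_INTERVAL == 4096, FIRST == 4096 and SIZE == 12287 (three intervals,
   ARM state), the middle branch above probes at SP - 8192, SP - 12288 and
   finally SP - 16383 (= SP - FIRST - SIZE), using 4 + 2*(3-2) = 6 insns as
   quoted in the compile-time count; Thumb-2 would take the two-insn tail
   because rem == 4095 > 255.  */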
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub\t%0, %0, %1", xops);

  /* Probe at TEST_ADDR.  */
  output_asm_insn ("str\tr0, [%0, #0]", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Branch.  */
  fputs ("\tbne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
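/* The emitted loop therefore looks like this (illustrative, assuming
   PROBE_INTERVAL == 4096 and r4/r5 holding TEST_ADDR/LAST_ADDR):

   .LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0
*/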
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  bool clobber_ip;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov sp, r0
	  bx lr

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, R0_REGNUM);
      r1 = gen_rtx_REG (SImode, R1_REGNUM);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
	 has entered a different function.  That said, the unwind info is
	 correct, individually, before and after this instruction because
	 we've described the save of SP, which will override the default
	 handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* The static chain register is the same as the IP register.  If it is
     clobbered when creating the frame, we need to save and restore it.  */
  clobber_ip = IS_NESTED (func_type)
	       && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
		   || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
		       && !df_regs_ever_live_p (LR_REGNUM)
		       && arm_r3_live_at_start_p ()));

  /* Find somewhere to store IP whilst the frame is being created.
     We try the following places in order:

       1. The last argument register r3 if it is available.
       2. A slot on the stack above the frame if there are no
	  arguments to push onto the stack.
       3. Register r3 again, after pushing the argument registers
	  onto the stack, if this is a varargs function.
       4. The last slot on the stack created for the arguments to
	  push, if this isn't a varargs function.

     Note - we only need to tell the dwarf2 backend about the SP
     adjustment in the second variant; the static chain register
     doesn't need to be unwound, as it doesn't contain a value
     inherited from the caller.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p ())
	insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
      else if (args_to_push == 0)
	{
	  rtx addr, dwarf;

	  gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
	  saved_regs += 4;

	  addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
	  fp_offset = 4;

	  /* Just tell the dwarf backend that we adjusted SP.  */
	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -fp_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	}
      else
	{
	  /* Store the args on the stack.  */
	  if (cfun->machine->uses_anonymous_args)
	    {
	      insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
					  (0xf0 >> (args_to_push / 4)) & 0xf);
	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	      saved_pretend_args = 1;
	    }
	  else
	    {
	      rtx addr, dwarf;

	      if (args_to_push == 4)
		addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	      else
		addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  -args_to_push));

	      insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

	      /* Just tell the dwarf backend that we adjusted SP.  */
	      dwarf = gen_rtx_SET (stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx,
						  -args_to_push));
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	    }

	  RTX_FRAME_RELATED_P (insn) = 1;
	  fp_offset = args_to_push;
	  args_to_push = 0;
	}
    }

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
	{
	  /* Interrupt functions must not corrupt any registers.
	     Creating a frame pointer however, corrupts the IP
	     register, so we must push it first.  */
	  emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

	  /* Do not set RTX_FRAME_RELATED_P on this insn.
	     The dwarf stack unwinding code only wants to see one
	     stack decrement per function, and this is not it.  If
	     this instruction is labeled as being part of the frame
	     creation sequence then dwarf2out_frame_debug_expr will
	     die when it encounters the assignment of IP to FP
	     later on, since the use of SP here establishes SP as
	     the CFA register and not IP.

	     Anyway this instruction is not really part of the stack
	     frame creation although it is part of the prologue.  */
	}

      insn = emit_set_insn (ip_rtx,
			    plus_constant (Pmode, stack_pointer_rtx,
					   fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
	insn = emit_multi_reg_push
	  ((0xf0 >> (args_to_push / 4)) & 0xf,
	   (0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn
	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
		       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra
     push of IP (needed when a frame is needed and the frame layout is APCS),
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
	  && saved_regs == offsets->saved_regs - offsets->saved_args)
	{
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
	  int n;
	  int frame;
	  n = 0;
	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
	}

      if (TARGET_LDRD
	  && current_tune->prefer_ldrd_strd
	  && !optimize_function_for_size_p (cfun))
	{
	  gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
	  if (TARGET_THUMB2)
	    thumb2_emit_strd_push (live_regs_mask);
	  else if (TARGET_ARM
		   && !TARGET_APCS_FRAME
		   && !IS_INTERRUPT (func_type))
	    arm_emit_strd_push (live_regs_mask);
	  else
	    {
	      insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
      else
	{
	  insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
	{
	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  insn = GEN_INT (saved_regs - (4 + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If this isn't an interrupt service routine and we have a frame, then do
     stack checking.  We use IP as the first scratch register, except for the
     non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
  if (!IS_INTERRUPT (func_type)
      && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      unsigned int regno;

      if (!IS_NESTED (func_type) || clobber_ip)
	regno = IP_REGNUM;
      else if (df_regs_ever_live_p (LR_REGNUM))
	regno = LR_REGNUM;
      else
	regno = 3;

      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
	    arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
					size - STACK_CHECK_PROTECT,
					regno, live_regs_mask);
	}
      else if (size > 0)
	arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
				    regno, live_regs_mask);
    }

  /* Recover the static chain register.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p () || saved_pretend_args)
	insn = gen_rtx_REG (SImode, 3);
      else
	{
	  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
	  insn = gen_frame_mem (SImode, insn);
	}
      emit_set_insn (ip_rtx, insn);
      emit_insn (gen_force_register_use (ip_rtx));
    }

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
	 need to get tricky.  */
      rtx_insn *last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
			- offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
	 will prevent the scheduler from moving stores to the frame
	 before the stack adjustment.  */
      if (frame_pointer_needed)
	emit_insn (gen_stack_tie (stack_pointer_rtx,
				  hard_frame_pointer_rtx));
    }


  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
	mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
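/* Illustrative example (not part of the original source): for a simple
   ARM-state function that saves r4 and lr and needs 8 bytes of locals,
   the expansion above typically reduces to

	push	{r4, lr}	@ emit_multi_reg_push (live_regs_mask, ...)
	sub	sp, sp, #8	@ the final gen_addsi3 stack adjustment

   with RTX_FRAME_RELATED_P set on both insns so that the unwind info
   describes each stack decrement.  */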
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}
      if (current_insn_predicate != NULL)
	{
	  output_operand_lossage
	    ("predicated instruction in conditional sequence");
	  return;
	}

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
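/* For example (illustrative, not part of the original source): while the
   ccfsm is in state 3 with arm_current_cc == ARM_EQ, the "%?" punctuation
   in an output template reaches this helper and prints "eq", turning
   "add%?" into "addeq".  */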
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
   Letters previously used, but now deprecated/obsolete: sVWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '.':
      /* The current condition code for a condition code setting instruction.
	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      fputc('s', stream);
      arm_print_condition (stream);
      return;

    case '!':
      /* If the instruction is conditionally executed then print
	 the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2);
      if (current_insn_predicate)
	arm_print_condition (stream);
      else
	fputc('s', stream);
      return;

    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
	REAL_VALUE_TYPE r;
	r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
	fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (stream, x);
	  break;

	case CONST:
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	    {
	      output_addr_const (stream, x);
	      break;
	    }
	  /* Fall through.  */

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    /* An integer that we want to print in HEX.  */
    case 'x':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
	  break;

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    case 'B':
      if (CONST_INT_P (x))
	{
	  HOST_WIDE_INT val;
	  val = ARM_SIGN_EXTEND (~INTVAL (x));
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	{
	  putc ('~', stream);
	  output_addr_const (stream, x);
	}
      return;

    case 'b':
      /* Print the log2 of a CONST_INT.  */
      {
	HOST_WIDE_INT val;

	if (!CONST_INT_P (x)
	    || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	else
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
      }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
	HOST_WIDE_INT val;
	const char *shift;

	shift = shift_op (x, &val);

	if (shift)
	  {
	    fprintf (stream, ", %s ", shift);
	    if (val == -1)
	      arm_print_operand (stream, XEXP (x, 1), 0);
	    else
	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
	  }
      }
      return;
      /* An explanation of the 'Q', 'R' and 'H' register operands:

	 In a pair of registers containing a DI or DF value the 'Q'
	 operand returns the register number of the register containing
	 the least significant part of the value.  The 'R' operand returns
	 the register number of the register containing the most
	 significant part of the value.

	 The 'H' operand returns the higher of the two register numbers.
	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
	 same as the 'Q' operand, since the most significant part of the
	 value is held in the lower number register.  The reverse is true
	 on systems where WORDS_BIG_ENDIAN is false.

	 The purpose of these operands is to distinguish between cases
	 where the endian-ness of the values is important (for example
	 when they are added together), and cases where the endian-ness
	 is irrelevant, but the order of register operations is important.
	 For example when loading a value from memory into a register
	 pair, the endian-ness does not matter.  Provided that the value
	 from the lower memory address is put into the lower numbered
	 register, and the value from the higher address is put into the
	 higher numbered register, the load will work regardless of whether
	 the value being loaded is big-wordian or little-wordian.  The
	 order of the two register loads can matter however, if the address
	 of the memory location is actually held in one of the registers
	 being overwritten by the load.

	 The 'Q' and 'R' constraints are also available for 64-bit
	 constants.  */
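      /* For example (illustrative, not part of the original source): for a
	 DImode value in {r0, r1} on a little-endian target (WORDS_BIG_ENDIAN
	 false), %Q prints r0 (least significant word), %R prints r1 (most
	 significant word), and %H prints r1, the higher register number.  */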
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  rtx part = gen_lowpart (SImode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  rtx part;

	  if (mode == VOIDmode)
	    mode = DImode;
	  part = gen_highpart_mode (SImode, mode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
		   REG_P (XEXP (x, 0))
		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
		   REGNO (x),
		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
	int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
	int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
	if (numregs == 1)
	  asm_fprintf (stream, "{d%d}", regno);
	else
	  asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
	return;

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[get_arm_condition_code (x)],
	     stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
	 want to do that.  */
      if (x == const_true_rtx)
	{
	  output_operand_lossage ("instruction never executed");
	  return;
	}
      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
				 (get_arm_condition_code (x))],
	     stream);
      return;

    case 's':
    case 'V':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
	/* Bad value for wCG register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      else
	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

    /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
	  || INTVAL (x) < 0
	  || INTVAL (x) >= 16)
	/* Bad value for wC register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      else
	{
	  static const char * wc_reg_names [16] =
	    {
	      "wCID",  "wCon",  "wCSSF", "wCASF",
	      "wC4",   "wC5",   "wC6",   "wC7",
	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
	      "wC12",  "wC13",  "wC14",  "wC15"
	    };

	  fputs (wc_reg_names [INTVAL (x)], stream);
	}
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
	machine_mode mode = GET_MODE (x);
	int is_quad = (code == 'q');
	int regno;

	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (!REG_P (x)
	    || !IS_VFP_REGNUM (REGNO (x)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
	    || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
		 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if ((GET_MODE_SIZE (mode) != 16
	     && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!NEON_REGNO_OK_FOR_QUAD (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (GET_MODE_SIZE (mode) == 16)
	  fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
				  + (code == 'f' ? 1 : 0));
	else
	  fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
				  + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
	int index = vfp3_const_double_index (x);
	gcc_assert (index != -1);
	fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
	rtx addr;
	bool postinc = FALSE;
	rtx postinc_reg = NULL;
	unsigned align, memsize, align_bits;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	if (GET_CODE (addr) == POST_INC)
	  {
	    postinc = 1;
	    addr = XEXP (addr, 0);
	  }
	if (GET_CODE (addr) == POST_MODIFY)
	  {
	    postinc_reg = XEXP( XEXP (addr, 1), 1);
	    addr = XEXP (addr, 0);
	  }
	asm_fprintf (stream, "[%r", REGNO (addr));

	/* We know the alignment of this access, so we can emit a hint in the
	   instruction (for some alignments) as an aid to the memory subsystem
	   of the target.  */
	align = MEM_ALIGN (x) >> 3;
	memsize = MEM_SIZE (x);

	/* Only certain alignment specifiers are supported by the hardware.  */
	if (memsize == 32 && (align % 32) == 0)
	  align_bits = 256;
	else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
	  align_bits = 128;
	else if (memsize >= 8 && (align % 8) == 0)
	  align_bits = 64;
	else
	  align_bits = 0;

	if (align_bits != 0)
	  asm_fprintf (stream, ":%d", align_bits);

	asm_fprintf (stream, "]");

	if (postinc)
	  fputs("!", stream);
	if (postinc_reg)
	  asm_fprintf (stream, ", %r", REGNO (postinc_reg));
      }
      return;

    case 'C':
      {
	rtx addr;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	gcc_assert (REG_P (addr));
	asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element index.  */
    case 'y':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      gcc_assert (CONST_DOUBLE_P (x));
      int result;
      result = vfp3_const_double_for_fract_bits (x);
      if (result == 0)
	result = vfp3_const_double_for_bits (x);
      fprintf (stream, "#%d", result);
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (stream, "%r", REGNO (x));
	  break;

	case MEM:
	  output_address (GET_MODE (x), XEXP (x, 0));
	  break;

	case CONST_DOUBLE:
	  {
	    char fpstr[20];
	    real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			     sizeof (fpstr), 0, 1);
	    fprintf (stream, "#%s", fpstr);
	  }
	  break;

	default:
	  gcc_assert (GET_CODE (x) != NEG);
	  fputc ('#', stream);
	  if (GET_CODE (x) == HIGH)
	    {
	      fputs (":lower16:", stream);
	      x = XEXP (x, 0);
	    }

	  output_addr_const (stream, x);
	  break;
	}
    }
}
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
	{
	  rtx base = XEXP (x, 0);
	  rtx index = XEXP (x, 1);
	  HOST_WIDE_INT offset = 0;
	  if (!REG_P (base)
	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
	    {
	      /* Ensure that BASE is a register.  */
	      /* (one of them must be).  */
	      /* Also ensure the SP is not used as in index register.  */
	      std::swap (base, index);
	    }
	  switch (GET_CODE (index))
	    {
	    case CONST_INT:
	      offset = INTVAL (index);
	      if (is_minus)
		offset = -offset;
	      asm_fprintf (stream, "[%r, #%wd]",
			   REGNO (base), offset);
	      break;

	    case REG:
	      asm_fprintf (stream, "[%r, %s%r]",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (index));
	      break;

	    case MULT:
	    case ASHIFTRT:
	    case LSHIFTRT:
	    case ASHIFT:
	    case ROTATERT:
	      asm_fprintf (stream, "[%r, %s%r",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (XEXP (index, 0)));
	      arm_print_operand (stream, index, 'S');
	      fputs ("]", stream);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));

	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
	    asm_fprintf (stream, "[%r, #%s%d]!",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == PRE_DEC ? "-" : "",
			 GET_MODE_SIZE (mode));
	  else
	    asm_fprintf (stream, "[%r], #%s%d",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == POST_DEC ? "-" : "",
			 GET_MODE_SIZE (mode));
	}
      else if (GET_CODE (x) == PRE_MODIFY)
	{
	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd]!",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r]!",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else if (GET_CODE (x) == POST_MODIFY)
	{
	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (x, 1)))
	    asm_fprintf (stream, "[%r, #%wd]",
			 REGNO (XEXP (x, 0)),
			 INTVAL (XEXP (x, 1)));
	  else
	    asm_fprintf (stream, "[%r, %r]",
			 REGNO (XEXP (x, 0)),
			 REGNO (XEXP (x, 1)));
	}
      else
	output_addr_const (stream, x);
    }
}
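/* For example (illustrative, not part of the original source): in 32-bit
   state, (plus (reg r0) (const_int 4)) prints as "[r0, #4]", a PRE_DEC of
   an SImode access as "[rN, #-4]!", and a POST_INC as "[rN], #4".  */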
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
	  || code == '(' || code == ')' || code == '#'
	  || (TARGET_32BIT && (code == '?'))
	  || (TARGET_THUMB2 && (code == '!'))
	  || (TARGET_THUMB && (code == '_')));
}
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
	 .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
	{
	  /* See legitimize_pic_address for an explanation of the
	     TARGET_VXWORKS_RTP check.  */
	  /* References to weak symbols cannot be resolved locally:
	     they may be overridden by a non-weak definition at link
	     time.  */
	  if (!arm_pic_data_is_text_relative
	      || (GET_CODE (x) == SYMBOL_REF
		  && (!SYMBOL_REF_LOCAL_P (x)
		      || (SYMBOL_REF_DECL (x)
			  ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
	    fputs ("(GOT)", asm_out_file);
	  else
	    fputs ("(GOTOFF)", asm_out_file);
	}
      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_UNIT_SIZE (mode);

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_integer
	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
	  }
      else
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_real
	      (*CONST_DOUBLE_REAL_VALUE (elt),
	       as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
	  }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
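/* For example (illustrative, not part of the original source): assembling
   the address of a global into a PIC constant pool entry yields
   "\t.word\tfoo(GOT)" when the reference cannot be resolved text-relative
   (e.g. foo is weak or non-local), and "\t.word\tfoo(GOTOFF)" otherwise.  */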
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
	       is_ctor ? ".init_array" : ".fini_array",
	       priority);
      s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}

/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
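/* For example (illustrative, not part of the original source): on an AAPCS
   target, arm_elf_asm_constructor (symbol, 42) emits the address into a
   ".init_array.00042" section with the (target1) relocation, while
   DEFAULT_INIT_PRIORITY constructors go through ctors_section directly.  */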
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
	  (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
	  (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */
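/* Illustrative walk-through (not part of the original source): given

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   final_prescan_insn moves the fsm from state 0 to 1 at the conditional
   branch; ASM_OUTPUT_OPCODE then suppresses the branch and enters state 3;
   the add is printed as "addne"; and reaching .L1 returns the fsm to state
   0 via (*targetm.asm_out.internal_label).  */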
/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */
static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
			   XEXP (comparison, 1));

  switch (mode)
    {
    case E_CC_DNEmode: code = ARM_NE; goto dominance;
    case E_CC_DEQmode: code = ARM_EQ; goto dominance;
    case E_CC_DGEmode: code = ARM_GE; goto dominance;
    case E_CC_DGTmode: code = ARM_GT; goto dominance;
    case E_CC_DLEmode: code = ARM_LE; goto dominance;
    case E_CC_DLTmode: code = ARM_LT; goto dominance;
    case E_CC_DGEUmode: code = ARM_CS; goto dominance;
    case E_CC_DGTUmode: code = ARM_HI; goto dominance;
    case E_CC_DLEUmode: code = ARM_LS; goto dominance;
    case E_CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
	return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
	return code;
      return ARM_NV;

    case E_CC_NOOVmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_PL;
	case LT: return ARM_MI;
	default: return ARM_NV;
	}

    case E_CC_Zmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	default: return ARM_NV;
	}

    case E_CC_Nmode:
      switch (comp_code)
	{
	case NE: return ARM_MI;
	case EQ: return ARM_PL;
	default: return ARM_NV;
	}

    case E_CCFPEmode:
    case E_CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LS;
	case LT: return ARM_MI;
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case ORDERED: return ARM_VC;
	case UNORDERED: return ARM_VS;
	case UNLT: return ARM_LT;
	case UNLE: return ARM_LE;
	case UNGT: return ARM_HI;
	case UNGE: return ARM_PL;
	/* UNEQ and LTGT do not have a representation.  */
	case UNEQ: /* Fall through.  */
	case LTGT: /* Fall through.  */
	default: return ARM_NV;
	}

    case E_CC_SWPmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_LE;
	case GT: return ARM_LT;
	case LE: return ARM_GE;
	case LT: return ARM_GT;
	case GEU: return ARM_LS;
	case GTU: return ARM_CC;
	case LEU: return ARM_CS;
	case LTU: return ARM_HI;
	default: return ARM_NV;
	}

    case E_CC_Cmode:
      switch (comp_code)
	{
	case LTU: return ARM_CS;
	case GEU: return ARM_CC;
	case NE: return ARM_CS;
	case EQ: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CC_CZmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CC_NCVmode:
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case E_CC_Vmode:
      switch (comp_code)
	{
	case NE: return ARM_VS;
	case EQ: return ARM_VC;
	default: return ARM_NV;
	}

    case E_CCmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    default: gcc_unreachable ();
    }
}

/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
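/* For example (illustrative, not part of the original source): calling
   maybe_get_arm_condition_code on (geu (reg:CC CC_REGNUM) (const_int 0))
   takes the E_CCmode arm and returns ARM_CS, since unsigned >= corresponds
   to the carry-set condition on ARM.  */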
/* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
   code registers when not targeting Thumb1.  The VFP condition register
   only exists when generating hard-float code.  */
static bool
arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  if (!TARGET_32BIT)
    return false;

  *p1 = CC_REGNUM;
  *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
  return true;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (ANY_RETURN_P (body))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    start_insn = next_nonnote_insn (start_insn);
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	      return_code = GET_CODE (body);
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;
      rtx label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && BARRIER_P (this_insn))
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == return_code
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == return_code)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}
      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (!BARRIER_P (this_insn)
				  && !LABEL_P (this_insn)));
		}
	      if (!this_insn)
		{
		  /* Oh, dear! we ran off the end.. give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.c assumes that it remains intact
	 across this call.  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf(stream, "i%s\t%s\n\t", buff,
		  arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
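/* For example (illustrative, not part of the original source): with
   arm_condexec_masklen == 2, arm_condexec_mask == 0x1 and arm_current_cc
   == ARM_EQ, buff becomes "te" and the function prints "ite\teq" -- the
   first conditional insn executes on EQ, the second on NE.  */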
/* Returns true if REGNO is a valid register
   for holding a quantity of type MODE.  */
int
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_HARD_FLOAT
		&& regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return false;

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      if (mode == HFmode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      /* VFP registers can hold HImode values.  */
      if (mode == HImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	       || (VALID_NEON_QREG_MODE (mode)
		   && NEON_REGNO_OK_FOR_QUAD (regno))
	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return false;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  Do not allow very large Neon structure
     opaque modes in general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
	return false;

      if (TARGET_THUMB2)
	return true;

      return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return false;
}
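/* For example (illustrative, not part of the original source): in ARM state
   with TARGET_LDRD, arm_hard_regno_mode_ok (1, DImode) is false -- DImode
   needs more than 4 bytes and regno 1 is odd -- so doubleword values are
   kept in even/odd pairs that ldrd/strd can address.  */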
/* Implement MODES_TIEABLE_P.  */

bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode1)
	  || VALID_NEON_QREG_MODE (mode1)
	  || VALID_NEON_STRUCT_MODE (mode1))
      && (VALID_NEON_DREG_MODE (mode2)
	  || VALID_NEON_QREG_MODE (mode2)
	  || VALID_NEON_STRUCT_MODE (mode2)))
    return true;

  return false;
}
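/* For example (illustrative, not part of the original source): with
   TARGET_NEON, DImode and V2SImode are tieable here even though their mode
   classes differ, because both are valid Neon D-register modes; element
   accesses can therefore reuse the containing vector register without a
   copy.  */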
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */

enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (   regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}

/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms () failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

           double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn)
          && GET_CODE (PATTERN (insn)) == SET
          && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
          && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
          && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
          && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
          && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1)))
        {
          value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));

          break;
        }
    }

  if (value == 0)
    {
      debug_rtx (addr);
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
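
/* Illustrative sketch (not in the original sources) of the pattern the
   loop above matches:

       (set (reg r4) (plus (reg hard-frame-pointer) (const_int 8)))

   for which VALUE becomes 8, the argument's offset from the frame
   pointer.  */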

/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t)
      && TYPE_PRECISION (t) == 16
      && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
    return float_type_node;
  return NULL_TREE;
}

/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Set the value of FLT_EVAL_METHOD.
   ISO/IEC TS 18661-3 defines two values that we'd like to make use of:

    0: evaluate all operations and constants, whose semantic type has at
       most the range and precision of type float, to the range and
       precision of float; evaluate all other operations and constants to
       the range and precision of the semantic type;

    N, where _FloatN is a supported interchange floating type
       evaluate all operations and constants, whose semantic type has at
       most the range and precision of _FloatN type, to the range and
       precision of the _FloatN type; evaluate all other operations and
       constants to the range and precision of the semantic type;

   If we have the ARMv8.2-A extensions then we support _Float16 in native
   precision, so we should set this to 16.  Otherwise, we support the type,
   but want to evaluate expressions in float precision, so set this to
   0.  */

static enum flt_eval_method
arm_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
      case EXCESS_PRECISION_TYPE_FAST:
      case EXCESS_PRECISION_TYPE_STANDARD:
        /* We can calculate either in 16-bit range and precision or
           32-bit range and precision.  Make that decision based on whether
           we have native support for the ARMv8.2-A 16-bit floating-point
           instructions or not.  */
        return (TARGET_VFP_FP16INST
                ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
                : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
      case EXCESS_PRECISION_TYPE_IMPLICIT:
        return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      default:
        gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
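
/* Example (illustrative): with _Float16 operands, a target built with the
   ARMv8.2-A FP16 instructions (TARGET_VFP_FP16INST) keeps the arithmetic
   in 16-bit range and precision, while any other target promotes each
   operation to 32-bit float and truncates on assignment.  */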

/* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
   _Float16 if we are using anything other than ieee format for 16-bit
   floating point.  Otherwise, punt to the default implementation.  */
static opt_scalar_float_mode
arm_floatn_mode (int n, bool extended)
{
  if (!extended && n == 16)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
        return HFmode;
      return opt_scalar_float_mode ();
    }

  return default_floatn_mode (n, extended);
}

/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
        {
          operands[2 * i] = dest[i];
          operands[2 * i + 1] = src[i];
        }
    }
  else
    {
      for (i = 0; i < count; i++)
        {
          operands[2 * i] = dest[count - i - 1];
          operands[2 * i + 1] = src[count - i - 1];
        }
    }
}
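
/* Worked example (illustrative): for a 2-component copy where DEST is
   {d1, d2} and SRC is {d0, d1}, the registers overlap and
   REGNO (operands[0]) > REGNO (operands[1]), so the second loop reverses
   the order and emits d2 <- d1 before d1 <- d0, avoiding the early
   clobber of d1.  */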

/* Split operands into moves from op[1] + op[2] into op[0].  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
                               GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (destlo, operands[1]);
      rtx y = gen_rtx_SET (desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
         is in the right place already.  */
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
    }
}
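
/* Example (illustrative): a vcombine of d0 and d1 into q0 (both halves
   already in place) emits only a deleted-insn note; combining d1 and d0
   into q0 hits the reversed case and becomes the two-set PARALLEL that
   is output as a single VSWP.  */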

/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}

/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */

static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
        tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
        tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
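
/* Worked example (illustrative): for MASK = {r4, r5, lr} the first loop
   runs three times (mask &= mask - 1 clears the lowest set bit each
   iteration), so the PARALLEL stores three words below the old stack
   pointer and the unwind note records SP = SP - 12.  */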

/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
         thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
        {
          asm_fprintf (f, "%r", regno);

          if ((lo_mask & ~1) != 0)
            fprintf (f, ", ");
        }
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
          || IS_CMSE_ENTRY (arm_current_func_type ()))
        {
          /* The PC is never popped directly, instead
             it is popped into r3 and then BX is used.  */
          fprintf (f, "}\n");

          thumb_exit (f, -1);

          return;
        }
      else
        {
          if (mask & 0xFF)
            fprintf (f, ", ");

          asm_fprintf (f, "%r", PC_REGNUM);
        }
    }

  fprintf (f, "}\n");
}

/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
        asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      if (IS_CMSE_ENTRY (arm_current_func_type ()))
        {
          asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
                       reg_containing_return_addr);
          asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
        }
      else
        asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
           && !TARGET_BACKTRACE
           && !is_called_in_ARM_mode (current_function_decl)
           && !crtl->calls_eh_return
           && !IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* We can deduce the registers used from the function's
         return value.  This is more reliable than examining
         df_regs_ever_live_p () because that will be set if the register is
         ever used in the function, not just if the register is used
         to hold a return value.  */

      if (crtl->return_rtx != 0)
        mode = GET_MODE (crtl->return_rtx);
      else
        mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
        {
          /* In a void function we can use any argument register.
             In a function that returns a structure on the stack
             we can use the second and third argument registers.  */
          if (mode == VOIDmode)
            regs_available_for_popping =
              (1 << ARG_REGISTER (1))
              | (1 << ARG_REGISTER (2))
              | (1 << ARG_REGISTER (3));
          else
            regs_available_for_popping =
              (1 << ARG_REGISTER (2))
              | (1 << ARG_REGISTER (3));
        }
      else if (size <= 4)
        regs_available_for_popping =
          (1 << ARG_REGISTER (2))
          | (1 << ARG_REGISTER (3));
      else if (size <= 8)
        regs_available_for_popping =
          (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    --pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
         reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
          && reg_containing_return_addr == LAST_ARG_REGNUM)
        {
          asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
          reg_containing_return_addr = LR_REGNUM;
        }
      else if (size > 12)
        {
          /* Register a4 is being used to hold part of the return value,
             but we have dire need of a free, low register.  */
          restore_a4 = TRUE;

          asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
        }

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
        {
          /* The fourth argument register is available.  */
          regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

          --pops_needed;
        }
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
        number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
         the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
                   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
        {
          int stack_pointer;

          /* We popped the stack pointer as well,
             find the register that contains it.  */
          stack_pointer = number_of_first_bit_set (regs_available_for_popping);

          /* Move it into the stack register.  */
          asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

          /* At this point we have popped all necessary registers, so
             do not worry about restoring regs_available_for_popping
             to its correct value:

             assert (pops_needed == 0)
             assert (regs_available_for_popping == (1 << frame_pointer))
             assert (regs_to_pop == (1 << STACK_POINTER))  */
        }
      else
        {
          /* Since we have just moved the popped value into the frame
             pointer, the popping register is available for reuse, and
             we know that we still have the stack pointer left to pop.  */
          regs_available_for_popping |= (1 << frame_pointer);
        }
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
                   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
         Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
        assert (regs_to_pop == (1 << STACK_POINTER))
        assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
        {
          asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
          reg_containing_return_addr = LR_REGNUM;
        }

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* This is for the cases where LR is not being used to contain the return
         address.  It may therefore contain information that we might not want
         to leak, hence it must be cleared.  The value in R0 will never be a
         secret at this point, so it is safe to use it, see the clearing code
         in 'cmse_nonsecure_entry_clear_before_return'.  */
      if (reg_containing_return_addr != LR_REGNUM)
        asm_fprintf (f, "\tmov\tlr, r0\n");

      asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
      asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
    }
  else
    asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}

/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx_insn *insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
                 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
        {
          if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
              || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
            CC_STATUS_INIT;
        }

      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
        {
          rtx set = single_set (insn);
          cfun->machine->thumb1_cc_insn = insn;
          cfun->machine->thumb1_cc_op0 = SET_DEST (set);
          cfun->machine->thumb1_cc_op1 = const0_rtx;
          cfun->machine->thumb1_cc_mode = CC_NOOVmode;
          if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
            {
              rtx src1 = XEXP (SET_SRC (set), 1);
              if (src1 == const0_rtx)
                cfun->machine->thumb1_cc_mode = CCmode;
            }
          else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
            {
              /* Record the src register operand instead of dest because
                 cprop_hardreg pass propagates src.  */
              cfun->machine->thumb1_cc_op0 = SET_SRC (set);
            }
        }
      else if (conds != CONDS_NOCOND)
        cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if an unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error ("Unexpected thumb1 far jump");
}

int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
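
/* Worked example (illustrative): VAL = 0x000ff000 is accepted because it
   equals 0xff << 12; the loop bound of 25 is sufficient since
   0xff << 24 is the last shift that still fits in 32 bits.  */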

/* Returns nonzero if the current function contains,
   or might contain, a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx_insn *insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
         of the arg pointer register.  If that register is not being used,
         then there are no arguments on the stack, and we do not have to
         worry that a far jump might force the prologue to push the link
         register, changing the stack offsets.  In this case we can just
         return false, since the presence of far jumps in the function will
         not affect stack offsets.

         If the arg pointer is live (or if it was live, but has now been
         eliminated and so set to dead) then we do have to test to see if
         the function might contain a far jump.  This test can lead to some
         false negatives, since before reload is completed, the length of
         branch instructions is not known, so gcc defaults to returning their
         longest length, which in turn sets the far jump attribute to true.

         A false negative will not result in bad code being generated, but it
         will result in a needless push and pop of the link register.  We
         hope that this does not occur too often.

         If we need doubleword stack alignment this could affect the other
         elimination offsets, so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
        cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
        return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
        far_jump = true;

      func_size += get_attr_length (insn);
    }

  /* The far_jump attribute will always be true for thumb1 before the
     shorten_branch pass.  So checking the far_jump attribute before
     shorten_branch isn't very useful.

     The following heuristic tries to estimate more accurately whether a
     far jump may finally be used.  The heuristic is very conservative as
     there is no chance to roll back the decision not to use a far jump.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
  if (far_jump)
    {
      if ((func_size * 3) >= 2048)
        {
          /* Record the fact that we have decided that
             the function does use far jumps.  */
          cfun->machine->far_jump_used = 1;
          return 1;
        }
    }

  return 0;
}
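
/* Worked example of the heuristic (illustrative): func_size * 3 >= 2048
   means a function of roughly 683 or more bytes of insns is assumed to
   need far jumps; at the worst-case ratio of one 4-byte literal-pool
   entry per 2-byte insn, that many insn bytes can expand towards the
   2048-byte branch range limit.  */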

/* Return nonzero if FUNC must be entered in ARM mode.  */
static bool
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;

  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
}

/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
          || TARGET_BACKTRACE
          || (live_regs_mask & 1 << LR_REGNUM) == 0
          || TARGET_INTERWORK
          || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
          || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
         && (for_prologue || call_used_regs[reg_base + n_free]))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
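
/* Worked example (illustrative): with AMOUNT = 520 and three free low
   registers (n_free = 3, i.e. 12 bytes), 520 - 12 < 512 holds, so the
   function returns (520 - 508) / 4 = 3 extra registers to push; the
   remaining 508-byte adjustment then fits a single immediate sub/add.  */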

/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* We can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
        push    {r4, r5, r6, r7, lr}
        mov     r7, r9
        mov     r6, r8
        push    {r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
         returning.  */
      if (size <= 12)
        mask |= 1 << 3;
      if (size <= 8)
        mask |= 1 << 2;

      if (mask == 0)
        /* Oh dear!  We have no low registers into which we can pop
           high registers!  */
        internal_error
          ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
        if (live_regs_mask & (1 << next_hi_reg))
          break;

      while (high_regs_pushed)
        {
          /* Find lo register(s) into which the high register(s) can
             be popped.  */
          for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
            {
              if (mask & (1 << regno))
                high_regs_pushed--;
              if (high_regs_pushed == 0)
                break;
            }

          mask &= (2 << regno) - 1;     /* A noop if regno == 8 */

          /* Pop the values into the low register(s).  */
          thumb_pop (asm_out_file, mask);

          /* Move the value(s) into the high registers.  */
          for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
            {
              if (mask & (1 << regno))
                {
                  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
                               regno);

                  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
                    if (live_regs_mask & (1 << next_hi_reg))
                      break;
                }
            }
        }
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
        live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
         structure was created which includes an adjusted stack
         pointer, so just pop everything.  */
      if (live_regs_mask)
        thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
         PC or it was kept in LR for the entire function.
         Note that thumb_pop has already called thumb_exit if the
         PC was in the list.  */
      if (!had_to_push_lr)
        thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
        thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
        {
          if (size > 12)
            {
              /* We have no free low regs, so save one.  */
              asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
                           LAST_ARG_REGNUM);
            }

          /* Get the return address into a temporary register.  */
          thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

          if (size > 12)
            {
              /* Move the return address to lr.  */
              asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
                           LAST_ARG_REGNUM);
              /* Restore the low register.  */
              asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
                           IP_REGNUM);
              regno = LR_REGNUM;
            }
          else
            regno = LAST_ARG_REGNUM;
        }
      else
        regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
                   SP_REGNUM, SP_REGNUM,
                   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}

/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}

/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}

/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}

/* Check that FUNC is called with a different mode.  */

bool
arm_change_mode_p (tree func)
{
  if (TREE_CODE (func) != FUNCTION_DECL)
    return false;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  int flags = callee_opts->x_target_flags;

  return (TARGET_THUMB_P (flags) != TARGET_THUMB);
}

/* Like arm_compute_initial_elimination offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->saved_args;

        case FRAME_POINTER_REGNUM:
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->saved_args;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->saved_args;

        default:
          gcc_unreachable ();
        }

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->soft_frame;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }

    default:
      gcc_unreachable ();
    }
}

/* Generate the function's prologue.  */

void
thumb1_expand_prologue (void)
{
  rtx_insn *insn;

  HOST_WIDE_INT amount;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;
  bool lr_needs_saving;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
        current_function_static_stack_size = 0;
      return;
    }

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);

  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
        {
          int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
          unsigned long mask;

          mask = 1ul << (LAST_ARG_REGNUM + 1);
          mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

          insn = thumb1_emit_multi_reg_push (mask, 0);
        }
      else
        {
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                        stack_pointer_rtx, x));
        }
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
         The code looks like this:

         0     sub   SP, #16         Reserve space for 4 registers.
         2     push  {R7}            Push low registers.
         4     add   R7, SP, #20     Get the stack pointer before the push.
         6     str   R7, [SP, #8]    Store the stack pointer
                                        (before reserving the space).
         8     mov   R7, PC          Get hold of the start of this code + 12.
        10     str   R7, [SP, #16]   Store it.
        12     mov   R7, FP          Get hold of the current frame pointer.
        14     str   R7, [SP, #4]    Store it.
        16     mov   R7, LR          Get hold of the current return address.
        18     str   R7, [SP, #12]   Store it.
        20     add   R7, SP, #16     Point at the start of the
                                        backtrace structure.
        22     mov   FP, R7          Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
        {
          insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
          lr_needs_saving = false;

          offset = bit_count (l_mask) * UNITS_PER_WORD;
        }

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
         to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
         prevent the scheduler from doing anything weird.  Failing that
         we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
        {
          x = gen_rtx_REG (SImode, PC_REGNUM);
          emit_move_insn (work_reg, x);

          x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);

          emit_move_insn (work_reg, arm_hfp_rtx);

          x = plus_constant (Pmode, stack_pointer_rtx, offset);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);
        }
      else
        {
          emit_move_insn (work_reg, arm_hfp_rtx);

          x = plus_constant (Pmode, stack_pointer_rtx, offset);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);

          x = gen_rtx_REG (SImode, PC_REGNUM);
          emit_move_insn (work_reg, x);

          x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);
        }

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization:  If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
           || (high_regs_pushed == 0 && lr_needs_saving))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
      lr_needs_saving = false;
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
                                                 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
        if (live_regs_mask & (1 << next_hi_reg))
          break;

      /* Here we need to mask out registers used for passing arguments
         even if they can be pushed.  This is to avoid using them to stash the high
         registers.  Such kind of stash may clobber the use of arguments.  */
      pushable_regs = l_mask & (~arg_regs_mask);
      if (lr_needs_saving)
        pushable_regs &= ~(1 << LR_REGNUM);

      if (pushable_regs == 0)
        pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
        {
          unsigned long real_regs_mask = 0;
          unsigned long push_mask = 0;

          for (regno = LR_REGNUM; regno >= 0; regno--)
            {
              if (pushable_regs & (1 << regno))
                {
                  emit_move_insn (gen_rtx_REG (SImode, regno),
                                  gen_rtx_REG (SImode, next_hi_reg));

                  high_regs_pushed--;
                  real_regs_mask |= (1 << next_hi_reg);
                  push_mask |= (1 << regno);

                  if (high_regs_pushed)
                    {
                      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
                           next_hi_reg--)
                        if (live_regs_mask & (1 << next_hi_reg))
                          break;
                    }
                  else
                    break;
                }
            }

          /* If we had to find a work register and we have not yet
             saved the LR then add it to the list of regs to push.  */
          if (lr_needs_saving)
            {
              push_mask |= 1 << LR_REGNUM;
              real_regs_mask |= 1 << LR_REGNUM;
              lr_needs_saving = false;
            }

          insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
                    stack_pointer_rtx);

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
    sorry ("-fstack-check=specific for Thumb-1");

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
        {
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                        GEN_INT (- amount)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      else
        {
          rtx reg, dwarf;

          /* The stack decrement is too big for an immediate value in a single
             insn.  In theory we could issue multiple subtracts, but after
             three of them it becomes more space efficient to place the full
             value in the constant pool and load into a register.  (Also the
             ARM debugger really likes to see only one stack decrement per
             function).  So instead we look for a scratch register into which
             we can load the decrement, and then we subtract this from the
             stack pointer.  Unfortunately on the thumb the only available
             scratch registers are the argument registers, and we cannot use
             these as they may hold arguments to the function.  Instead we
             attempt to locate a call preserved register which is used by this
             function.  If we can find one, then we know that it will have
             been pushed at the start of the prologue and so we can corrupt
             it now.  */
          for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
            if (live_regs_mask & (1 << regno))
              break;

          gcc_assert (regno <= LAST_LO_REGNUM);

          reg = gen_rtx_REG (SImode, regno);

          emit_insn (gen_movsi (reg, GEN_INT (- amount)));

          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                        stack_pointer_rtx, reg));

          dwarf = gen_rtx_SET (stack_pointer_rtx,
                               plus_constant (Pmode, stack_pointer_rtx,
                                              -amount));
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}

/* Clear caller saved registers not used to pass return values and leaked
   condition flags before exiting a cmse_nonsecure_entry function.  */

void
cmse_nonsecure_entry_clear_before_return (void)
{
  uint64_t to_clear_mask[2];
  uint32_t padding_bits_to_clear = 0;
  uint32_t *padding_bits_to_clear_ptr = &padding_bits_to_clear;
  int regno, maxregno = IP_REGNUM;
  tree result_type;
  rtx result_rtl;

  to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
  to_clear_mask[0] |= (1ULL << IP_REGNUM);

  /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
     registers.  We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
     to make sure the instructions used to clear them are present.  */
  if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
    {
      uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
      maxregno = LAST_VFP_REGNUM;

      float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
      to_clear_mask[0] |= float_mask;

      float_mask = (1ULL << (maxregno - 63)) - 1;
      to_clear_mask[1] = float_mask;

      /* Make sure we don't clear the two scratch registers used to clear the
         relevant FPSCR bits in output_return_instruction.  */
      emit_use (gen_rtx_REG (SImode, IP_REGNUM));
      to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
      emit_use (gen_rtx_REG (SImode, 4));
      to_clear_mask[0] &= ~(1ULL << 4);
    }

  /* If the user has defined registers to be caller saved, these are no longer
     restored by the function before returning and must thus be cleared for
     security purposes.  */
  for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
    {
      /* We do not touch registers that can be used to pass arguments as per
         the AAPCS, since these should never be made callee-saved by user
         options.  */
      if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
        continue;
      if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
        continue;
      if (call_used_regs[regno])
        to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
    }

  /* Make sure we do not clear the registers used to return the result in.  */
  result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
  if (!VOID_TYPE_P (result_type))
    {
      result_rtl = arm_function_value (result_type, current_function_decl, 0);

      /* No need to check that we return in registers, because we don't
         support returning on stack yet.  */
      to_clear_mask[0]
        &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
                                       padding_bits_to_clear_ptr);
    }

  if (padding_bits_to_clear != 0)
    {
      rtx reg_rtx;
      /* Padding bits to clear is not 0 so we know we are dealing with
         returning a composite type, which only uses r0.  Let's make sure that
         r1-r3 is cleared too, we will use r1 as a scratch register.  */
      gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);

      reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);

      /* Fill the lower half of the negated padding_bits_to_clear.  */
      emit_move_insn (reg_rtx,
                      GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));

      /* Also fill the top half of the negated padding_bits_to_clear.  */
      if (((~padding_bits_to_clear) >> 16) > 0)
        emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
                                                      GEN_INT (16),
                                                      GEN_INT (16)),
                                GEN_INT ((~padding_bits_to_clear) >> 16)));

      emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
                             gen_rtx_REG (SImode, R0_REGNUM),
                             reg_rtx));
    }

  for (regno = R0_REGNUM; regno <= maxregno; regno++)
    {
      if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
        continue;

      if (IS_VFP_REGNUM (regno))
        {
          /* If regno is an even vfp register and its successor is also to
             be cleared, use vmov.  */
          if (TARGET_VFP_DOUBLE
              && VFP_REGNO_OK_FOR_DOUBLE (regno)
              && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
            {
              emit_move_insn (gen_rtx_REG (DFmode, regno),
                              CONST1_RTX (DFmode));
              emit_use (gen_rtx_REG (DFmode, regno));
              regno++;
            }
          else
            {
              emit_move_insn (gen_rtx_REG (SFmode, regno),
                              CONST1_RTX (SFmode));
              emit_use (gen_rtx_REG (SFmode, regno));
            }
        }
      else
        {
          if (TARGET_THUMB1)
            {
              if (regno == R0_REGNUM)
                emit_move_insn (gen_rtx_REG (SImode, regno),
                                const0_rtx);
              else
                {
                  /* R0 has either been cleared before, see code above, or it
                     holds a return value, either way it is not secret
                     information.  */
                  emit_move_insn (gen_rtx_REG (SImode, regno),
                                  gen_rtx_REG (SImode, R0_REGNUM));
                  emit_use (gen_rtx_REG (SImode, regno));
                }
            }
          else
            {
              emit_move_insn (gen_rtx_REG (SImode, regno),
                              gen_rtx_REG (SImode, LR_REGNUM));
              emit_use (gen_rtx_REG (SImode, regno));
            }
        }
    }
}
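
/* Example (illustrative): with -mfloat-abi=soft, to_clear_mask[0] starts
   out covering r0-r3 plus ip (assuming the usual NUM_ARG_REGS of 4);
   registers carrying the return value are then masked out, so a function
   returning an int in r0 clears r1-r3 and ip (plus any registers made
   caller-saved by user options) before returning.  */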

/* Generate pattern *pop_multiple_with_stack_update_and_return if single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is if single register is to be
   returned, or multiple register return.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
         functions or adapt code to handle according to ACLE.  This path should
         not be reachable for cmse_nonsecure_entry functions though we prefer
         to assert it for now to ensure that future code changes do not silently
         change this behavior.  */
      gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
      if (num_regs == 1)
        {
          rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
          rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
          rtx addr = gen_rtx_MEM (SImode,
                                  gen_rtx_POST_INC (SImode,
                                                    stack_pointer_rtx));
          set_mem_alias_set (addr, get_frame_alias_set ());
          XVECEXP (par, 0, 0) = ret_rtx;
          XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
          RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
          emit_jump_insn (par);
        }
      else
        {
          saved_regs_mask &= ~(1 << LR_REGNUM);
          saved_regs_mask |= (1 << PC_REGNUM);
          arm_emit_multi_reg_pop (saved_regs_mask);
        }
    }
  else
    {
      if (IS_CMSE_ENTRY (arm_current_func_type ()))
        cmse_nonsecure_entry_clear_before_return ();
      emit_jump_insn (simple_return_rtx);
    }
}
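
/* Example (illustrative): if LR was the only register saved, the
   PARALLEL above becomes a single "pop {pc}", returning and releasing
   the one stacked word in one insn; with more saved registers the LR
   slot is redirected into the PC and a single multi-register pop
   restores everything and returns.  */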

void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
        emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (amount)));
      else
        {
          /* r3 is always free in the epilogue.  */
          rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

          emit_insn (gen_movsi (reg, GEN_INT (amount)));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
        }
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    cmse_nonsecure_entry_clear_before_return ();
}
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers saved and how far away the floats are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
        num_regs++;
        floats_from_frame += 4;
      }

  if (TARGET_HARD_FLOAT)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
        {
          rtx_insn *insn;
          floats_from_frame += saved_size;
          insn = emit_insn (gen_addsi3 (ip_rtx,
                                        hard_frame_pointer_rtx,
                                        GEN_INT (-floats_from_frame)));
          arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
                                       ip_rtx, hard_frame_pointer_rtx);
        }

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
        /* Look for a case where a reg does not need restoring.  */
        if ((!df_regs_ever_live_p (i) || call_used_regs[i])
            && (!df_regs_ever_live_p (i + 1)
                || call_used_regs[i + 1]))
          {
            if (start_reg != i)
              arm_emit_vfp_multi_reg_pop (start_reg,
                                          (i - start_reg) / 2,
                                          gen_rtx_REG (SImode, IP_REGNUM));
            start_reg = i + 2;
          }

      /* Restore the remaining regs that we have discovered (or possibly
         even all of them, if the conditional in the for loop never
         fired).  */
      if (start_reg != i)
        arm_emit_vfp_multi_reg_pop (start_reg,
                                    (i - start_reg) / 2,
                                    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
         it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx_insn *insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
        if (df_regs_ever_live_p (i) && !call_used_regs[i])
          {
            rtx addr = gen_frame_mem (V2SImode,
                                      plus_constant (Pmode,
                                                     hard_frame_pointer_rtx,
                                                     - lrm_count * 4));
            insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
            REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                               gen_rtx_REG (V2SImode, i),
                                               NULL_RTX);
            lrm_count += 2;
          }
    }

  /* saved_regs_mask should contain IP, which holds the old stack pointer
     from the time of activation creation.  Since SP and IP are adjacent
     registers, we can restore its value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);
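
  /* Illustrative example (not from the original source): if the prologue
     saved {r4, fp, ip, lr, pc}, saved_regs_mask arrives here as

       (1 << 4) | (1 << 11) | (1 << 12) | (1 << 14) | (1 << 15)

     and for a normal return the two rewrites above turn it into
     {r4, fp, sp, pc}: ip's stack slot (the old stack pointer) is reloaded
     directly into sp, and lr's slot directly into pc, so the whole restore
     can be a single load-multiple.  */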
  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx_insn *insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                    hard_frame_pointer_rtx,
                                    GEN_INT (- 4 * num_regs)));

      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
                                   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
         IP onto the stack, so restore it now.  */
      rtx_insn *insn;
      rtx addr = gen_rtx_MEM (SImode,
                              gen_rtx_POST_INC (SImode,
                                                stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                         gen_rtx_REG (SImode, IP_REGNUM),
                                         NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
                           stack_pointer_rtx,
                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i, j;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogue.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
        emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      rtx_insn *insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
        {
          /* In ARM mode, frame pointer points to first saved register.
             Restore stack pointer to last saved register.  */
          amount = offsets->frame - offsets->saved_regs;

          /* Force out any pending memory operations that reference stacked data
             before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                        hard_frame_pointer_rtx,
                                        GEN_INT (amount)));
          arm_add_cfa_adjust_cfa_note (insn, amount,
                                       stack_pointer_rtx,
                                       hard_frame_pointer_rtx);

          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
             deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));
        }
      else
        {
          /* In Thumb-2 mode, the frame pointer points to the last saved
             register.  */
          amount = offsets->locals_base - offsets->saved_regs;
          if (amount)
            {
              insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                            hard_frame_pointer_rtx,
                                            GEN_INT (amount)));
              arm_add_cfa_adjust_cfa_note (insn, amount,
                                           hard_frame_pointer_rtx,
                                           hard_frame_pointer_rtx);
            }

          /* Force out any pending memory operations that reference stacked data
             before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          insn = emit_insn (gen_movsi (stack_pointer_rtx,
                                       hard_frame_pointer_rtx));
          arm_add_cfa_adjust_cfa_note (insn, 0,
                                       stack_pointer_rtx,
                                       hard_frame_pointer_rtx);
          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
             deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));
        }
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
         last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
        {
          rtx_insn *tmp;
          /* Force out any pending memory operations that reference stacked data
             before stack de-allocation occurs.  */
          emit_insn (gen_blockage ());
          tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                       stack_pointer_rtx,
                                       GEN_INT (amount)));
          arm_add_cfa_adjust_cfa_note (tmp, amount,
                                       stack_pointer_rtx, stack_pointer_rtx);
          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
             not deleted.  */
          emit_insn (gen_force_register_use (stack_pointer_rtx));
        }
    }

  if (TARGET_HARD_FLOAT)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
         any groupings made in the prologue and generate matching
         vldm operations.  The need to match groups is because,
         unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
        /* Look for a case where a reg does not need restoring.  */
        if ((!df_regs_ever_live_p (i) || call_used_regs[i])
            && (!df_regs_ever_live_p (i + 1)
                || call_used_regs[i + 1]))
          {
            /* Restore the regs discovered so far (from reg+2 to
               end_reg).  */
            if (end_reg > i + 2)
              arm_emit_vfp_multi_reg_pop (i + 2,
                                          (end_reg - (i + 2)) / 2,
                                          stack_pointer_rtx);
            end_reg = i;
          }

      /* Restore the remaining regs that we have discovered (or possibly
         even all of them, if the conditional in the for loop never
         fired).  */
      if (end_reg > i + 2)
        arm_emit_vfp_multi_reg_pop (i + 2,
                                    (end_reg - (i + 2)) / 2,
                                    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (df_regs_ever_live_p (i) && !call_used_regs[i])
        {
          rtx_insn *insn;
          rtx addr = gen_rtx_MEM (V2SImode,
                                  gen_rtx_POST_INC (SImode,
                                                    stack_pointer_rtx));
          set_mem_alias_set (addr, get_frame_alias_set ());
          insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
          REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                             gen_rtx_REG (V2SImode, i),
                                             NULL_RTX);
          arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
                                       stack_pointer_rtx, stack_pointer_rtx);
        }

  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
          && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
          && !IS_CMSE_ENTRY (func_type)
          && !IS_STACKALIGN (func_type)
          && really_return
          && crtl->args.pretend_args_size == 0
          && saved_regs_mask & (1 << LR_REGNUM)
          && !crtl->calls_eh_return)
        {
          saved_regs_mask &= ~(1 << LR_REGNUM);
          saved_regs_mask |= (1 << PC_REGNUM);
          return_in_pc = true;
        }

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
        {
          for (i = 0; i <= LAST_ARM_REGNUM; i++)
            if (saved_regs_mask & (1 << i))
              {
                rtx addr = gen_rtx_MEM (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx));
                set_mem_alias_set (addr, get_frame_alias_set ());

                if (i == PC_REGNUM)
                  {
                    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
                    XVECEXP (insn, 0, 0) = ret_rtx;
                    XVECEXP (insn, 0, 1)
                      = gen_rtx_SET (gen_rtx_REG (SImode, i), addr);
                    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
                    insn = emit_jump_insn (insn);
                  }
                else
                  {
                    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
                                                 addr));
                    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
                                                       gen_rtx_REG (SImode, i),
                                                       NULL_RTX);
                    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
                                                 stack_pointer_rtx,
                                                 stack_pointer_rtx);
                  }
              }
        }
      else
        {
          if (TARGET_LDRD
              && current_tune->prefer_ldrd_strd
              && !optimize_function_for_size_p (cfun))
            {
              if (TARGET_THUMB2)
                thumb2_emit_ldrd_pop (saved_regs_mask);
              else if (TARGET_ARM && !IS_INTERRUPT (func_type))
                arm_emit_ldrd_pop (saved_regs_mask);
              else
                arm_emit_multi_reg_pop (saved_regs_mask);
            }
          else
            arm_emit_multi_reg_pop (saved_regs_mask);
        }

      if (return_in_pc)
        return;
    }

  amount = crtl->args.pretend_args_size
           + arm_compute_static_chain_stack_bytes ();
  if (amount)
    {
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp
        = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                 stack_pointer_rtx,
                                 GEN_INT (amount)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
        {
          /* Restore pretend args.  Refer to arm_expand_prologue for how
             pretend_args are saved on the stack.  */
          int num_regs = crtl->args.pretend_args_size / 4;
          saved_regs_mask = (0xf0 >> num_regs) & 0xf;
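
          /* Worked example (illustrative): for pretend_args_size == 8 we
             get num_regs == 2 and saved_regs_mask == (0xf0 >> 2) & 0xf
             == 0xc, i.e. {r2, r3} -- the last two argument registers,
             which is exactly what the prologue pushed for the anonymous
             arguments.  */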
          for (j = 0, i = 0; j < num_regs; i++)
            if (saved_regs_mask & (1 << i))
              {
                rtx reg = gen_rtx_REG (SImode, i);
                dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
                j++;
              }
          REG_NOTES (tmp) = dwarf;
        }
      arm_add_cfa_adjust_cfa_note (tmp, amount,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* CMSE_ENTRY always returns.  */
      gcc_assert (really_return);
      cmse_nonsecure_entry_clear_before_return ();
    }

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
                           stack_pointer_rtx,
                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char *name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
              == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
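
/* For illustration (a sketch, not emitted verbatim above), the complete
   interworking prelude for a function <name> looks like:

        orr     ip, pc, #1
        bx      ip
        .code   16
        .globl  .real_start_of<name>
        .thumb_func
   .real_start_of<name>:

   The two ARM-state instructions compute the address just past the bx
   with the Thumb bit set, then branch-exchange into the Thumb body.  */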
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
        {
          output_asm_insn ("ldr\t%H0, %2", operands);
          output_asm_insn ("ldr\t%0, %1", operands);
        }
      else
        {
          output_asm_insn ("ldr\t%0, %1", operands);
          output_asm_insn ("ldr\t%H0, %2", operands);
        }
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
        base = arg2, offset = arg1;
      else
        base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
        {
          int reg_offset = REGNO (offset);
          int reg_base = REGNO (base);
          int reg_dest = REGNO (operands[0]);

          /* Add the base and offset registers together into the
             higher destination register.  */
          asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
                       reg_dest + 1, reg_base, reg_offset);

          /* Load the lower destination register from the address in
             the higher destination register.  */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
                       reg_dest, reg_dest + 1);

          /* Load the higher destination register from its own address
             plus 4.  */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
                       reg_dest + 1, reg_dest + 1);
        }
      else
        {
          /* Compute <address> + 4 for the high order load.  */
          operands[2] = adjust_address (operands[1], SImode, 4);

          /* If the computed address is held in the low order register
             then load the high order register first, otherwise always
             load the low order register first.  */
          if (REGNO (operands[0]) == REGNO (base))
            {
              output_asm_insn ("ldr\t%H0, %2", operands);
              output_asm_insn ("ldr\t%0, %1", operands);
            }
          else
            {
              output_asm_insn ("ldr\t%0, %1", operands);
              output_asm_insn ("ldr\t%H0, %2", operands);
            }
        }
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
         directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
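
/* Ordering example (illustrative): if operands[0] is r2 and the address
   is held in r2 itself, the base register is also the low destination,
   so the high word must be loaded first:

        ldr     r3, [r2, #4]    @ high word; base still intact
        ldr     r2, [r2]        @ low word; clobbers the base last

   Loading in the other order would overwrite r2 before the second load
   could use it.  */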
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
        std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
        std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
        thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
        cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
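
/* Illustrative output (assuming a call through r3): the insn above
   prints "bl .L<n>", and arm_file_end below emits the matching
   per-register trampoline at the end of the text section:

   .L<n>:
        bx      r3

   so an indirect call becomes a short bl to a shared bx stub.  */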
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
                                              plus_constant (Pmode, in,
                                                             offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
                                                                offset)),
                            reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
                                              plus_constant (Pmode, in,
                                                             offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
                                                                offset)),
                            reg));
    }
}

void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}
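
/* Worked example (illustrative): a 15-byte copy is decomposed above as
   one 12-byte ldmia/stmia block (which advances the pointer registers
   themselves), leaving len == 3, which falls through to one halfword
   move at offset 0 and one byte move at offset 2.  */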
/* Return the length of a function name prefix
    that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (Eg gas prior to 2.20).  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
/* This function is used to print CPU tuning information as comment
   in assembler file.  Pointers are not printed for now.  */

void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
               current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefetch.l1_cache_size:\t%d\n",
               current_tune->prefetch.l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefetch.l1_cache_line_size:\t%d\n",
               current_tune->prefetch.l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefer_constant_pool:\t%d\n",
               (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
               current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
               current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
               current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
               current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefer_ldrd_strd:\t%d\n",
               (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "logical_op_non_short_circuit:\t[%d,%d]\n",
               (int) current_tune->logical_op_non_short_circuit_thumb,
               (int) current_tune->logical_op_non_short_circuit_arm);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "prefer_neon_for_64bits:\t%d\n",
               (int) current_tune->prefer_neon_for_64bits);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "disparage_flag_setting_t16_encodings:\t%d\n",
               (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "string_ops_prefer_neon:\t%d\n",
               (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
               "max_insns_inline_memset:\t%d\n",
               current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
               current_tune->fusible_ops);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
               (int) current_tune->sched_autopref);
}
/* Print .arch and .arch_extension directives corresponding to the
   current architecture configuration.  */
static void
arm_print_asm_arch_directives ()
{
  const arch_option *arch
    = arm_parse_arch_option_name (all_architectures, "-march",
                                  arm_active_target.arch_name);
  auto_sbitmap opt_bits (isa_num_bits);

  gcc_assert (arch);

  asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
  if (!arch->common.extensions)
    return;

  for (const struct cpu_arch_extension *opt = arch->common.extensions;
       opt->name != NULL;
       opt++)
    {
      if (!opt->remove)
        {
          arm_initialize_isa (opt_bits, opt->isa_bits);

          /* If every feature bit of this option is set in the target
             ISA specification, print out the option name.  However,
             don't print anything if all the bits are part of the
             FPU specification.  */
          if (bitmap_subset_p (opt_bits, arm_active_target.isa)
              && !bitmap_subset_p (opt_bits, isa_all_fpubits))
            asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
        }
    }
}
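
/* Example output (illustrative, for a hypothetical -march value whose
   "mp" extension bits are all enabled in the target ISA):

        .arch   armv7-a
        .arch_extension mp

   Extensions whose bits are entirely part of the FPU specification are
   deliberately suppressed, as the comment above explains.  */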
static void
arm_file_start (void)
{
  int val;

  if (TARGET_BPABI)
    {
      /* We don't have a specified CPU.  Use the architecture to
         generate the tags.

         Note: it might be better to do this unconditionally, then the
         assembler would not need to know about all new CPU names as
         they are added.  */
      if (!arm_active_target.core_name)
        {
          /* armv7ve doesn't support any extensions.  */
          if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
            {
              /* Keep backward compatability for assemblers
                 which don't support armv7ve.  */
              asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
              asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
              asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
              asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
              asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
            }
          else
            arm_print_asm_arch_directives ();
        }
      else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
        asm_fprintf (asm_out_file, "\t.arch %s\n",
                     arm_active_target.core_name + 8);
      else
        {
          const char* truncated_name
            = arm_rewrite_selected_cpu (arm_active_target.core_name);
          asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
        }

      if (print_tune_info)
        arm_print_tune_info ();

      if (! TARGET_SOFT_FLOAT)
        {
          if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
            arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

          if (TARGET_HARD_FLOAT_ABI)
            arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
        }

      /* Some of these attributes only apply when the corresponding features
         are used.  However we don't have any easy way of figuring this out.
         Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
        arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
        {
          arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
          arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
        }
      if (flag_signaling_nans)
        arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
                               flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
                               flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
        val = 4;
      else if (optimize >= 2)
        val = 2;
      else if (optimize)
        val = 1;
      else
        val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
                               unaligned_access);

      if (arm_fp16_format)
        arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
                                 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
        arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
        {
          targetm.asm_out.internal_label (asm_out_file, "L",
                                          CODE_LABEL_NUMBER (label));
          asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
        }
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
                     HOST_WIDE_INT, tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
                                       function)
                    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
        {
          /* push r3 so we can use it as a temporary.  */
          /* TODO: Omit this save if r3 is not used.  */
          fputs ("\tpush {r3}\n", file);
          fputs ("\tldr\tr3, ", file);
        }
      else
        fputs ("\tldr\tr12, ", file);
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
        {
          /* If we are generating PIC, the ldr instruction below loads
             "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
             the address of the add + 8, so we have:

             r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
                 = target + 1.

             Note that we have "+ 1" because some versions of GNU ld
             don't set the low bit of the result for R_ARM_REL32
             relocations against thumb function symbols.
             On ARMv6M this is +4, not +8.  */
          ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
          assemble_name (file, labelpc);
          fputs (":\n", file);
          if (TARGET_THUMB1_ONLY)
            {
              /* This is 2 insns after the start of the thunk, so we know it
                 is 4-byte aligned.  */
              fputs ("\tadd\tr3, pc, r3\n", file);
              fputs ("\tmov r12, r3\n", file);
            }
          else
            fputs ("\tadd\tr12, pc, r12\n", file);
        }
      else if (TARGET_THUMB1_ONLY)
        fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
        {
          fputs ("\tldr\tr3, ", file);
          assemble_name (file, label);
          fputs ("+4\n", file);
          asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
                       mi_op, this_regno, this_regno);
        }
      else if (mi_delta != 0)
        {
          /* Thumb1 unified syntax requires s suffix in instruction name when
             one of the operands is immediate.  */
          asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
                       mi_op, this_regno, this_regno,
                       mi_delta);
        }
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
        {
          if ((mi_delta & (3 << shift)) == 0)
            shift += 2;
          else
            {
              asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
                           mi_op, this_regno, this_regno,
                           mi_delta & (0xff << shift));
              mi_delta &= ~(0xff << shift);
              shift += 8;
            }
        }
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
        fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
        {
          /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
          rtx tem = XEXP (DECL_RTL (function), 0);
          /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
             pipeline offset is four rather than eight.  Adjust the offset
             accordingly.  */
          tem = plus_constant (GET_MODE (tem), tem,
                               TARGET_THUMB1_ONLY ? -3 : -7);
          tem = gen_rtx_MINUS (GET_MODE (tem),
                               tem,
                               gen_rtx_SYMBOL_REF (Pmode,
                                                   ggc_strdup (labelpc)));
          assemble_integer (tem, 4, BITS_PER_WORD, 1);
        }
      else
        /* Output ".word .LTHUNKn".  */
        assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
        assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
        fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
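
/* Worked example (illustrative) of the byte-at-a-time delta loop in the
   non-Thumb1-only path above: for delta == 0x401 it emits

        add     r0, r0, #1
        add     r0, r0, #1024

   peeling one 8-bit chunk of the constant per instruction, matching the
   set of immediates a single ARM add can encode.  */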
/* MI thunk handling for TARGET_32BIT.  */

static void
arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
                       HOST_WIDE_INT vcall_offset, tree function)
{
  /* On ARM, this_regno is R0 or R1 depending on
     whether the function returns an aggregate or not.  */
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
                                       function)
                    ? R1_REGNUM : R0_REGNUM);

  rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
  rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Add DELTA to THIS_RTX.  */
  if (delta != 0)
    arm_split_constant (PLUS, Pmode, NULL_RTX,
                        delta, this_rtx, this_rtx, false);

  /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
  if (vcall_offset != 0)
    {
      /* Load *THIS_RTX.  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
      /* Compute *THIS_RTX + VCALL_OFFSET.  */
      arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
                          false);
      /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
      emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  rtx funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending this is a post-reload pass.  */
  reload_completed = 0;
}
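
/* Semantics sketch (illustrative, not compiled): the RTL emitted above
   behaves like

     this += DELTA;
     if (VCALL_OFFSET)
       this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
     goto function;           // sibcall, other arguments left untouched

   with ip free as scratch and r0/r1 selected as the "this" register
   depending on whether the return value is an aggregate.  */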
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
                     HOST_WIDE_INT vcall_offset, tree function)
{
  if (TARGET_32BIT)
    arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
  else
    arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
}

int
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case E_V2SImode: pattern = "%08x"; break;
    case E_V4HImode: pattern = "%04x"; break;
    case E_V8QImode: pattern = "%02x"; break;
    default:         gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  long bits;

  bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}

const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands [1])
      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
                            machine_mode mode,
                            tree type,
                            int *pretend_size,
                            int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if (nregs & 1)
        {
          int res = arm_needs_doubleword_align (mode, type);
          if (res < 0 && warn_psabi)
            inform (input_location, "parameter passing for argument of "
                    "type %qT changed in GCC 7.1", type);
          else if (res > 0)
            nregs++;
        }
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
    return !TARGET_AAPCS_BASED;
}

static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
                           machine_mode mode,
                           int *punsignedp ATTRIBUTE_UNUSED,
                           const_tree fntype ATTRIBUTE_UNUSED,
                           int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

static bool
arm_default_short_enums (void)
{
  return ARM_DEFAULT_SHORT_ENUMS;
}
/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}


/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}

void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
        addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
        {
          /* LR will be the first saved register.  */
          delta = offsets->outgoing_args - (offsets->frame + 4);

          if (delta >= 4096)
            {
              emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
                                     GEN_INT (delta & ~4095)));
              addr = scratch;
              delta &= 4095;
            }
          else
            addr = stack_pointer_rtx;

          addr = plus_constant (Pmode, addr, delta);
        }

      /* The store needs to be marked as frame related in order to prevent
         DSE from deleting it as dead if it is based on fp.  */
      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
    }
}

void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
        {
          delta = offsets->soft_frame - offsets->saved_args;
          reg = THUMB_HARD_FRAME_POINTER_REGNUM;
          if (TARGET_THUMB1)
            limit = 128;
        }
      else
        {
          delta = offsets->outgoing_args - offsets->saved_args;
          reg = SP_REGNUM;
        }
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
        delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
        {
          emit_insn (gen_movsi (scratch, GEN_INT (delta)));
          emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
          addr = scratch;
        }
      else
        addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked as frame related in order to prevent
         DSE from deleting it as dead if it is based on fp.  */
      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode || mode == V8HFmode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
          || (mode == V4HImode)
          || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  return false;
}

/* Implements target hook array_mode_supported_p.  */

static bool
arm_array_mode_supported_p (machine_mode mode,
                            unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static machine_mode
arm_preferred_simd_mode (machine_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case E_SFmode:
        return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case E_SImode:
        return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case E_HImode:
        return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case E_QImode:
        return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case E_DImode:
        if (!TARGET_NEON_VECTORIZE_DOUBLE)
          return V2DImode;
        break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case E_SImode:
        return V2SImode;
      case E_HImode:
        return V4HImode;
      case E_QImode:
        return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
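
/* Consequence (illustrative): for SImode the middle-end may fold e.g.
   "x << (n & 255)" to "x << n", because the mask above promises that the
   hardware shifter only examines the low byte of the count; for DImode
   no such folding is permitted (mask 0).  */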
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
        return 64 + regno - FIRST_VFP_REGNUM;
      else
        return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  return DWARF_FRAME_REGISTERS;
}

/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
        64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
        256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
        if (TARGET_BIG_END)
          {
            parts[i] = gen_rtx_REG (SImode, regno + i + 1);
            parts[i + 1] = gen_rtx_REG (SImode, regno + i);
          }
        else
          {
            parts[i] = gen_rtx_REG (SImode, regno + i);
            parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
          }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
        parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
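
/* Example (illustrative): a DFmode value in d5 overlaps s10/s11, so on a
   little-endian target the span returned above is a PARALLEL of the two
   SImode pieces (s10, s11), later numbered with the legacy 64+N
   single-precision encoding; d16-d31, which have no S-register aliases,
   are described as whole DImode registers in the 256+N range instead.  */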
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  unsigned padfirst = 0, padlast = 0;
  rtx e;

  e = XVECEXP (p, 0, 0);
  gcc_assert (GET_CODE (e) == SET);

  /* First insn will adjust the stack pointer.  */
  gcc_assert (GET_CODE (e) == SET
              && REG_P (SET_DEST (e))
              && REGNO (SET_DEST (e)) == SP_REGNUM
              && GET_CODE (SET_SRC (e)) == PLUS);

  offset = -INTVAL (XEXP (SET_SRC (e), 1));
  nregs = XVECLEN (p, 0) - 1;
  gcc_assert (nregs);

  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
  if (reg < 16)
    {
      /* For -Os dummy registers can be pushed at the beginning to
         avoid separate stack pointer adjustment.  */
      e = XVECEXP (p, 0, 1);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
        padfirst = INTVAL (XEXP (e, 1));
      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but not annotate it as it is
         never restored.  We turn this into a stack pointer adjustment.  */
      e = XVECEXP (p, 0, nregs);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
        padlast = offset - INTVAL (XEXP (e, 1)) - 4;
      else
        padlast = offset - 4;
      gcc_assert (padlast == 0 || padlast == 4);
      if (padlast == 4)
        fprintf (asm_out_file, "\t.pad #4\n");
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  gcc_assert (offset == padfirst + nregs * reg_size + padlast);

  offset = padfirst;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
         Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      gcc_assert (GET_CODE (e) == SET
                  && MEM_P (SET_DEST (e))
                  && REG_P (SET_SRC (e)));

      reg = REGNO (SET_SRC (e));
      gcc_assert (reg >= lastreg);

      if (i != 1)
        fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
         double precision register names.  */
      if (IS_VFP_REGNUM (reg))
        asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf (asm_out_file, "%r", reg);

      if (flag_checking)
        {
          /* Check that the addresses are consecutive.  */
          e = XEXP (SET_DEST (e), 0);
          if (GET_CODE (e) == PLUS)
            gcc_assert (REG_P (XEXP (e, 0))
                        && REGNO (XEXP (e, 0)) == SP_REGNUM
                        && CONST_INT_P (XEXP (e, 1))
                        && offset == INTVAL (XEXP (e, 1)));
          else
            gcc_assert (i == 1
                        && REG_P (e)
                        && REGNO (e) == SP_REGNUM);
          offset += reg_size;
        }
    }

  fprintf (asm_out_file, "}\n");
  if (padfirst)
    fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
}
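
/* Example output (illustrative): for a prologue "push {r4, r5, lr, pc}"
   where pc is only pushed to fold the stack adjustment into one insn and
   is not annotated, the code above prints

        .pad #4
        .save {r4, r5, lr}

   turning the untracked pc slot into a plain stack adjustment.  */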
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
          || !REG_P (XEXP (XEXP (e0, 0), 0))
          || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
        abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
        asm_fprintf(asm_out_file, "{d%d}\n",
                    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
        {
          /* A stack increment.  */
          if (GET_CODE (e1) != PLUS
              || !REG_P (XEXP (e1, 0))
              || REGNO (XEXP (e1, 0)) != SP_REGNUM
              || !CONST_INT_P (XEXP (e1, 1)))
            abort ();

          asm_fprintf (asm_out_file, "\t.pad #%wd\n",
                       -INTVAL (XEXP (e1, 1)));
        }
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
        {
          HOST_WIDE_INT offset;

          if (GET_CODE (e1) == PLUS)
            {
              if (!REG_P (XEXP (e1, 0))
                  || !CONST_INT_P (XEXP (e1, 1)))
                abort ();
              reg = REGNO (XEXP (e1, 0));
              offset = INTVAL (XEXP (e1, 1));
              asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
                           HARD_FRAME_POINTER_REGNUM, reg,
                           offset);
            }
          else if (REG_P (e1))
            {
              reg = REGNO (e1);
              asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
                           HARD_FRAME_POINTER_REGNUM, reg);
            }
          else
            abort ();
        }
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
        {
          /* Move from sp to reg.  */
          asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
        }
      else if (GET_CODE (e1) == PLUS
               && REG_P (XEXP (e1, 0))
               && REGNO (XEXP (e1, 0)) == SP_REGNUM
               && CONST_INT_P (XEXP (e1, 1)))
        {
          /* Set reg to offset from sp.  */
          asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
                       REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
        }
      else
        abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
          || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
        {
        case REG_FRAME_RELATED_EXPR:
          pat = XEXP (note, 0);
          goto found;

        case REG_CFA_REGISTER:
          pat = XEXP (note, 0);
          if (pat == NULL)
            {
              pat = PATTERN (insn);
              if (GET_CODE (pat) == PARALLEL)
                pat = XVECEXP (pat, 0, 0);
            }

          /* Only emitted for IS_STACKALIGN re-alignment.  */
          {
            rtx dest, src;
            unsigned reg;

            src = SET_SRC (pat);
            dest = SET_DEST (pat);

            gcc_assert (src == stack_pointer_rtx);
            reg = REGNO (dest);
            asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
                         reg + 0x90, reg);
          }
          handled_one = true;
          break;

          /* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
             to get correct dwarf information for shrink-wrap.  We should not
             emit unwind information for it because these are used either for
             pretend arguments or notes to adjust sp and restore registers from
             stack.  */
        case REG_CFA_DEF_CFA:
        case REG_CFA_ADJUST_CFA:
        case REG_CFA_RESTORE:
          return;

        case REG_CFA_EXPRESSION:
        case REG_CFA_OFFSET:
          /* ??? Only handling here what we actually emit.  */
          gcc_unreachable ();

        default:
          break;
        }
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}

/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
#endif /* ARM_UNWIND_INFO */
27432 #endif /* ARM_UNWIND_INFO */
27434 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27437 arm_asm_init_sections (void)
27439 #if ARM_UNWIND_INFO
27440 exception_section
= get_unnamed_section (0, output_section_asm_op
,
27442 #endif /* ARM_UNWIND_INFO */
27444 #ifdef OBJECT_FORMAT_ELF
27445 if (target_pure_code
)
27446 text_section
->unnamed
.data
= "\t.section .text,\"0x20000006\",%progbits";
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
         The same condition is used in arm_unwind_emit to suppress
         the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
          && (TREE_NOTHROW (current_function_decl)
              || crtl->all_throwers_are_sibcalls))
        fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}

static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return TRUE;
}
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
        fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      if (GOT_PCREL)
        fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  shift = shift_op(operands[3], &val);
  if (shift)
    {
      if (val != -1)
        operands[2] = GEN_INT(val);
      sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1", c);

  output_asm_insn (pattern, operands);
  return "";
}
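
/* Example (illustrative): for an arithmetic right shift of r1 by 3 into
   r0 with set_flags == 0, shift_op yields "asr" with val == 3, so the
   template built above is "asr%?\t%0, %1, %2" and the final output is of
   the form "asr r0, r1, #3" (with %? expanding to any condition
   suffix).  */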
/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
      || ((opmode == V2SImode) && (shift > 31))
      || ((opmode == DImode) && (shift > 63)))
    {
      if (wror_or_wsra)
        {
          sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
          output_asm_insn (templ, operands);
          if (opmode == DImode)
            {
              sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
              output_asm_insn (templ, operands);
            }
        }
      else
        {
          /* The destination register will contain all zeros.  */
          sprintf (templ, "wzero\t%%0");
          output_asm_insn (templ, operands);
        }
      return "";
    }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];

  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
        break;

      mask >>= 1;
    }
  gcc_assert (i < units);
  switch (GET_MODE (operands[0]))
    {
    case E_V8QImode:
      sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
      break;
    case E_V4HImode:
      sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
      break;
    case E_V2SImode:
      sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
      break;
    default:
      gcc_unreachable ();
      break;
    }
  output_asm_insn (templ, operands);
  return "";
}
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE (diff_vec))
    {
    case E_QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
              "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case E_HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
              "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case E_SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case E_QImode:
      return "tbb\t[%|pc, %0]";
    case E_HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case E_SImode:
      if (flag_pic)
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
          output_asm_insn ("add\t%4, %4, %5", operands);
          return "bx\t%4";
        }
      else
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          return "ldr\t%|pc, [%4, %0, lsl #2]";
        }
    default:
      gcc_unreachable ();
    }
}
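/* Illustrative only: for a byte-offset dispatch table the sequence
   emitted by thumb2_output_casesi is along the lines of

	cmp	r0, #9		@ index against range
	bhi	.Ldefault	@ out of range -> default label
	tbb	[pc, r0]	@ branch via byte-offset table

   with the operand numbers replaced by the actual registers/labels.  */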
/* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
   per-core tuning structs.  */
static int
arm_issue_rate (void)
{
  return current_tune->issue_rate;
}

/* Return how many instructions the scheduler should look ahead to
   choose the best one.  */
static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Enable modeling of L2 auto-prefetcher.  */
static int
arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}
static const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
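/* Illustrative only: with the AAPCS rules above, "void f (va_list)"
   mangles to _Z1fSt9__va_list, and "void g (__fp16)" mangles to _Z1gDh
   (assuming "Dh" is the string returned for the 16-bit REAL_TYPE case,
   as in the Itanium C++ ABI).  */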
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
            sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* The frame pointer is required for non-leaf APCS frames.  */
  if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
    return true;

  /* If we are probing the stack in the prologue, we will have a faulting
     instruction prior to the stack adjustment and this requires a frame
     pointer if we want to catch the exception using the EABI unwinder.  */
  if (!IS_INTERRUPT (arm_current_func_type ())
      && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
      && arm_except_unwind_info (&global_options) == UI_TARGET
      && cfun->can_throw_non_call_exceptions)
    {
      HOST_WIDE_INT size = get_frame_size ();

      /* That's irrelevant if there is no stack adjustment.  */
      if (size <= 0)
        return false;

      /* That's relevant only if there is a stack probe.  */
      if (crtl->is_leaf && !cfun->calls_alloca)
        {
          /* We don't have the final size of the frame so adjust.  */
          size += 32 * UNITS_PER_WORD;
          if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
            return true;
        }
      else
        return true;
    }

  return false;
}
/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}

/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}

static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}

static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
                                         const_tree type, int misalignment,
                                         bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
        return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
         so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
        return true;

      /* Return true if the misalignment is a multiple of the natural alignment
         of the vector's element type.  This is probably always going to be
         true in practice, since we've already established that this isn't a
         packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
                                                      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
         to use the HI regs, because of the overhead of
         stacking them.  */
      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
        fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT)
    {
      /* VFPv3 registers are disabled when earlier VFP
         versions are selected due to the definition of
         LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
           regno <= LAST_VFP_REGNUM; ++ regno)
        {
          fixed_regs[regno] = 0;
          call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
            || regno >= FIRST_VFP_REGNUM + 32;
        }
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
         and wCG1 as call-preserved registers.  The 2002/11/21
         revision changed this so that all wCG registers are
         scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
           regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
        fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
         the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
           regno <= LAST_IWMMXT_REGNUM; ++ regno)
        {
          fixed_regs[regno] = 0;
          call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
        }
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10]     = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
        global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the
     register list fits in 8 bits.  Normally this means all registers in the
     list must be LO_REGS, that is (R0 - R7).  If any HI_REGS are used, then
     we must use 32-bit encodings.  There is one exception for PUSH: LR in
     HI_REGS can still be used with the 16-bit encoding.  */
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}
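/* Illustrative only: under the rule above "push {r0-r7, lr}" can use the
   16-bit encoding (all LO_REGS plus the LR exception), so the insn length
   is 2, while "push {r0, r8}" requires the 32-bit encoding and the length
   is 4.  */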
/* Compute the attribute "length" of an insn.  Currently, this function is
   used for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
   "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel
   PARALLEL rtx, RETURN_PC is true if OPERANDS contains a return insn, and
   WRITE_BACK_P is true if OPERANDS contains an insn which explicitly
   updates the base register.  */
int
arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
{
  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  rtx parallel_op = operands[0];
  /* Initialize to elements number of PARALLEL.  */
  unsigned indx = XVECLEN (parallel_op, 0) - 1;
  /* Initialize the value to base register.  */
  unsigned regno = REGNO (operands[1]);
  /* Skip return and write back pattern.
     We only need register pop pattern for later analysis.  */
  unsigned first_indx = 0;
  first_indx += return_pc ? 1 : 0;
  first_indx += write_back_p ? 1 : 0;

  /* A pop operation can be done through LDM or POP.  If the base register is
     SP and it is used with write back, then an LDM will be an alias of POP.  */
  bool pop_p = (regno == SP_REGNUM && write_back_p);
  bool ldm_p = !pop_p;

  /* Check base register for LDM.  */
  if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
    return 4;

  /* Check each register in the list.  */
  for (; indx >= first_indx; indx--)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
      /* For POP, PC in HI_REGS can be used with 16-bit encoding.  See similar
         comment in arm_attr_length_push_multi.  */
      if (REGNO_REG_CLASS (regno) == HI_REGS
          && (regno != PC_REGNUM || ldm_p))
        return 4;
    }

  return 2;
}
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];
  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  r0 = *CONST_DOUBLE_REAL_VALUE (operand);
  if (exact_real_inverse (DFmode, &r0)
      && !REAL_VALUE_NEGATIVE (r0))
    {
      if (exact_real_truncate (DFmode, &r0))
        {
          HOST_WIDE_INT value = real_to_integer (&r0);
          value = value & 0xffffffff;
          if ((value != 0) && ( (value & (value - 1)) == 0))
            {
              int ret = exact_log2 (value);
              gcc_assert (IN_RANGE (ret, 0, 31));
              return ret;
            }
        }
    }
  return 0;
}
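/* Worked example (illustrative only): for the constant 0.125 the exact
   inverse is 8.0 = 2^3, so the routine above returns 3; the vcvt
   patterns then use "#3" as the number of fraction bits.  */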
/* If X is a CONST_DOUBLE with a value that is a power of 2 whose
   log2 is in [1, 32], return that log2.  Otherwise return -1.
   This is used in the patterns for vcvt.s32.f32 floating-point to
   fixed-point conversions.  */

int
vfp3_const_double_for_bits (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!CONST_DOUBLE_P (x))
    return -1;

  r = CONST_DOUBLE_REAL_VALUE (x);

  if (REAL_VALUE_NEGATIVE (*r)
      || REAL_VALUE_ISNAN (*r)
      || REAL_VALUE_ISINF (*r)
      || !real_isinteger (r, SFmode))
    return -1;

  HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));

  /* The exact_log2 above will have returned -1 if this is
     not an exact log2.  */
  if (!IN_RANGE (hwint, 1, 32))
    return -1;

  return hwint;
}
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
        {
        case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
        case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
        case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
        case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case E_QImode: gen = gen_arm_load_exclusiveqi; break;
        case E_HImode: gen = gen_arm_load_exclusivehi; break;
        case E_SImode: gen = gen_arm_load_exclusivesi; break;
        case E_DImode: gen = gen_arm_load_exclusivedi; break;
        default:
          gcc_unreachable ();
        }
    }

  emit_insn (gen (rval, mem));
}

static void
arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
                          rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
        {
        case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
        case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
        case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
        case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case E_QImode: gen = gen_arm_store_exclusiveqi; break;
        case E_HImode: gen = gen_arm_store_exclusivehi; break;
        case E_SImode: gen = gen_arm_store_exclusivesi; break;
        case E_DImode: gen = gen_arm_store_exclusivedi; break;
        default:
          gcc_unreachable ();
        }
    }

  emit_insn (gen (bval, rval, mem));
}
/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  rtx_insn *jump = emit_jump_insn (insn);
  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
}
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case E_QImode:
    case E_HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
         so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case E_SImode:
      /* Force the value into a register if needed.  We waited until after
         the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
        oldval = force_reg (SImode, oldval);
      break;

    case E_DImode:
      if (!cmpdi_operand (oldval, mode))
        oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_THUMB1)
    {
      switch (mode)
        {
        case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
        case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
        case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
        case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
        case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
        case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
        case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
        default:
          gcc_unreachable ();
        }
    }

  bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
  emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  For Thumb-1 targets, the
     boolean negation of the result is also stored in bval because Thumb-1
     backend lacks dependency tracking for CC flag due to flag-setting not
     being represented at RTL level.  */
  if (TARGET_THUMB1)
    emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
  else
    {
      x = gen_rtx_EQ (SImode, bdst, const0_rtx);
      emit_insn (gen_rtx_SET (bval, x));
    }
}
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  Success of the compare and swap is
   indicated by the Z flag set for 32bit targets and by neg_bval being zero
   for Thumb-1 targets (ie. negation of the boolean value returned by
   atomic_compare_and_swapmode standard pattern in operand 0).  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, neg_bval;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (operands[5] != const0_rtx);
  mod_s = memmodel_from_int (INTVAL (operands[6]));
  mod_f = memmodel_from_int (INTVAL (operands[7]));
  neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
  mode = GET_MODE (mem);

  bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);

  bool use_acquire = TARGET_HAVE_LDACQ
                     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
                          || is_mm_release (mod_s));

  bool use_release = TARGET_HAVE_LDACQ
                     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
                          || is_mm_acquire (mod_s));

  /* For ARMv8, the load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
     as required to communicate with arm_expand_compare_and_swap.  */
  if (TARGET_32BIT)
    {
      cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                                gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }
  else
    {
      emit_move_insn (neg_bval, const1_rtx);
      cond = gen_rtx_NE (VOIDmode, rval, oldval);
      if (thumb1_cmpneg_operand (oldval, SImode))
        emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
                                                    label2, cond));
      else
        emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
    }

  arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  if (TARGET_32BIT)
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
      emit_insn (gen_rtx_SET (cond, x));
    }

  if (!is_weak)
    {
      /* Z is set to boolean value of !neg_bval, as required to communicate
         with arm_expand_compare_and_swap.  */
      x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
      emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (label2);
}
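/* Illustrative only: on a 32-bit target the split above yields a loop of
   roughly this shape for a strong SImode compare-and-swap (register
   numbers are arbitrary):

	.L1:	ldrex	r0, [r1]	@ load current value
		cmp	r0, r2		@ compare with oldval
		bne	.L2		@ mismatch -> skip the store
		strex	r3, r4, [r1]	@ try to store newval
		cmp	r3, #0		@ strex writes 0 on success
		bne	.L1		@ lost reservation -> retry
	.L2:

   with barriers and acquire/release variants added as described in the
   code above.  */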
/* Split an atomic operation pattern.  Operation is given by CODE and is one
   of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
   operation).  Operation is performed on the content at MEM and on VALUE
   following the memory model MODEL_RTX.  The content at MEM before and after
   the operation is returned in OLD_OUT and NEW_OUT respectively while the
   success of the operation is returned in COND.  Using a scratch register or
   an operand register for these determines what result is returned for that
   pattern.  */

void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
                     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  bool all_low_regs, bind_old_new;
  rtx x;

  bool is_armv8_sync = arm_arch8 && is_mm_sync (model);

  bool use_acquire = TARGET_HAVE_LDACQ
                     && !(is_mm_relaxed (model) || is_mm_consume (model)
                          || is_mm_release (model));

  bool use_release = TARGET_HAVE_LDACQ
                     && !(is_mm_relaxed (model) || is_mm_consume (model)
                          || is_mm_acquire (model));

  /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  /* Does the operation require destination and first operand to use the same
     register?  This is decided by register constraints of relevant insn
     patterns in thumb1.md.  */
  gcc_assert (!new_out || REG_P (new_out));
  all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
                 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
                 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
  bind_old_new =
    (TARGET_THUMB1
     && code != SET
     && code != MINUS
     && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));

  /* We want to return the old value while putting the result of the operation
     in the same register as the old value so copy the old value over to the
     destination register and use that register for the operation.  */
  if (old_out && bind_old_new)
    {
      emit_move_insn (new_out, old_out);
      old_out = new_out;
    }

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
        {
          value = GEN_INT (-INTVAL (value));
          code = PLUS;
        }
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
        {
          /* DImode plus/minus need to clobber flags.  */
          /* The adddi3 and subdi3 patterns are incorrectly written so that
             they require matching operands, even when we could easily support
             three operands.  Thankfully, this can be fixed up post-splitting,
             as the individual add+adc patterns do accept three operands and
             post-reload cprop can make these moves go away.  */
          emit_move_insn (new_out, old_out);
          if (code == PLUS)
            x = gen_adddi3 (new_out, new_out, value);
          else
            x = gen_subdi3 (new_out, new_out, value);
          emit_insn (x);
          break;
        }
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
                            use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
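/* Illustrative only: for an SImode atomic fetch-and-add the split above
   produces a loop of roughly this form (registers arbitrary):

	.L1:	ldrex	r0, [r2]	@ old_out = *mem
		add	r1, r0, r3	@ new_out = old_out + value
		strex	ip, r1, [r2]	@ cond = 0 iff the store succeeded
		cmp	ip, #0
		bne	.L1		@ retry if the reservation was lost  */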
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
        emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
        emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
        {
          pair = gen_reg_rtx (V16QImode);
          emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
          pair = gen_lowpart (TImode, pair);
          emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
        }
      else
        {
          pair = gen_reg_rtx (OImode);
          emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
          emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
        }
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
/* Map lane ordering between architectural lane order, and GCC lane order,
   taking into account ABI.  See comment above output_move_neon for details.  */

static int
neon_endian_lane_map (machine_mode mode, int lane)
{
  if (BYTES_BIG_ENDIAN)
    {
      int nelems = GET_MODE_NUNITS (mode);
      /* Reverse lane order.  */
      lane = (nelems - 1 - lane);
      /* Reverse D register order, to match ABI.  */
      if (GET_MODE_SIZE (mode) == 16)
        lane = lane ^ (nelems / 2);
    }
  return lane;
}

/* Some permutations index into pairs of vectors, this is a helper function
   to map indexes into those pairs of vectors.  */

static int
neon_pair_endian_lane_map (machine_mode mode, int lane)
{
  int nelem = GET_MODE_NUNITS (mode);
  if (BYTES_BIG_ENDIAN)
    lane =
      neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
  return lane;
}
28734 /* Recognize patterns for the VUZP insns. */
28737 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
28739 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28740 rtx out0
, out1
, in0
, in1
;
28741 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28745 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28748 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28749 big endian pattern on 64 bit vectors, so we correct for that. */
28750 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
28751 && GET_MODE_SIZE (d
->vmode
) == 8 ? d
->nelt
: 0;
28753 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
28755 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28757 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
28761 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28763 for (i
= 0; i
< nelt
; i
++)
28766 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
28767 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
28777 case E_V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
28778 case E_V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
28779 case E_V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
28780 case E_V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
28781 case E_V8HFmode
: gen
= gen_neon_vuzpv8hf_internal
; break;
28782 case E_V4HFmode
: gen
= gen_neon_vuzpv4hf_internal
; break;
28783 case E_V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
28784 case E_V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
28785 case E_V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
28786 case E_V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
28788 gcc_unreachable ();
28793 if (swap_nelt
!= 0)
28794 std::swap (in0
, in1
);
28797 out1
= gen_reg_rtx (d
->vmode
);
28799 std::swap (out0
, out1
);
28801 emit_insn (gen (out0
, in0
, in1
, out1
));
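/* Illustrative only: for two V4SImode operands the even-lane selector
   {0, 2, 4, 6} matches the test above with odd == 0 and is implemented
   with a single vuzp, whose first output collects the even lanes of the
   operand pair.  */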
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);
  int first_elem;
  bool is_swapped;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  is_swapped = BYTES_BIG_ENDIAN;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];

  high = nelt / 2;
  if (first_elem == neon_endian_lane_map (d->vmode, high))
    ;
  else if (first_elem == neon_endian_lane_map (d->vmode, 0))
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt =
        neon_pair_endian_lane_map (d->vmode, i + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
          != elt)
        return false;
      elt =
        neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
          != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case E_V8QImode:  gen = gen_neon_vzipv8qi_internal; break;
    case E_V8HImode:  gen = gen_neon_vzipv8hi_internal; break;
    case E_V4HImode:  gen = gen_neon_vzipv4hi_internal; break;
    case E_V8HFmode:  gen = gen_neon_vzipv8hf_internal; break;
    case E_V4HFmode:  gen = gen_neon_vzipv4hf_internal; break;
    case E_V4SImode:  gen = gen_neon_vzipv4si_internal; break;
    case E_V2SImode:  gen = gen_neon_vzipv2si_internal; break;
    case E_V2SFmode:  gen = gen_neon_vzipv2sf_internal; break;
    case E_V4SFmode:  gen = gen_neon_vzipv4sf_internal; break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (is_swapped)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen)(rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
        {
        case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
        case E_V8QImode:  gen = gen_neon_vrev64v8qi; break;
        default:
          return false;
        }
      break;
    case 3:
      switch (d->vmode)
        {
        case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
        case E_V8QImode:  gen = gen_neon_vrev32v8qi; break;
        case E_V8HImode:  gen = gen_neon_vrev64v8hi; break;
        case E_V4HImode:  gen = gen_neon_vrev64v4hi; break;
        case E_V8HFmode:  gen = gen_neon_vrev64v8hf; break;
        case E_V4HFmode:  gen = gen_neon_vrev64v4hf; break;
        default:
          return false;
        }
      break;
    case 1:
      switch (d->vmode)
        {
        case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
        case E_V8QImode:  gen = gen_neon_vrev16v8qi; break;
        case E_V8HImode:  gen = gen_neon_vrev32v8hi; break;
        case E_V4HImode:  gen = gen_neon_vrev32v4hi; break;
        case E_V4SImode:  gen = gen_neon_vrev64v4si; break;
        case E_V2SImode:  gen = gen_neon_vrev64v2si; break;
        case E_V4SFmode:  gen = gen_neon_vrev64v4sf; break;
        case E_V2SFmode:  gen = gen_neon_vrev64v2sf; break;
        default:
          return false;
        }
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
        /* This is guaranteed to be true as the value of diff
           is 7, 3, 1 and we should have enough elements in the
           queue to generate this.  Getting a vector mask with a
           value of diff other than these values implies that
           something is wrong by the time we get here.  */
        gcc_assert (i + j < nelt);
        if (d->perm[i + j] != i + diff - j)
          return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
        return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case E_V8QImode:  gen = gen_neon_vtrnv8qi_internal; break;
    case E_V8HImode:  gen = gen_neon_vtrnv8hi_internal; break;
    case E_V4HImode:  gen = gen_neon_vtrnv4hi_internal; break;
    case E_V8HFmode:  gen = gen_neon_vtrnv8hf_internal; break;
    case E_V4HFmode:  gen = gen_neon_vtrnv4hf_internal; break;
    case E_V4SImode:  gen = gen_neon_vtrnv4si_internal; break;
    case E_V2SImode:  gen = gen_neon_vtrnv2si_internal; break;
    case E_V2SFmode:  gen = gen_neon_vtrnv2sf_internal; break;
    case E_V4SFmode:  gen = gen_neon_vtrnv4sf_internal; break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      std::swap (in0, in1);
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
         the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
        return false;

      /* If we are operating on only one vector: it could be a
         rotation.  If there are only two elements of size < 64, let
         arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
        {
          if ((nelt == 2) && (d->vmode != V2DImode))
            return false;

          next = 0;
        }

      if (d->perm[i] != next)
        return false;
    }

  location = d->perm[0];

  switch (d->vmode)
    {
    case E_V16QImode: gen = gen_neon_vextv16qi; break;
    case E_V8QImode:  gen = gen_neon_vextv8qi; break;
    case E_V4HImode:  gen = gen_neon_vextv4hi; break;
    case E_V8HImode:  gen = gen_neon_vextv8hi; break;
    case E_V2SImode:  gen = gen_neon_vextv2si; break;
    case E_V4SImode:  gen = gen_neon_vextv4si; break;
    case E_V4HFmode:  gen = gen_neon_vextv4hf; break;
    case E_V8HFmode:  gen = gen_neon_vextv8hf; break;
    case E_V2SFmode:  gen = gen_neon_vextv2sf; break;
    case E_V4SFmode:  gen = gen_neon_vextv4sf; break;
    case E_V2DImode:  gen = gen_neon_vextv2di; break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
/* The NEON VTBL instruction is a fully variable permuation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;

      for (i = 0; i < nelt; ++i)
        d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      std::swap (d->op0, d->op1);
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
        return true;
      if (arm_evpc_neon_vzip (d))
        return true;
      if (arm_evpc_neon_vrev (d))
        return true;
      if (arm_evpc_neon_vtrn (d))
        return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      /* FALLTHRU */
    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
arm_vectorize_vec_perm_const_ok (machine_mode vmode,
                                 const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Categorize the set of elements in the selector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
         instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
        {
          if (code != ARM_PRE_DEC)
            return true;
          else
            return false;
        }
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
         word size, there is no point in auto-incrementing
         because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
        return false;

      /* Vector and floating point modes do not support
         these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
        return false;

      return true;

    default:
      return false;
    }

  return false;
}
/* The default expansion of general 64-bit shifts in core-regs is suboptimal
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
                               rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
       in = the register pair containing the input value.
       out = the destination register pair.
       up = the high- or low-part of each pair.
       down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
              && (REG_P (out) || GET_CODE (out) == SUBREG)
              && GET_MODE (out) == DImode);
  gcc_assert (in
              && (REG_P (in) || GET_CODE (in) == SUBREG)
              && GET_MODE (in) == DImode);
  gcc_assert (amount
              && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
                   && GET_MODE (amount) == SImode)
                  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
              || (GET_CODE (scratch1) == SCRATCH)
              || (GET_MODE (scratch1) == SImode
                  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
              || (GET_CODE (scratch2) == SCRATCH)
              || (GET_MODE (scratch2) == SImode
                  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
              || !HARD_REGISTER_P (out)
              || (REGNO (out) != REGNO (amount)
                  && REGNO (out) + 1 != REGNO (amount)));
  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
            gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
  #define RSB_32(DEST,SRC) \
            gen_subsi3 ((DEST), GEN_INT (32), (SRC))
  #define SUB_S_32(DEST,SRC) \
            gen_addsi3_compare0 ((DEST), (SRC), \
                                 GEN_INT (-32))
  #define SET(DEST,SRC) \
            gen_rtx_SET ((DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
            gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
            gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
                            SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
            gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
                            SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
            gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
            gen_arm_cond_branch ((LABEL), \
                                 gen_rtx_ ## COND (CCmode, cc_reg, \
                                                   const0_rtx), \
                                 cc_reg)
  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
         In both cases we try to match the result an ARM instruction in a
         shift-by-register would give.  This helps reduce execution
         differences between optimization levels, but it won't stop other
         parts of the compiler doing different things.  This is "undefined
         behavior", in any case.  */
      if (INTVAL (amount) <= 0)
        emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
        {
          if (code == ASHIFTRT)
            {
              rtx const31_rtx = GEN_INT (31);
              emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
              emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
            }
          else
            emit_insn (gen_movdi (out, const0_rtx));
        }

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
        {
          /* Shifts by a constant less than 32.  */
          rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

          /* Clearing the out register in DImode first avoids lots
             of spilling and results in less stack usage.
             Later this redundant insn is completely removed.
             Do that only if "in" and "out" are different registers.  */
          if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
            emit_insn (SET (out, const0_rtx));
          emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
          emit_insn (SET (out_down,
                          ORR (REV_LSHIFT (code, in_up, reverse_amount),
                               out_down)));
          emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
        }
      else
        {
          /* Shifts by a constant greater than 31.  */
          rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

          if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
            emit_insn (SET (out, const0_rtx));
          emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
          if (code == ASHIFTRT)
            emit_insn (gen_ashrsi3 (out_up, in_up,
                                    GEN_INT (31)));
          else
            emit_insn (SET (out_up, const0_rtx));
        }
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
         Swapping them around now allows the later code to be more general.  */
      switch (code)
        {
        case ASHIFT:
          emit_insn (SUB_32 (scratch1, amount));
          emit_insn (RSB_32 (scratch2, amount));
          break;
        case ASHIFTRT:
          emit_insn (RSB_32 (scratch1, amount));
          /* Also set CC = amount > 32.  */
          emit_insn (SUB_S_32 (scratch2, amount));
          break;
        case LSHIFTRT:
          emit_insn (RSB_32 (scratch1, amount));
          emit_insn (SUB_32 (scratch2, amount));
          break;
        default:
          gcc_unreachable ();
        }

      /* Emit code like this:

	 arithmetic-left:
	    out_down = in_down << amount;
	    out_down = (in_up << (amount - 32)) | out_down;
	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	    out_up = in_up << amount;

	 arithmetic-right:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    if (amount < 32)
	      out_down = ((signed)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 logical-right:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    if (amount < 32)
	      out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
        {
          /* Emit code for ARM mode.  */
          emit_insn (SET (out_down,
                          ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
          if (code == ASHIFTRT)
            {
              rtx_code_label *done_label = gen_label_rtx ();
              emit_jump_insn (BRANCH (LT, done_label));
              emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
                                             out_down)));
              emit_label (done_label);
            }
          else
            emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
                                           out_down)));
        }
      else
        {
          /* Emit code for Thumb2 mode.
             Thumb2 can't do shift and or in one insn.  */
          emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
          emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

          if (code == ASHIFTRT)
            {
              rtx_code_label *done_label = gen_label_rtx ();
              emit_jump_insn (BRANCH (LT, done_label));
              emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
              emit_insn (SET (out_down, ORR (out_down, scratch2)));
              emit_label (done_label);
            }
          else
            {
              emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
              emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
            }
        }

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
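/* Worked example (illustrative only): for "x << 40" with x in a core
   register pair, the constant path above takes the amount >= 32 branch
   and emits, in ARM syntax,

	mov	out_hi, in_lo, lsl #8	@ SHIFT (code, in_up, 40 - 32)
	mov	out_lo, #0

   i.e. only the "down"-stream word receives bits and the "up"-stream
   word is cleared (or sign-filled for arithmetic right shifts).  */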
/* Returns true if the pattern is a valid symbolic address, which is either a
   symbol_ref or (symbol_ref + addend).

   According to the ARM ELF ABI, the initial addend of REL-type relocations
   processing MOVW and MOVT instructions is formed by interpreting the 16-bit
   literal field of the instruction as a 16-bit signed value in the range
   -32768 <= A < 32768.  */

bool
arm_valid_symbolic_address_p (rtx addr)
{
  rtx xop0, xop1 = NULL_RTX;
  rtx tmp = addr;

  if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
    return true;

  /* (const (plus: symbol_ref const_int))  */
  if (GET_CODE (addr) == CONST)
    tmp = XEXP (addr, 0);

  if (GET_CODE (tmp) == PLUS)
    {
      xop0 = XEXP (tmp, 0);
      xop1 = XEXP (tmp, 1);

      if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
        return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
    }

  return false;
}
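/* Illustrative only: the addresses accepted above are the ones that fit a
   movw/movt pair with REL-type relocations, e.g.

	movw	r0, #:lower16:sym+100
	movt	r0, #:upper16:sym+100

   which is why the addend must lie in [-32768, 32767].  */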
/* Return true if *COMPARISON is a valid comparison operation, forcing
   the operands *OP1 and *OP2 into a form that is valid for it.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case E_SImode:
      if (!arm_add_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case E_DImode:
      if (!cmpdi_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    case E_HFmode:
      if (!TARGET_VFP_FP16INST)
        break;
      /* FP16 comparisons are done in SF mode.  */
      mode = SFmode;
      *op1 = convert_to_mode (mode, *op1, 1);
      *op2 = convert_to_mode (mode, *op2, 1);
      /* Fall through.  */
    case E_SFmode:
    case E_DFmode:
      if (!vfp_compare_operand (*op1, mode))
        *op1 = force_reg (mode, *op1);
      if (!vfp_compare_operand (*op2, mode))
        *op2 = force_reg (mode, *op2);
      return true;

    default:
      break;
    }

  return false;
}
/* Maximum number of instructions to set block of memory.  */
static int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}
/* Return TRUE if it's profitable to set block of memory for
   non-vectorized case.  VAL is the value to set the memory
   with.  LENGTH is the number of bytes to set.  ALIGN is the
   alignment of the destination memory in bytes.  UNALIGNED_P
   is TRUE if we can only set the memory with instructions
   meeting alignment requirements.  USE_STRD_P is TRUE if we
   can use strd to set the memory.  */
static bool
arm_block_set_non_vect_profit_p (rtx val,
                                 unsigned HOST_WIDE_INT length,
                                 unsigned HOST_WIDE_INT align,
                                 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
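/* Worked cost example (illustrative only): for length = 15 with a
   word-aligned destination and no strd, the word path above charges
   arm_const_inline_cost for materializing the value, 15 >> 2 = 3 word
   stores and leftover[3] = 2 trailing stores; when unaligned access is
   available the trailing strh/strb pair can be merged into one str, so
   one instruction is subtracted before the comparison against
   arm_block_set_max_insns ().  */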
/* Return TRUE if it's profitable to set block of memory for
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
			     unsigned HOST_WIDE_INT align,
			     machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num--;

  return (num <= arm_block_set_max_insns ());
}
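/* Worked example (editorial illustration, not part of the original
   source): LENGTH == 20, ALIGN == 8, MODE == V16QImode gives
   num = 1 (constant load) + (20 + 15) / 16 == 2 (stores), i.e. 3; no
   address adjustment is added because (20 & 3) == 0, and the aligned
   V16QI case subtracts one, for a final estimate of 2 instructions.  */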
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
			      unsigned HOST_WIDE_INT length,
			      unsigned HOST_WIDE_INT value,
			      unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;
  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, offset);

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
	  offset += nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	}
    }

  /* If there are not less than nelt_v8 bytes leftover, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      offset += length - i;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
	reg = gen_lowpart (V8QImode, reg);

      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
					      + (nelt_mode - nelt_v8))));
      offset += (length - i) + (nelt_mode - nelt_v8);
      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_aligned_vect (rtx dstbase,
			    unsigned HOST_WIDE_INT length,
			    unsigned HOST_WIDE_INT value,
			    unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
	  offset += length - nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	  /* We are shifting bytes back, set the alignment accordingly.  */
	  if ((length & 0x3) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 4);
	  else if ((length & 0x1) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 2);
	  else
	    set_mem_align (mem, BITS_PER_UNIT);

	  emit_insn (gen_movmisalignv16qi (mem, reg));
	  return true;
	}
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
      emit_move_insn (mem, reg);
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      offset += i - UNITS_PER_WORD;
      mem = adjust_automodify_address (dstbase, mode, addr, offset);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_move_insn (mem, reg);
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      offset += length - nelt_mode;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT * 2);
      else
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
				  unsigned HOST_WIDE_INT length,
				  unsigned HOST_WIDE_INT value,
				  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
				unsigned HOST_WIDE_INT length,
				unsigned HOST_WIDE_INT value,
				unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, false, use_strd_p))
    {
      if (!use_strd_p)
	return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
					    align, false, use_strd_p))
	return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
	{
	  addr = plus_constant (Pmode, dst, i);
	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
	  emit_move_insn (mem, reg);
	}
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
	 compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
		    unsigned HOST_WIDE_INT length,
		    unsigned HOST_WIDE_INT value,
		    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
/* Expand string store operation.  Firstly we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
static bool
arm_macro_fusion_p (void)
{
  return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
}
/* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
   for MOVW / MOVT macro fusion.  */

static bool
arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
{
  /* We are trying to fuse
     movw imm / movt imm
     instructions as a group that gets scheduled together.  */

  rtx set_dest = SET_DEST (curr_set);

  if (GET_MODE (set_dest) != SImode)
    return false;

  /* We are trying to match:
     prev (movw)  == (set (reg r0) (const_int imm16))
     curr (movt) == (set (zero_extract (reg r0)
			  (const_int 16)
			  (const_int 16))
		     (const_int imm16_1))
     or
     prev (movw) == (set (reg r1)
		     (high (symbol_ref ("SYM"))))
     curr (movt) == (set (reg r0)
		     (lo_sum (reg r1)
			     (symbol_ref ("SYM"))))  */

  if (GET_CODE (set_dest) == ZERO_EXTRACT)
    {
      if (CONST_INT_P (SET_SRC (curr_set))
	  && CONST_INT_P (SET_SRC (prev_set))
	  && REG_P (XEXP (set_dest, 0))
	  && REG_P (SET_DEST (prev_set))
	  && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
	return true;
    }
  else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
	   && REG_P (SET_DEST (curr_set))
	   && REG_P (SET_DEST (prev_set))
	   && GET_CODE (SET_SRC (prev_set)) == HIGH
	   && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
    return true;

  return false;
}
/* Implement the TARGET_SCHED_MACRO_FUSION_PAIR_P hook.  */

static bool
aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
{
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  if (!prev_set
      || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
      && aarch_crypto_can_dual_issue (prev, curr))
    return true;

  if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
      && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
    return true;

  return false;
}
/* Return true iff the instruction fusion described by OP is enabled.  */
bool
arm_fusion_enabled_p (tune_params::fuse_ops op)
{
  return current_tune->fusible_ops & op;
}
/* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
   scheduled for speculative execution.  Reject the long-running division
   and square-root instructions.  */

static bool
arm_sched_can_speculate_insn (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
      case TYPE_SDIV:
      case TYPE_UDIV:
      case TYPE_FDIVS:
      case TYPE_FDIVD:
      case TYPE_FSQRTS:
      case TYPE_FSQRTD:
      case TYPE_NEON_FP_SQRT_S:
      case TYPE_NEON_FP_SQRT_D:
      case TYPE_NEON_FP_SQRT_S_Q:
      case TYPE_NEON_FP_SQRT_D_Q:
      case TYPE_NEON_FP_DIV_S:
      case TYPE_NEON_FP_DIV_D:
      case TYPE_NEON_FP_DIV_S_Q:
      case TYPE_NEON_FP_DIV_D_Q:
	return false;
      default:
	return true;
    }
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return HOST_WIDE_INT_1U << 29;
}
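/* Editorial illustration (not part of the original source): with this
   hook, AddressSanitizer computes a shadow address as
       shadow = (addr >> 3) + 0x20000000
   so the constant returned here (1 << 29) is the base of the shadow
   memory region on 32-bit ARM.  */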
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
	{
	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
	  if (decl_op1
	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
	    {
	      if ((VAR_P (decl_op1)
		   || TREE_CODE (decl_op1) == CONST_DECL)
		  && (VAR_P (decl_op0)
		      || TREE_CODE (decl_op0) == CONST_DECL))
		return (get_variable_section (decl_op1, false)
			!= get_variable_section (decl_op0, false));

	      if (TREE_CODE (decl_op1) == LABEL_DECL
		  && TREE_CODE (decl_op0) == LABEL_DECL)
		return (DECL_CONTEXT (decl_op1)
			!= DECL_CONTEXT (decl_op0));
	    }

	  return true;
	}
    }

  return false;
}
/* Return TRUE if X is a reference to a value in a constant pool.  */
bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* Remember the last target of arm_set_current_function.  */
static GTY(()) tree arm_previous_fndecl;

/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */

static void
save_restore_target_globals (tree new_tree)
{
  /* If we have a previous state, use it.  */
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    {
      /* Call target_reinit and save the state for TARGET_GLOBALS.  */
      TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
    }

  arm_option_params_internal ();
}
/* Invalidate arm_previous_fndecl.  */

void
arm_reset_previous_fndecl (void)
{
  arm_previous_fndecl = NULL_TREE;
}
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */

static void
arm_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == arm_previous_fndecl)
    return;

  tree old_tree = (arm_previous_fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
		   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If current function has no attributes but previous one did,
     use the default node.  */
  if (! new_tree && old_tree)
    new_tree = target_option_default_node;

  /* If nothing to do return.  #pragma GCC reset or #pragma GCC pop to
     the default have been handled by save_restore_target_globals from
     arm_pragma_target_parse.  */
  if (old_tree == new_tree)
    return;

  arm_previous_fndecl = fndecl;

  /* First set the target options.  */
  cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));

  save_restore_target_globals (new_tree);
}
/* Implement TARGET_OPTION_PRINT.  */

static void
arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  int flags = ptr->x_target_flags;
  const char *fpu_name;

  fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
	      ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);

  fprintf (file, "%*sselected isa %s\n", indent, "",
	   TARGET_THUMB2_P (flags) ? "thumb2" :
	   TARGET_THUMB_P (flags) ? "thumb1" :
	   "arm");

  if (ptr->x_arm_arch_string)
    fprintf (file, "%*sselected architecture %s\n", indent, "",
	     ptr->x_arm_arch_string);

  if (ptr->x_arm_cpu_string)
    fprintf (file, "%*sselected CPU %s\n", indent, "",
	     ptr->x_arm_cpu_string);

  if (ptr->x_arm_tune_string)
    fprintf (file, "%*sselected tune %s\n", indent, "",
	     ptr->x_arm_tune_string);

  fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
}
/* Hook to determine if one function can safely inline another.  */

static bool
arm_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
  bool can_inline = true;

  struct cl_target_option *caller_opts
    = TREE_TARGET_OPTION (caller_tree ? caller_tree
			  : target_option_default_node);

  struct cl_target_option *callee_opts
    = TREE_TARGET_OPTION (callee_tree ? callee_tree
			  : target_option_default_node);

  if (callee_opts == caller_opts)
    return true;

  /* Callee's ISA features should be a subset of the caller's.  */
  struct arm_build_target caller_target;
  struct arm_build_target callee_target;
  caller_target.isa = sbitmap_alloc (isa_num_bits);
  callee_target.isa = sbitmap_alloc (isa_num_bits);

  arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
			      false);
  arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
			      false);
  if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
    can_inline = false;

  sbitmap_free (caller_target.isa);
  sbitmap_free (callee_target.isa);

  /* OK to inline between different modes.
     Function with mode specific instructions, e.g using asm,
     must be explicitly protected with noinline.  */
  return can_inline;
}
/* Hook to fix function's alignment affected by target attribute.  */

static void
arm_relayout_function (tree fndecl)
{
  if (DECL_USER_ALIGN (fndecl))
    return;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
  SET_DECL_ALIGN
    (fndecl,
     FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
}
/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively
   go over the list.  */

static bool
arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
{
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
	  ret = false;
      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
  char *q;

  while ((q = strtok (argstr, ",")) != NULL)
    {
      while (ISSPACE (*q)) ++q;

      argstr = NULL;
      if (!strncmp (q, "thumb", 5))
	opts->x_target_flags |= MASK_THUMB;

      else if (!strncmp (q, "arm", 3))
	opts->x_target_flags &= ~MASK_THUMB;

      else if (!strncmp (q, "fpu=", 4))
	{
	  int fpu_index;
	  if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
				       &fpu_index, CL_TARGET))
	    {
	      error ("invalid fpu for attribute(target(\"%s\"))", q);
	      return false;
	    }
	  if (fpu_index == TARGET_FPU_auto)
	    {
	      /* This doesn't really make sense until we support
		 general dynamic selection of the architecture and all
		 sub-features.  */
	      sorry ("auto fpu selection not currently permitted here");
	      return false;
	    }
	  opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
	}
      else
	{
	  error ("attribute(target(\"%s\")) is unknown", q);
	  return false;
	}
    }

  return true;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
				 struct gcc_options *opts_set)
{
  struct cl_target_option cl_opts;

  if (!arm_valid_target_attribute_rec (args, opts))
    return NULL_TREE;

  cl_target_option_save (&cl_opts, opts);
  arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
  arm_option_check_internal (opts);
  /* Do any overrides, such as global options arch=xxx.  */
  arm_option_override_internal (opts, opts_set);

  return build_target_option_node (opts);
}
static void
add_attribute (const char * mode, tree *attributes)
{
  size_t len = strlen (mode);
  tree value = build_string (len, mode);

  TREE_TYPE (value) = build_array_type (char_type_node,
					build_index_type (size_int (len)));

  *attributes = tree_cons (get_identifier ("target"),
			   build_tree_list (NULL_TREE, value),
			   *attributes);
}
/* For testing.  Insert thumb or arm modes alternatively on functions.  */

static void
arm_insert_attributes (tree fndecl, tree * attributes)
{
  const char *mode;

  if (! TARGET_FLIP_THUMB)
    return;

  if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL (fndecl)
      || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
    return;

  /* Nested definitions must inherit mode.  */
  if (current_function_decl)
    {
      mode = TARGET_THUMB ? "thumb" : "arm";
      add_attribute (mode, attributes);
      return;
    }

  /* If there is already a setting don't change it.  */
  if (lookup_attribute ("target", *attributes) != NULL)
    return;

  mode = thumb_flipper ? "thumb" : "arm";
  add_attribute (mode, attributes);

  thumb_flipper = !thumb_flipper;
}
/* Hook to validate attribute((target("string"))).  */

static bool
arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
			      tree args, int ARG_UNUSED (flags))
{
  bool ret = true;
  struct gcc_options func_options;
  tree cur_tree, new_optimize;
  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (!func_optimize)
    func_optimize = optimization_default_node;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);

  /* Initialize func_options to the defaults.  */
  cl_optimization_restore (&func_options,
			   TREE_OPTIMIZATION (func_optimize));

  cl_target_option_restore (&func_options,
			    TREE_TARGET_OPTION (target_option_default_node));

  /* Set func_options flags with new target mode.  */
  cur_tree = arm_valid_target_attribute_tree (args, &func_options,
					      &global_options_set);

  if (cur_tree == NULL_TREE)
    ret = false;

  new_optimize = build_optimization_node (&func_options);

  DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;

  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;

  finalize_options_struct (&func_options);

  return ret;
}
/* Match an ISA feature bitmap to a named FPU.  We always use the
   first entry that exactly matches the feature set, so that we
   effectively canonicalize the FPU name for the assembler.  */
static const char *
arm_identify_fpu_from_isa (sbitmap isa)
{
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap cand_fpubits (isa_num_bits);

  bitmap_and (fpubits, isa, isa_all_fpubits);

  /* If there are no ISA feature bits relating to the FPU, we must be
     doing soft-float.  */
  if (bitmap_empty_p (fpubits))
    return "softvfp";

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
      if (bitmap_equal_p (fpubits, cand_fpubits))
	return all_fpus[i].name;
    }

  /* We must find an entry, or things have gone wrong.  */
  gcc_unreachable ();
}
void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  fprintf (stream, "\t.syntax unified\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
	  || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
	      && cfun->is_thunk))
	fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
	fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
	fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  asm_fprintf (asm_out_file, "\t.fpu %s\n",
	       (TARGET_SOFT_FLOAT
		? "softvfp"
		: arm_identify_fpu_from_isa (arm_active_target.isa)));

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}
/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support to fuse ldr or str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other instruction,
   FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kind
   instruction fusion can be supported by returning different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
			   int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target, must be described using
   a mask selecting GCC high-lanes.

                 Big-Endian             Little-Endian

GCC             0   1   2   3           3   2   1   0
              | x | x | x | x |       | x | x | x | x |
Architecture    3   2   1   0           3   2   1   0

Low Mask:         { 2, 3 }                { 0, 1 }
High Mask:        { 0, 1 }                { 2, 3 }
*/

rtx
arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   arm_simd_vect_par_cnst_half_p for more details.  */

bool
arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
				     bool high)
{
  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
	  || INTVAL (elt_ideal) != INTVAL (elt_op))
	return false;
    }
  return true;
}
/* Can output mi_thunk for all cases except for non-zero vcall_offset
   in Thumb1.  */
static bool
arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
			 const_tree)
{
  /* For now, we punt and not handle this for TARGET_THUMB1.  */
  if (vcall_offset && TARGET_THUMB1)
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Generate RTL for a conditional branch with rtx comparison CODE in
   mode CC_MODE.  The destination of the unlikely conditional branch
   is LABEL_REF.  */

void
arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
			  rtx label_ref)
{
  rtx x;
  x = gen_rtx_fmt_ee (code, VOIDmode,
		      gen_rtx_REG (cc_mode, CC_REGNUM),
		      const0_rtx);

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
			    pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}
/* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.

   For pure-code sections there is no letter code for this attribute, so
   output all the section flags numerically when this is needed.  */

static bool
arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
{
  if (flags & SECTION_ARM_PURECODE)
    {
      *num = 0x20000000;

      if (!(flags & SECTION_DEBUG))
	*num |= 0x2;
      if (flags & SECTION_EXCLUDE)
	*num |= 0x80000000;
      if (flags & SECTION_WRITE)
	*num |= 0x1;
      if (flags & SECTION_CODE)
	*num |= 0x4;
      if (flags & SECTION_MERGE)
	*num |= 0x10;
      if (flags & SECTION_STRINGS)
	*num |= 0x20;
      if (flags & SECTION_TLS)
	*num |= 0x400;
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	*num |= 0x200;

      return true;
    }

  return false;
}
/* Implement the TARGET_ASM_FUNCTION_SECTION hook.

   If pure-code is passed as an option, make sure all functions are in
   sections that have the SHF_ARM_PURECODE attribute.  */

static section *
arm_function_section (tree decl, enum node_frequency freq,
		      bool startup, bool exit)
{
  const char * section_name;
  section * sec;

  if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
    return default_function_section (decl, freq, startup, exit);

  if (!target_pure_code)
    return default_function_section (decl, freq, startup, exit);

  section_name = DECL_SECTION_NAME (decl);

  /* If a function is not in a named section then it falls under the 'default'
     text section, also known as '.text'.  We can preserve previous behavior as
     the default text section already has the SHF_ARM_PURECODE section
     attribute.  */
  if (!section_name)
    {
      section *default_sec = default_function_section (decl, freq, startup,
						       exit);

      /* If default_sec is not null, then it must be a special section like for
	 example .text.startup.  We set the pure-code attribute and return the
	 same section to preserve existing behavior.  */
      if (default_sec)
	default_sec->common.flags |= SECTION_ARM_PURECODE;
      return default_sec;
    }

  /* Otherwise look whether a section has already been created with
     'section_name'.  */
  sec = get_named_section (decl, section_name, 0);
  if (!sec)
    /* If that is not the case passing NULL as the section's name to
       'get_named_section' will create a section with the declaration's
       section name.  */
    sec = get_named_section (decl, NULL, 0);

  /* Set the SHF_ARM_PURECODE attribute.  */
  sec->common.flags |= SECTION_ARM_PURECODE;

  return sec;
}
/* Implements the TARGET_SECTION_FLAGS hook.

   If DECL is a function declaration and pure-code is passed as an option
   then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
   section's name and RELOC indicates whether the declaration's initializer may
   contain runtime relocations.  */

static unsigned int
arm_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
    flags |= SECTION_ARM_PURECODE;

  return flags;
}
/* Generate call to __aeabi_[mode]divmod (op0, op1).  */

static void
arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
			   rtx op0, rtx op1,
			   rtx *quot_p, rtx *rem_p)
{
  if (mode == SImode)
    gcc_assert (!TARGET_IDIV);

  scalar_int_mode libval_mode
    = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));

  rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
					libval_mode,
					op0, GET_MODE (op0),
					op1, GET_MODE (op1));

  rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
  rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
				       GET_MODE_SIZE (mode));

  gcc_assert (quotient);
  gcc_assert (remainder);

  *quot_p = quotient;
  *rem_p = remainder;
}
/* This function checks for the availability of the coprocessor builtin passed
   in BUILTIN for the current target.  Returns true if it is available and
   false otherwise.  If a BUILTIN is passed for which this function has not
   been implemented it will cause an exception.  */

bool
arm_coproc_builtin_available (enum unspecv builtin)
{
  /* None of these builtins are available in Thumb mode if the target only
     supports Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  switch (builtin)
    {
      case VUNSPEC_CDP:
      case VUNSPEC_LDC:
      case VUNSPEC_LDCL:
      case VUNSPEC_STC:
      case VUNSPEC_STCL:
      case VUNSPEC_MCR:
      case VUNSPEC_MRC:
	if (arm_arch4)
	  return true;
	break;
      case VUNSPEC_CDP2:
      case VUNSPEC_LDC2:
      case VUNSPEC_LDC2L:
      case VUNSPEC_STC2:
      case VUNSPEC_STC2L:
      case VUNSPEC_MCR2:
      case VUNSPEC_MRC2:
	/* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
	   ARMv8-{A,M}.  */
	if (arm_arch5)
	  return true;
	break;
      case VUNSPEC_MCRR:
      case VUNSPEC_MRRC:
	/* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
	   ARMv8-{A,M}.  */
	if (arm_arch6 || arm_arch5te)
	  return true;
	break;
      case VUNSPEC_MCRR2:
      case VUNSPEC_MRRC2:
	if (arm_arch6)
	  return true;
	break;
      default:
	gcc_unreachable ();
    }
  return false;
}
/* This function returns true if OP is a valid memory operand for the ldc and
   stc coprocessor instructions and false otherwise.  */

bool
arm_coproc_ldc_stc_legitimate_address (rtx op)
{
  HOST_WIDE_INT range;
  /* Has to be a memory operand.  */
  if (!MEM_P (op))
    return false;

  op = XEXP (op, 0);

  /* We accept registers.  */
  if (REG_P (op))
    return true;

  switch GET_CODE (op)
    {
      case PLUS:
	{
	  /* Or registers with an offset.  */
	  if (!REG_P (XEXP (op, 0)))
	    return false;

	  op = XEXP (op, 1);

	  /* The offset must be an immediate though.  */
	  if (!CONST_INT_P (op))
	    return false;

	  range = INTVAL (op);

	  /* Within the range of [-1020,1020].  */
	  if (!IN_RANGE (range, -1020, 1020))
	    return false;

	  /* And a multiple of 4.  */
	  return (range % 4) == 0;
	}
      case PRE_INC:
      case POST_INC:
      case PRE_DEC:
      case POST_DEC:
	return REG_P (XEXP (op, 0));
      default:
	gcc_unreachable ();
    }
  return false;
}
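/* Editorial illustration (not part of the original source): the
   addresses [r0], [r0, #8] and [r0, #-1020] are all accepted above,
   while [r0, #2] (not a multiple of 4) and [r0, #1024] (outside
   [-1020, 1020]) are rejected, mirroring the offset encoding of the
   ldc/stc instructions.  */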
#if CHECKING_P
namespace selftest
{

/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  We primarily check for
   inconsistencies in the option extensions at present (extensions
   that duplicate others but aren't marked as aliases).  Furthermore,
   for correct canonicalization later options must never be a subset
   of an earlier option.  Any extension should also only specify other
   feature bits and never an architecture bit.  The architecture is inferred
   from the declaration of the extension.  */
static void
arm_test_cpu_arch_data (void)
{
  const arch_option *arch;
  const cpu_option *cpu;
  auto_sbitmap target_isa (isa_num_bits);
  auto_sbitmap isa1 (isa_num_bits);
  auto_sbitmap isa2 (isa_num_bits);

  for (arch = all_architectures; arch->common.name != NULL; ++arch)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (arch->common.extensions == NULL)
	continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
	{
	  if (ext1->alias)
	    continue;

	  arm_initialize_isa (isa1, ext1->isa_bits);
	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
	    {
	      if (ext2->alias || ext1->remove != ext2->remove)
		continue;

	      arm_initialize_isa (isa2, ext2->isa_bits);
	      /* If the option is a subset of the parent option, it doesn't
		 add anything and so isn't useful.  */
	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

	      /* If the extension specifies any architectural bits then
		 disallow it.  Extensions should only specify feature
		 bits.  */
	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
	    }
	}
    }

  for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (cpu->common.extensions == NULL)
	continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
	{
	  if (ext1->alias)
	    continue;

	  arm_initialize_isa (isa1, ext1->isa_bits);
	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
	    {
	      if (ext2->alias || ext1->remove != ext2->remove)
		continue;

	      arm_initialize_isa (isa2, ext2->isa_bits);
	      /* If the option is a subset of the parent option, it doesn't
		 add anything and so isn't useful.  */
	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

	      /* If the extension specifies any architectural bits then
		 disallow it.  Extensions should only specify feature
		 bits.  */
	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
	    }
	}
    }
}

static void
arm_run_selftests (void)
{
  arm_test_cpu_arch_data ();
}
} /* Namespace selftest.  */
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
#endif /* CHECKING_P */

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-arm.h"