1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
34 #include "stringpool.h"
41 #include "diagnostic-core.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
48 #include "insn-attr.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
62 #include "target-globals.h"
64 #include "tm-constrs.h"
66 #include "optabs-libfuncs.h"
71 /* This file should be included last. */
72 #include "target-def.h"
74 /* Forward definitions of types. */
/* Short aliases for the minipool node and fixup structures used by the
   constant-pool (minipool) handling code; the struct definitions are not
   visible in this portion of the file.  */
75 typedef struct minipool_node Mnode
;
76 typedef struct minipool_fixup Mfix
;
/* Global function-pointer hook for emitting extra object attributes.
   As a file-scope pointer it starts out NULL; presumably assigned by a
   language front end — confirm against the front-end code.  */
78 void (*arm_lang_output_object_attributes_hook
)(void);
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx
);
87 static int arm_needs_doubleword_align (machine_mode
, const_tree
);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets
*arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
93 unsigned HOST_WIDE_INT
, rtx
, rtx
, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap
);
96 static int arm_address_register_rtx_p (rtx
, int);
97 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
98 static bool is_called_in_ARM_mode (tree
);
99 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
100 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
101 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
102 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
103 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
104 inline static int thumb1_index_register_rtx_p (rtx
, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx
, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx
, int);
110 static void arm_print_operand_address (FILE *, machine_mode
, rtx
);
111 static bool arm_print_operand_punct_valid_p (unsigned char code
);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
113 static arm_cc
get_arm_condition_code (rtx
);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx
*, const char *, const char *,
117 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
118 static struct machine_function
*arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
121 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
122 static Mnode
*add_minipool_forward_ref (Mfix
*);
123 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
124 static Mnode
*add_minipool_backward_ref (Mfix
*);
125 static void assign_minipool_offsets (Mfix
*);
126 static void arm_print_value (FILE *, rtx
);
127 static void dump_minipool (rtx_insn
*);
128 static int arm_barrier_cost (rtx_insn
*);
129 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
130 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
131 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree
);
138 static unsigned long arm_compute_func_type (void);
139 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
140 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
141 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
145 static tree
arm_handle_cmse_nonsecure_entry (tree
*, tree
, tree
, int, bool *);
146 static tree
arm_handle_cmse_nonsecure_call (tree
*, tree
, tree
, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree
, const_tree
);
150 static void arm_set_default_type_attributes (tree
);
151 static int arm_adjust_cost (rtx_insn
*, int, rtx_insn
*, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code
,
154 unsigned HOST_WIDE_INT val
,
155 struct four_ints
*return_sequence
);
156 static int optimal_immediate_sequence_1 (enum rtx_code code
,
157 unsigned HOST_WIDE_INT val
,
158 struct four_ints
*return_sequence
,
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree
, tree
);
162 static machine_mode
arm_promote_function_mode (const_tree
,
165 static bool arm_return_in_memory (const_tree
, const_tree
);
166 static rtx
arm_function_value (const_tree
, const_tree
, bool);
167 static rtx
arm_libcall_value_1 (machine_mode
);
168 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
175 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
176 static bool arm_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
177 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
178 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
179 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
180 static void emit_constant_insn (rtx cond
, rtx pattern
);
181 static rtx_insn
*emit_set_insn (rtx
, rtx
);
182 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t
, machine_mode
,
185 static rtx
arm_function_arg (cumulative_args_t
, machine_mode
,
187 static void arm_function_arg_advance (cumulative_args_t
, machine_mode
,
189 static pad_direction
arm_function_arg_padding (machine_mode
, const_tree
);
190 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
191 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
193 static rtx
aapcs_libcall_value (machine_mode
);
194 static int aapcs_select_return_coproc (const_tree
, const_tree
);
196 #ifdef OBJECT_FORMAT_ELF
197 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
198 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
201 static void arm_encode_section_info (tree
, rtx
, int);
204 static void arm_file_end (void);
205 static void arm_file_start (void);
206 static void arm_insert_attributes (tree
, tree
*);
208 static void arm_setup_incoming_varargs (cumulative_args_t
, machine_mode
,
210 static bool arm_pass_by_reference (cumulative_args_t
,
211 machine_mode
, const_tree
, bool);
212 static bool arm_promote_prototypes (const_tree
);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree
);
216 static bool arm_must_pass_in_stack (machine_mode
, const_tree
);
217 static bool arm_return_in_memory (const_tree
, const_tree
);
219 static void arm_unwind_emit (FILE *, rtx_insn
*);
220 static bool arm_output_ttype (rtx
);
221 static void arm_asm_emit_except_personality (rtx
);
223 static void arm_asm_init_sections (void);
224 static rtx
arm_dwarf_register_span (rtx
);
226 static tree
arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree
arm_get_cookie_size (tree
);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree
);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree
arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree
, rtx
);
238 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
239 static void arm_option_override (void);
240 static void arm_option_save (struct cl_target_option
*, struct gcc_options
*);
241 static void arm_option_restore (struct gcc_options
*,
242 struct cl_target_option
*);
243 static void arm_override_options_after_change (void);
244 static void arm_option_print (FILE *, int, struct cl_target_option
*);
245 static void arm_set_current_function (tree
);
246 static bool arm_can_inline_p (tree
, tree
);
247 static void arm_relayout_function (tree
);
248 static bool arm_valid_target_attribute_p (tree
, tree
, tree
, int);
249 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
250 static bool arm_sched_can_speculate_insn (rtx_insn
*);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn
*);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
257 static bool arm_output_addr_const_extra (FILE *, rtx
);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree
);
260 static tree
arm_promoted_type (const_tree t
);
261 static bool arm_scalar_mode_supported_p (scalar_mode
);
262 static bool arm_frame_pointer_required (void);
263 static bool arm_can_eliminate (const int, const int);
264 static void arm_asm_trampoline_template (FILE *);
265 static void arm_trampoline_init (rtx
, tree
, rtx
);
266 static rtx
arm_trampoline_adjust_address (rtx
);
267 static rtx_insn
*arm_pic_static_addr (rtx orig
, rtx reg
);
268 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
269 static bool xscale_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
270 static bool fa726te_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
271 static bool arm_array_mode_supported_p (machine_mode
,
272 unsigned HOST_WIDE_INT
);
273 static machine_mode
arm_preferred_simd_mode (scalar_mode
);
274 static bool arm_class_likely_spilled_p (reg_class_t
);
275 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
276 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
277 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
281 static void arm_conditional_register_usage (void);
282 static enum flt_eval_method
arm_excess_precision (enum excess_precision_type
);
283 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
284 static unsigned int arm_autovectorize_vector_sizes (void);
285 static int arm_default_branch_cost (bool, bool);
286 static int arm_cortex_a5_branch_cost (bool, bool);
287 static int arm_cortex_m_branch_cost (bool, bool);
288 static int arm_cortex_m7_branch_cost (bool, bool);
290 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
291 const unsigned char *sel
);
293 static bool aarch_macro_fusion_pair_p (rtx_insn
*, rtx_insn
*);
295 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
297 int misalign ATTRIBUTE_UNUSED
);
298 static unsigned arm_add_stmt_cost (void *data
, int count
,
299 enum vect_cost_for_stmt kind
,
300 struct _stmt_vec_info
*stmt_info
,
302 enum vect_cost_model_location where
);
304 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
305 bool op0_preserve_value
);
306 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
308 static void arm_sched_fusion_priority (rtx_insn
*, int, int *, int*);
309 static bool arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
311 static section
*arm_function_section (tree
, enum node_frequency
, bool, bool);
312 static bool arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
);
313 static unsigned int arm_elf_section_type_flags (tree decl
, const char *name
,
315 static void arm_expand_divmod_libfunc (rtx
, machine_mode
, rtx
, rtx
, rtx
*, rtx
*);
316 static opt_scalar_float_mode
arm_floatn_mode (int, bool);
317 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode
);
318 static bool arm_modes_tieable_p (machine_mode
, machine_mode
);
320 /* Table of machine attributes. */
321 static const struct attribute_spec arm_attribute_table
[] =
323 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
324 affects_type_identity } */
325 /* Function calls made to this symbol must be done indirectly, because
326 it may lie outside of the 26 bit addressing range of a normal function
328 { "long_call", 0, 0, false, true, true, NULL
, false },
329 /* Whereas these functions are always known to reside within the 26 bit
331 { "short_call", 0, 0, false, true, true, NULL
, false },
332 /* Specify the procedure call conventions for a function. */
333 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
335 /* Interrupt Service Routines have special prologue and epilogue requirements. */
336 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
338 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
340 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
343 /* ARM/PE has three new attributes:
345 dllexport - for exporting a function/variable that will live in a dll
346 dllimport - for importing a function/variable from a dll
348 Microsoft allows multiple declspecs in one __declspec, separating
349 them with spaces. We do NOT support this. Instead, use __declspec
352 { "dllimport", 0, 0, true, false, false, NULL
, false },
353 { "dllexport", 0, 0, true, false, false, NULL
, false },
354 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
356 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
357 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
358 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
359 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
362 /* ARMv8-M Security Extensions support. */
363 { "cmse_nonsecure_entry", 0, 0, true, false, false,
364 arm_handle_cmse_nonsecure_entry
, false },
365 { "cmse_nonsecure_call", 0, 0, true, false, false,
366 arm_handle_cmse_nonsecure_call
, true },
367 { NULL
, 0, 0, false, false, false, NULL
, false }
370 /* Initialize the GCC target structure. */
371 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
372 #undef TARGET_MERGE_DECL_ATTRIBUTES
373 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
376 #undef TARGET_LEGITIMIZE_ADDRESS
377 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
379 #undef TARGET_ATTRIBUTE_TABLE
380 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
382 #undef TARGET_INSERT_ATTRIBUTES
383 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
385 #undef TARGET_ASM_FILE_START
386 #define TARGET_ASM_FILE_START arm_file_start
387 #undef TARGET_ASM_FILE_END
388 #define TARGET_ASM_FILE_END arm_file_end
390 #undef TARGET_ASM_ALIGNED_SI_OP
391 #define TARGET_ASM_ALIGNED_SI_OP NULL
392 #undef TARGET_ASM_INTEGER
393 #define TARGET_ASM_INTEGER arm_assemble_integer
395 #undef TARGET_PRINT_OPERAND
396 #define TARGET_PRINT_OPERAND arm_print_operand
397 #undef TARGET_PRINT_OPERAND_ADDRESS
398 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
399 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
400 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
402 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
403 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
405 #undef TARGET_ASM_FUNCTION_PROLOGUE
406 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
408 #undef TARGET_ASM_FUNCTION_EPILOGUE
409 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
411 #undef TARGET_CAN_INLINE_P
412 #define TARGET_CAN_INLINE_P arm_can_inline_p
414 #undef TARGET_RELAYOUT_FUNCTION
415 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
417 #undef TARGET_OPTION_OVERRIDE
418 #define TARGET_OPTION_OVERRIDE arm_option_override
420 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
421 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
423 #undef TARGET_OPTION_SAVE
424 #define TARGET_OPTION_SAVE arm_option_save
426 #undef TARGET_OPTION_RESTORE
427 #define TARGET_OPTION_RESTORE arm_option_restore
429 #undef TARGET_OPTION_PRINT
430 #define TARGET_OPTION_PRINT arm_option_print
432 #undef TARGET_COMP_TYPE_ATTRIBUTES
433 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
435 #undef TARGET_SCHED_CAN_SPECULATE_INSN
436 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
438 #undef TARGET_SCHED_MACRO_FUSION_P
439 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
441 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
442 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
444 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
445 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
447 #undef TARGET_SCHED_ADJUST_COST
448 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
450 #undef TARGET_SET_CURRENT_FUNCTION
451 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
453 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
454 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
456 #undef TARGET_SCHED_REORDER
457 #define TARGET_SCHED_REORDER arm_sched_reorder
459 #undef TARGET_REGISTER_MOVE_COST
460 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
462 #undef TARGET_MEMORY_MOVE_COST
463 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
465 #undef TARGET_ENCODE_SECTION_INFO
467 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
469 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
472 #undef TARGET_STRIP_NAME_ENCODING
473 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
475 #undef TARGET_ASM_INTERNAL_LABEL
476 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
478 #undef TARGET_FLOATN_MODE
479 #define TARGET_FLOATN_MODE arm_floatn_mode
481 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
482 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
484 #undef TARGET_FUNCTION_VALUE
485 #define TARGET_FUNCTION_VALUE arm_function_value
487 #undef TARGET_LIBCALL_VALUE
488 #define TARGET_LIBCALL_VALUE arm_libcall_value
490 #undef TARGET_FUNCTION_VALUE_REGNO_P
491 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
493 #undef TARGET_ASM_OUTPUT_MI_THUNK
494 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
495 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
496 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
498 #undef TARGET_RTX_COSTS
499 #define TARGET_RTX_COSTS arm_rtx_costs
500 #undef TARGET_ADDRESS_COST
501 #define TARGET_ADDRESS_COST arm_address_cost
503 #undef TARGET_SHIFT_TRUNCATION_MASK
504 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
505 #undef TARGET_VECTOR_MODE_SUPPORTED_P
506 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
507 #undef TARGET_ARRAY_MODE_SUPPORTED_P
508 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
509 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
510 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
511 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
512 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
513 arm_autovectorize_vector_sizes
515 #undef TARGET_MACHINE_DEPENDENT_REORG
516 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
518 #undef TARGET_INIT_BUILTINS
519 #define TARGET_INIT_BUILTINS arm_init_builtins
520 #undef TARGET_EXPAND_BUILTIN
521 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
522 #undef TARGET_BUILTIN_DECL
523 #define TARGET_BUILTIN_DECL arm_builtin_decl
525 #undef TARGET_INIT_LIBFUNCS
526 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
528 #undef TARGET_PROMOTE_FUNCTION_MODE
529 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
530 #undef TARGET_PROMOTE_PROTOTYPES
531 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
532 #undef TARGET_PASS_BY_REFERENCE
533 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
534 #undef TARGET_ARG_PARTIAL_BYTES
535 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
536 #undef TARGET_FUNCTION_ARG
537 #define TARGET_FUNCTION_ARG arm_function_arg
538 #undef TARGET_FUNCTION_ARG_ADVANCE
539 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
540 #undef TARGET_FUNCTION_ARG_PADDING
541 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
542 #undef TARGET_FUNCTION_ARG_BOUNDARY
543 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
545 #undef TARGET_SETUP_INCOMING_VARARGS
546 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
548 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
549 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
551 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
552 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
553 #undef TARGET_TRAMPOLINE_INIT
554 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
555 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
556 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
558 #undef TARGET_WARN_FUNC_RETURN
559 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
561 #undef TARGET_DEFAULT_SHORT_ENUMS
562 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
564 #undef TARGET_ALIGN_ANON_BITFIELD
565 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
567 #undef TARGET_NARROW_VOLATILE_BITFIELD
568 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
570 #undef TARGET_CXX_GUARD_TYPE
571 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
573 #undef TARGET_CXX_GUARD_MASK_BIT
574 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
576 #undef TARGET_CXX_GET_COOKIE_SIZE
577 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
579 #undef TARGET_CXX_COOKIE_HAS_SIZE
580 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
582 #undef TARGET_CXX_CDTOR_RETURNS_THIS
583 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
585 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
586 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
588 #undef TARGET_CXX_USE_AEABI_ATEXIT
589 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
591 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
592 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
593 arm_cxx_determine_class_data_visibility
595 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
596 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
598 #undef TARGET_RETURN_IN_MSB
599 #define TARGET_RETURN_IN_MSB arm_return_in_msb
601 #undef TARGET_RETURN_IN_MEMORY
602 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
604 #undef TARGET_MUST_PASS_IN_STACK
605 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
608 #undef TARGET_ASM_UNWIND_EMIT
609 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
611 /* EABI unwinding tables use a different format for the typeinfo tables. */
612 #undef TARGET_ASM_TTYPE
613 #define TARGET_ASM_TTYPE arm_output_ttype
615 #undef TARGET_ARM_EABI_UNWINDER
616 #define TARGET_ARM_EABI_UNWINDER true
618 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
619 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
621 #endif /* ARM_UNWIND_INFO */
623 #undef TARGET_ASM_INIT_SECTIONS
624 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
626 #undef TARGET_DWARF_REGISTER_SPAN
627 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
629 #undef TARGET_CANNOT_COPY_INSN_P
630 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
633 #undef TARGET_HAVE_TLS
634 #define TARGET_HAVE_TLS true
637 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
638 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
640 #undef TARGET_LEGITIMATE_CONSTANT_P
641 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
643 #undef TARGET_CANNOT_FORCE_CONST_MEM
644 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
646 #undef TARGET_MAX_ANCHOR_OFFSET
647 #define TARGET_MAX_ANCHOR_OFFSET 4095
649 /* The minimum is set such that the total size of the block
650 for a particular anchor is -4088 + 1 + 4095 bytes, which is
651 divisible by eight, ensuring natural spacing of anchors. */
652 #undef TARGET_MIN_ANCHOR_OFFSET
653 #define TARGET_MIN_ANCHOR_OFFSET -4088
655 #undef TARGET_SCHED_ISSUE_RATE
656 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
658 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
659 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
660 arm_first_cycle_multipass_dfa_lookahead
662 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
663 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
664 arm_first_cycle_multipass_dfa_lookahead_guard
666 #undef TARGET_MANGLE_TYPE
667 #define TARGET_MANGLE_TYPE arm_mangle_type
669 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
670 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
672 #undef TARGET_BUILD_BUILTIN_VA_LIST
673 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
674 #undef TARGET_EXPAND_BUILTIN_VA_START
675 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
676 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
677 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
680 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
681 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
684 #undef TARGET_LEGITIMATE_ADDRESS_P
685 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
687 #undef TARGET_PREFERRED_RELOAD_CLASS
688 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
690 #undef TARGET_PROMOTED_TYPE
691 #define TARGET_PROMOTED_TYPE arm_promoted_type
693 #undef TARGET_SCALAR_MODE_SUPPORTED_P
694 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
696 #undef TARGET_COMPUTE_FRAME_LAYOUT
697 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
699 #undef TARGET_FRAME_POINTER_REQUIRED
700 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
702 #undef TARGET_CAN_ELIMINATE
703 #define TARGET_CAN_ELIMINATE arm_can_eliminate
705 #undef TARGET_CONDITIONAL_REGISTER_USAGE
706 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
708 #undef TARGET_CLASS_LIKELY_SPILLED_P
709 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
711 #undef TARGET_VECTORIZE_BUILTINS
712 #define TARGET_VECTORIZE_BUILTINS
714 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
715 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
716 arm_builtin_vectorized_function
718 #undef TARGET_VECTOR_ALIGNMENT
719 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
721 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
722 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
723 arm_vector_alignment_reachable
725 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
726 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
727 arm_builtin_support_vector_misalignment
729 #undef TARGET_PREFERRED_RENAME_CLASS
730 #define TARGET_PREFERRED_RENAME_CLASS \
731 arm_preferred_rename_class
733 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
734 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
735 arm_vectorize_vec_perm_const_ok
737 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
738 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
739 arm_builtin_vectorization_cost
740 #undef TARGET_VECTORIZE_ADD_STMT_COST
741 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
743 #undef TARGET_CANONICALIZE_COMPARISON
744 #define TARGET_CANONICALIZE_COMPARISON \
745 arm_canonicalize_comparison
747 #undef TARGET_ASAN_SHADOW_OFFSET
748 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
750 #undef MAX_INSN_PER_IT_BLOCK
751 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
753 #undef TARGET_CAN_USE_DOLOOP_P
754 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
756 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
757 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
759 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
760 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
762 #undef TARGET_SCHED_FUSION_PRIORITY
763 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
765 #undef TARGET_ASM_FUNCTION_SECTION
766 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
768 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
769 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
771 #undef TARGET_SECTION_TYPE_FLAGS
772 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
774 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
775 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
777 #undef TARGET_C_EXCESS_PRECISION
778 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
780 /* Although the architecture reserves bits 0 and 1, only the former is
781 used for ARM/Thumb ISA selection in v7 and earlier versions. */
782 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
783 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
785 #undef TARGET_FIXED_CONDITION_CODE_REGS
786 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
788 #undef TARGET_HARD_REGNO_MODE_OK
789 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
791 #undef TARGET_MODES_TIEABLE_P
792 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
794 /* Obstack for minipool constant handling. */
795 static struct obstack minipool_obstack
;
/* NOTE(review): presumably marks the base object of the obstack so that
   all minipool allocations can be released at once — confirm where it is
   used by the minipool routines.  */
796 static char * minipool_startobj
;
798 /* The maximum number of insns skipped which
799 will be conditionalised if possible. */
/* Default budget of 5; a file-scope tunable rather than a macro so it can
   be adjusted at run time.  */
800 static int max_insns_skipped
= 5;
/* Assembler output stream; `extern` — defined elsewhere in GCC.  */
802 extern FILE * asm_out_file
;
804 /* True if we are currently building a constant table. */
805 int making_const_table
;
807 /* The processor for which instructions should be scheduled. */
/* Initialized to TARGET_CPU_arm_none, i.e. no tuning CPU selected yet;
   presumably overwritten during option processing — confirm.  */
808 enum processor_type arm_tune
= TARGET_CPU_arm_none
;
810 /* The current tuning set. */
/* Pointer to the active tune_params; file-scope, so NULL until set.  */
811 const struct tune_params
*current_tune
;
813 /* Which floating point hardware to schedule for. */
816 /* Used for Thumb call_via trampolines. */
/* One label per candidate register (14 entries); NOTE(review): presumably
   indexed by register number — confirm against the call_via emission code.  */
817 rtx thumb_call_via_label
[14];
/* Bitmask/flag recording which call_via labels are actually needed.  */
818 static int thumb_call_reg_needed
;
820 /* The bits in this mask specify which instruction scheduling options should
/* Bit mask of tuning options; starts cleared.  */
822 unsigned int tune_flags
= 0;
824 /* The highest ARM architecture version supported by the
/* Defaults to BASE_ARCH_0 (no architecture) until options are
   processed — TODO confirm.  */
826 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
828 /* Active target architecture and tuning. */
/* Single global describing the architecture/tuning currently in effect.  */
830 struct arm_build_target arm_active_target
;
832 /* The following are used in the arm.md file as equivalents to bits
833 in the above two flag variables. */
835 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
838 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
841 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
844 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
847 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
850 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
853 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
856 /* Nonzero if this chip supports the ARM 6K extensions. */
859 /* Nonzero if this chip supports the ARM 6KZ extensions. */
862 /* Nonzero if instructions present in ARMv6-M can be used. */
865 /* Nonzero if this chip supports the ARM 7 extensions. */
868 /* Nonzero if this chip supports the Large Physical Address Extension. */
869 int arm_arch_lpae
= 0;
871 /* Nonzero if instructions not present in the 'M' profile can be used. */
872 int arm_arch_notm
= 0;
874 /* Nonzero if instructions present in ARMv7E-M can be used. */
877 /* Nonzero if instructions present in ARMv8 can be used. */
880 /* Nonzero if this chip supports the ARMv8.1 extensions. */
883 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
886 /* Nonzero if this chip supports the FP16 instructions extension of ARM
888 int arm_fp16_inst
= 0;
890 /* Nonzero if this chip can benefit from load scheduling. */
891 int arm_ld_sched
= 0;
893 /* Nonzero if this chip is a StrongARM. */
894 int arm_tune_strongarm
= 0;
896 /* Nonzero if this chip supports Intel Wireless MMX technology. */
897 int arm_arch_iwmmxt
= 0;
899 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
900 int arm_arch_iwmmxt2
= 0;
902 /* Nonzero if this chip is an XScale. */
903 int arm_arch_xscale
= 0;
905 /* Nonzero if tuning for XScale */
906 int arm_tune_xscale
= 0;
908 /* Nonzero if we want to tune for stores that access the write-buffer.
909 This typically means an ARM6 or ARM7 with MMU or MPU. */
910 int arm_tune_wbuf
= 0;
912 /* Nonzero if tuning for Cortex-A9. */
913 int arm_tune_cortex_a9
= 0;
915 /* Nonzero if we should define __THUMB_INTERWORK__ in the
917 XXX This is a bit of a hack, it's intended to help work around
918 problems in GLD which doesn't understand that armv5t code is
919 interworking clean. */
920 int arm_cpp_interwork
= 0;
922 /* Nonzero if chip supports Thumb 1. */
925 /* Nonzero if chip supports Thumb 2. */
928 /* Nonzero if chip supports integer division instruction. */
929 int arm_arch_arm_hwdiv
;
930 int arm_arch_thumb_hwdiv
;
932 /* Nonzero if chip disallows volatile memory access in IT block. */
933 int arm_arch_no_volatile_ce
;
935 /* Nonzero if we should use Neon to handle 64-bits operations rather
936 than core registers. */
937 int prefer_neon_for_64bits
= 0;
939 /* Nonzero if we shouldn't use literal pools. */
940 bool arm_disable_literal_pool
= false;
942 /* The register number to be used for the PIC offset register. */
943 unsigned arm_pic_register
= INVALID_REGNUM
;
945 enum arm_pcs arm_pcs_default
;
947 /* For an explanation of these variables, see final_prescan_insn below. */
949 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
950 enum arm_cond_code arm_current_cc
;
953 int arm_target_label
;
954 /* The number of conditionally executed insns, including the current insn. */
955 int arm_condexec_count
= 0;
956 /* A bitmask specifying the patterns for the IT block.
957 Zero means do not output an IT block before this insn. */
958 int arm_condexec_mask
= 0;
959 /* The number of bits used in arm_condexec_mask. */
960 int arm_condexec_masklen
= 0;
962 /* Nonzero if chip supports the ARMv8 CRC instructions. */
963 int arm_arch_crc
= 0;
965 /* Nonzero if chip supports the ARMv8-M security extensions. */
966 int arm_arch_cmse
= 0;
968 /* Nonzero if the core has a very small, high-latency, multiply unit. */
969 int arm_m_profile_small_mul
= 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
/* Assembler mnemonic for logical-shift-left.  */
#define ARM_LSL_NAME "lsl"
/* True iff the two NUL-terminated strings are equal.  */
#define streq(string1, string2) (strcmp (string1, string2) == 0)

/* The set of low/work registers Thumb-2 may use as scratch: r0-r7 minus the
   frame pointer, SP, PC and (when in use) the PIC register.  */
#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
991 /* Initialization code. */
/* NOTE(review): the declarations below appear to have lost their enclosing
   definition in extraction (original lines 992-994 and 998-999 are absent) —
   confirm against upstream gcc/config/arm/arm.c before editing.  */
995 enum processor_type scheduler
;
996 unsigned int tune_flags
;
997 const struct tune_params
*tune
;
/* Prefetch-tuning triple; fields appear to be {num_slots, l1_cache_size,
   l1_cache_line_size} per the parameter names of the macro below, with -1
   meaning not applicable.  */
1000 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
/* NOTE(review): the continuation body of ARM_PREFETCH_BENEFICIAL (original
   lines 1002-1007) is missing from this extract.  */
1001 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1008 /* arm generic vectorizer costs. */
1010 struct cpu_vec_costs arm_default_vec_cost
= {
1011 1, /* scalar_stmt_cost. */
1012 1, /* scalar load_cost. */
1013 1, /* scalar_store_cost. */
1014 1, /* vec_stmt_cost. */
1015 1, /* vec_to_scalar_cost. */
1016 1, /* scalar_to_vec_cost. */
1017 1, /* vec_align_load_cost. */
1018 1, /* vec_unalign_load_cost. */
1019 1, /* vec_unalign_store_cost. */
1020 1, /* vec_store_cost. */
1021 3, /* cond_taken_branch_cost. */
1022 1, /* cond_not_taken_branch_cost. */
1025 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1026 #include "aarch-cost-tables.h"
/* NOTE(review): Cortex-A9 per-insn cost table.  Several initializer lines
   (group braces and some fields, per gaps in the fused line numbering) are
   missing from this extract; code left byte-identical — restore dropped
   lines from upstream gcc/config/arm/arm.c before compiling.  */
1030 const struct cpu_cost_table cortexa9_extra_costs
=
1037 COSTS_N_INSNS (1), /* shift_reg. */
1038 COSTS_N_INSNS (1), /* arith_shift. */
1039 COSTS_N_INSNS (2), /* arith_shift_reg. */
1041 COSTS_N_INSNS (1), /* log_shift_reg. */
1042 COSTS_N_INSNS (1), /* extend. */
1043 COSTS_N_INSNS (2), /* extend_arith. */
1044 COSTS_N_INSNS (1), /* bfi. */
1045 COSTS_N_INSNS (1), /* bfx. */
1049 true /* non_exec_costs_exec. */
1054 COSTS_N_INSNS (3), /* simple. */
1055 COSTS_N_INSNS (3), /* flag_setting. */
1056 COSTS_N_INSNS (2), /* extend. */
1057 COSTS_N_INSNS (3), /* add. */
1058 COSTS_N_INSNS (2), /* extend_add. */
1059 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1063 0, /* simple (N/A). */
1064 0, /* flag_setting (N/A). */
1065 COSTS_N_INSNS (4), /* extend. */
1067 COSTS_N_INSNS (4), /* extend_add. */
1073 COSTS_N_INSNS (2), /* load. */
1074 COSTS_N_INSNS (2), /* load_sign_extend. */
1075 COSTS_N_INSNS (2), /* ldrd. */
1076 COSTS_N_INSNS (2), /* ldm_1st. */
1077 1, /* ldm_regs_per_insn_1st. */
1078 2, /* ldm_regs_per_insn_subsequent. */
1079 COSTS_N_INSNS (5), /* loadf. */
1080 COSTS_N_INSNS (5), /* loadd. */
1081 COSTS_N_INSNS (1), /* load_unaligned. */
1082 COSTS_N_INSNS (2), /* store. */
1083 COSTS_N_INSNS (2), /* strd. */
1084 COSTS_N_INSNS (2), /* stm_1st. */
1085 1, /* stm_regs_per_insn_1st. */
1086 2, /* stm_regs_per_insn_subsequent. */
1087 COSTS_N_INSNS (1), /* storef. */
1088 COSTS_N_INSNS (1), /* stored. */
1089 COSTS_N_INSNS (1), /* store_unaligned. */
1090 COSTS_N_INSNS (1), /* loadv. */
1091 COSTS_N_INSNS (1) /* storev. */
1096 COSTS_N_INSNS (14), /* div. */
1097 COSTS_N_INSNS (4), /* mult. */
1098 COSTS_N_INSNS (7), /* mult_addsub. */
1099 COSTS_N_INSNS (30), /* fma. */
1100 COSTS_N_INSNS (3), /* addsub. */
1101 COSTS_N_INSNS (1), /* fpconst. */
1102 COSTS_N_INSNS (1), /* neg. */
1103 COSTS_N_INSNS (3), /* compare. */
1104 COSTS_N_INSNS (3), /* widen. */
1105 COSTS_N_INSNS (3), /* narrow. */
1106 COSTS_N_INSNS (3), /* toint. */
1107 COSTS_N_INSNS (3), /* fromint. */
1108 COSTS_N_INSNS (3) /* roundint. */
1112 COSTS_N_INSNS (24), /* div. */
1113 COSTS_N_INSNS (5), /* mult. */
1114 COSTS_N_INSNS (8), /* mult_addsub. */
1115 COSTS_N_INSNS (30), /* fma. */
1116 COSTS_N_INSNS (3), /* addsub. */
1117 COSTS_N_INSNS (1), /* fpconst. */
1118 COSTS_N_INSNS (1), /* neg. */
1119 COSTS_N_INSNS (3), /* compare. */
1120 COSTS_N_INSNS (3), /* widen. */
1121 COSTS_N_INSNS (3), /* narrow. */
1122 COSTS_N_INSNS (3), /* toint. */
1123 COSTS_N_INSNS (3), /* fromint. */
1124 COSTS_N_INSNS (3) /* roundint. */
1129 COSTS_N_INSNS (1) /* alu. */
/* NOTE(review): Cortex-A8 per-insn cost table.  Interior initializer lines
   are missing (gaps in fused numbering); code left byte-identical —
   restore from upstream before compiling.  */
1133 const struct cpu_cost_table cortexa8_extra_costs
=
1139 COSTS_N_INSNS (1), /* shift. */
1141 COSTS_N_INSNS (1), /* arith_shift. */
1142 0, /* arith_shift_reg. */
1143 COSTS_N_INSNS (1), /* log_shift. */
1144 0, /* log_shift_reg. */
1146 0, /* extend_arith. */
1152 true /* non_exec_costs_exec. */
1157 COSTS_N_INSNS (1), /* simple. */
1158 COSTS_N_INSNS (1), /* flag_setting. */
1159 COSTS_N_INSNS (1), /* extend. */
1160 COSTS_N_INSNS (1), /* add. */
1161 COSTS_N_INSNS (1), /* extend_add. */
1162 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1166 0, /* simple (N/A). */
1167 0, /* flag_setting (N/A). */
1168 COSTS_N_INSNS (2), /* extend. */
1170 COSTS_N_INSNS (2), /* extend_add. */
1176 COSTS_N_INSNS (1), /* load. */
1177 COSTS_N_INSNS (1), /* load_sign_extend. */
1178 COSTS_N_INSNS (1), /* ldrd. */
1179 COSTS_N_INSNS (1), /* ldm_1st. */
1180 1, /* ldm_regs_per_insn_1st. */
1181 2, /* ldm_regs_per_insn_subsequent. */
1182 COSTS_N_INSNS (1), /* loadf. */
1183 COSTS_N_INSNS (1), /* loadd. */
1184 COSTS_N_INSNS (1), /* load_unaligned. */
1185 COSTS_N_INSNS (1), /* store. */
1186 COSTS_N_INSNS (1), /* strd. */
1187 COSTS_N_INSNS (1), /* stm_1st. */
1188 1, /* stm_regs_per_insn_1st. */
1189 2, /* stm_regs_per_insn_subsequent. */
1190 COSTS_N_INSNS (1), /* storef. */
1191 COSTS_N_INSNS (1), /* stored. */
1192 COSTS_N_INSNS (1), /* store_unaligned. */
1193 COSTS_N_INSNS (1), /* loadv. */
1194 COSTS_N_INSNS (1) /* storev. */
1199 COSTS_N_INSNS (36), /* div. */
1200 COSTS_N_INSNS (11), /* mult. */
1201 COSTS_N_INSNS (20), /* mult_addsub. */
1202 COSTS_N_INSNS (30), /* fma. */
1203 COSTS_N_INSNS (9), /* addsub. */
1204 COSTS_N_INSNS (3), /* fpconst. */
1205 COSTS_N_INSNS (3), /* neg. */
1206 COSTS_N_INSNS (6), /* compare. */
1207 COSTS_N_INSNS (4), /* widen. */
1208 COSTS_N_INSNS (4), /* narrow. */
1209 COSTS_N_INSNS (8), /* toint. */
1210 COSTS_N_INSNS (8), /* fromint. */
1211 COSTS_N_INSNS (8) /* roundint. */
1215 COSTS_N_INSNS (64), /* div. */
1216 COSTS_N_INSNS (16), /* mult. */
1217 COSTS_N_INSNS (25), /* mult_addsub. */
1218 COSTS_N_INSNS (30), /* fma. */
1219 COSTS_N_INSNS (9), /* addsub. */
1220 COSTS_N_INSNS (3), /* fpconst. */
1221 COSTS_N_INSNS (3), /* neg. */
1222 COSTS_N_INSNS (6), /* compare. */
1223 COSTS_N_INSNS (6), /* widen. */
1224 COSTS_N_INSNS (6), /* narrow. */
1225 COSTS_N_INSNS (8), /* toint. */
1226 COSTS_N_INSNS (8), /* fromint. */
1227 COSTS_N_INSNS (8) /* roundint. */
1232 COSTS_N_INSNS (1) /* alu. */
/* NOTE(review): Cortex-A5 per-insn cost table.  Interior initializer lines
   are missing (gaps in fused numbering); code left byte-identical —
   restore from upstream before compiling.  */
1236 const struct cpu_cost_table cortexa5_extra_costs
=
1242 COSTS_N_INSNS (1), /* shift. */
1243 COSTS_N_INSNS (1), /* shift_reg. */
1244 COSTS_N_INSNS (1), /* arith_shift. */
1245 COSTS_N_INSNS (1), /* arith_shift_reg. */
1246 COSTS_N_INSNS (1), /* log_shift. */
1247 COSTS_N_INSNS (1), /* log_shift_reg. */
1248 COSTS_N_INSNS (1), /* extend. */
1249 COSTS_N_INSNS (1), /* extend_arith. */
1250 COSTS_N_INSNS (1), /* bfi. */
1251 COSTS_N_INSNS (1), /* bfx. */
1252 COSTS_N_INSNS (1), /* clz. */
1253 COSTS_N_INSNS (1), /* rev. */
1255 true /* non_exec_costs_exec. */
1262 COSTS_N_INSNS (1), /* flag_setting. */
1263 COSTS_N_INSNS (1), /* extend. */
1264 COSTS_N_INSNS (1), /* add. */
1265 COSTS_N_INSNS (1), /* extend_add. */
1266 COSTS_N_INSNS (7) /* idiv. */
1270 0, /* simple (N/A). */
1271 0, /* flag_setting (N/A). */
1272 COSTS_N_INSNS (1), /* extend. */
1274 COSTS_N_INSNS (2), /* extend_add. */
1280 COSTS_N_INSNS (1), /* load. */
1281 COSTS_N_INSNS (1), /* load_sign_extend. */
1282 COSTS_N_INSNS (6), /* ldrd. */
1283 COSTS_N_INSNS (1), /* ldm_1st. */
1284 1, /* ldm_regs_per_insn_1st. */
1285 2, /* ldm_regs_per_insn_subsequent. */
1286 COSTS_N_INSNS (2), /* loadf. */
1287 COSTS_N_INSNS (4), /* loadd. */
1288 COSTS_N_INSNS (1), /* load_unaligned. */
1289 COSTS_N_INSNS (1), /* store. */
1290 COSTS_N_INSNS (3), /* strd. */
1291 COSTS_N_INSNS (1), /* stm_1st. */
1292 1, /* stm_regs_per_insn_1st. */
1293 2, /* stm_regs_per_insn_subsequent. */
1294 COSTS_N_INSNS (2), /* storef. */
1295 COSTS_N_INSNS (2), /* stored. */
1296 COSTS_N_INSNS (1), /* store_unaligned. */
1297 COSTS_N_INSNS (1), /* loadv. */
1298 COSTS_N_INSNS (1) /* storev. */
1303 COSTS_N_INSNS (15), /* div. */
1304 COSTS_N_INSNS (3), /* mult. */
1305 COSTS_N_INSNS (7), /* mult_addsub. */
1306 COSTS_N_INSNS (7), /* fma. */
1307 COSTS_N_INSNS (3), /* addsub. */
1308 COSTS_N_INSNS (3), /* fpconst. */
1309 COSTS_N_INSNS (3), /* neg. */
1310 COSTS_N_INSNS (3), /* compare. */
1311 COSTS_N_INSNS (3), /* widen. */
1312 COSTS_N_INSNS (3), /* narrow. */
1313 COSTS_N_INSNS (3), /* toint. */
1314 COSTS_N_INSNS (3), /* fromint. */
1315 COSTS_N_INSNS (3) /* roundint. */
1319 COSTS_N_INSNS (30), /* div. */
1320 COSTS_N_INSNS (6), /* mult. */
1321 COSTS_N_INSNS (10), /* mult_addsub. */
1322 COSTS_N_INSNS (7), /* fma. */
1323 COSTS_N_INSNS (3), /* addsub. */
1324 COSTS_N_INSNS (3), /* fpconst. */
1325 COSTS_N_INSNS (3), /* neg. */
1326 COSTS_N_INSNS (3), /* compare. */
1327 COSTS_N_INSNS (3), /* widen. */
1328 COSTS_N_INSNS (3), /* narrow. */
1329 COSTS_N_INSNS (3), /* toint. */
1330 COSTS_N_INSNS (3), /* fromint. */
1331 COSTS_N_INSNS (3) /* roundint. */
1336 COSTS_N_INSNS (1) /* alu. */
/* NOTE(review): Cortex-A7 per-insn cost table.  Interior initializer lines
   are missing (gaps in fused numbering); code left byte-identical —
   restore from upstream before compiling.  */
1341 const struct cpu_cost_table cortexa7_extra_costs
=
1347 COSTS_N_INSNS (1), /* shift. */
1348 COSTS_N_INSNS (1), /* shift_reg. */
1349 COSTS_N_INSNS (1), /* arith_shift. */
1350 COSTS_N_INSNS (1), /* arith_shift_reg. */
1351 COSTS_N_INSNS (1), /* log_shift. */
1352 COSTS_N_INSNS (1), /* log_shift_reg. */
1353 COSTS_N_INSNS (1), /* extend. */
1354 COSTS_N_INSNS (1), /* extend_arith. */
1355 COSTS_N_INSNS (1), /* bfi. */
1356 COSTS_N_INSNS (1), /* bfx. */
1357 COSTS_N_INSNS (1), /* clz. */
1358 COSTS_N_INSNS (1), /* rev. */
1360 true /* non_exec_costs_exec. */
1367 COSTS_N_INSNS (1), /* flag_setting. */
1368 COSTS_N_INSNS (1), /* extend. */
1369 COSTS_N_INSNS (1), /* add. */
1370 COSTS_N_INSNS (1), /* extend_add. */
1371 COSTS_N_INSNS (7) /* idiv. */
1375 0, /* simple (N/A). */
1376 0, /* flag_setting (N/A). */
1377 COSTS_N_INSNS (1), /* extend. */
1379 COSTS_N_INSNS (2), /* extend_add. */
1385 COSTS_N_INSNS (1), /* load. */
1386 COSTS_N_INSNS (1), /* load_sign_extend. */
1387 COSTS_N_INSNS (3), /* ldrd. */
1388 COSTS_N_INSNS (1), /* ldm_1st. */
1389 1, /* ldm_regs_per_insn_1st. */
1390 2, /* ldm_regs_per_insn_subsequent. */
1391 COSTS_N_INSNS (2), /* loadf. */
1392 COSTS_N_INSNS (2), /* loadd. */
1393 COSTS_N_INSNS (1), /* load_unaligned. */
1394 COSTS_N_INSNS (1), /* store. */
1395 COSTS_N_INSNS (3), /* strd. */
1396 COSTS_N_INSNS (1), /* stm_1st. */
1397 1, /* stm_regs_per_insn_1st. */
1398 2, /* stm_regs_per_insn_subsequent. */
1399 COSTS_N_INSNS (2), /* storef. */
1400 COSTS_N_INSNS (2), /* stored. */
1401 COSTS_N_INSNS (1), /* store_unaligned. */
1402 COSTS_N_INSNS (1), /* loadv. */
1403 COSTS_N_INSNS (1) /* storev. */
1408 COSTS_N_INSNS (15), /* div. */
1409 COSTS_N_INSNS (3), /* mult. */
1410 COSTS_N_INSNS (7), /* mult_addsub. */
1411 COSTS_N_INSNS (7), /* fma. */
1412 COSTS_N_INSNS (3), /* addsub. */
1413 COSTS_N_INSNS (3), /* fpconst. */
1414 COSTS_N_INSNS (3), /* neg. */
1415 COSTS_N_INSNS (3), /* compare. */
1416 COSTS_N_INSNS (3), /* widen. */
1417 COSTS_N_INSNS (3), /* narrow. */
1418 COSTS_N_INSNS (3), /* toint. */
1419 COSTS_N_INSNS (3), /* fromint. */
1420 COSTS_N_INSNS (3) /* roundint. */
1424 COSTS_N_INSNS (30), /* div. */
1425 COSTS_N_INSNS (6), /* mult. */
1426 COSTS_N_INSNS (10), /* mult_addsub. */
1427 COSTS_N_INSNS (7), /* fma. */
1428 COSTS_N_INSNS (3), /* addsub. */
1429 COSTS_N_INSNS (3), /* fpconst. */
1430 COSTS_N_INSNS (3), /* neg. */
1431 COSTS_N_INSNS (3), /* compare. */
1432 COSTS_N_INSNS (3), /* widen. */
1433 COSTS_N_INSNS (3), /* narrow. */
1434 COSTS_N_INSNS (3), /* toint. */
1435 COSTS_N_INSNS (3), /* fromint. */
1436 COSTS_N_INSNS (3) /* roundint. */
1441 COSTS_N_INSNS (1) /* alu. */
/* NOTE(review): Cortex-A12 per-insn cost table.  Interior initializer lines
   are missing (gaps in fused numbering); code left byte-identical —
   restore from upstream before compiling.  */
1445 const struct cpu_cost_table cortexa12_extra_costs
=
1452 COSTS_N_INSNS (1), /* shift_reg. */
1453 COSTS_N_INSNS (1), /* arith_shift. */
1454 COSTS_N_INSNS (1), /* arith_shift_reg. */
1455 COSTS_N_INSNS (1), /* log_shift. */
1456 COSTS_N_INSNS (1), /* log_shift_reg. */
1458 COSTS_N_INSNS (1), /* extend_arith. */
1460 COSTS_N_INSNS (1), /* bfx. */
1461 COSTS_N_INSNS (1), /* clz. */
1462 COSTS_N_INSNS (1), /* rev. */
1464 true /* non_exec_costs_exec. */
1469 COSTS_N_INSNS (2), /* simple. */
1470 COSTS_N_INSNS (3), /* flag_setting. */
1471 COSTS_N_INSNS (2), /* extend. */
1472 COSTS_N_INSNS (3), /* add. */
1473 COSTS_N_INSNS (2), /* extend_add. */
1474 COSTS_N_INSNS (18) /* idiv. */
1478 0, /* simple (N/A). */
1479 0, /* flag_setting (N/A). */
1480 COSTS_N_INSNS (3), /* extend. */
1482 COSTS_N_INSNS (3), /* extend_add. */
1488 COSTS_N_INSNS (3), /* load. */
1489 COSTS_N_INSNS (3), /* load_sign_extend. */
1490 COSTS_N_INSNS (3), /* ldrd. */
1491 COSTS_N_INSNS (3), /* ldm_1st. */
1492 1, /* ldm_regs_per_insn_1st. */
1493 2, /* ldm_regs_per_insn_subsequent. */
1494 COSTS_N_INSNS (3), /* loadf. */
1495 COSTS_N_INSNS (3), /* loadd. */
1496 0, /* load_unaligned. */
1500 1, /* stm_regs_per_insn_1st. */
1501 2, /* stm_regs_per_insn_subsequent. */
1502 COSTS_N_INSNS (2), /* storef. */
1503 COSTS_N_INSNS (2), /* stored. */
1504 0, /* store_unaligned. */
1505 COSTS_N_INSNS (1), /* loadv. */
1506 COSTS_N_INSNS (1) /* storev. */
1511 COSTS_N_INSNS (17), /* div. */
1512 COSTS_N_INSNS (4), /* mult. */
1513 COSTS_N_INSNS (8), /* mult_addsub. */
1514 COSTS_N_INSNS (8), /* fma. */
1515 COSTS_N_INSNS (4), /* addsub. */
1516 COSTS_N_INSNS (2), /* fpconst. */
1517 COSTS_N_INSNS (2), /* neg. */
1518 COSTS_N_INSNS (2), /* compare. */
1519 COSTS_N_INSNS (4), /* widen. */
1520 COSTS_N_INSNS (4), /* narrow. */
1521 COSTS_N_INSNS (4), /* toint. */
1522 COSTS_N_INSNS (4), /* fromint. */
1523 COSTS_N_INSNS (4) /* roundint. */
1527 COSTS_N_INSNS (31), /* div. */
1528 COSTS_N_INSNS (4), /* mult. */
1529 COSTS_N_INSNS (8), /* mult_addsub. */
1530 COSTS_N_INSNS (8), /* fma. */
1531 COSTS_N_INSNS (4), /* addsub. */
1532 COSTS_N_INSNS (2), /* fpconst. */
1533 COSTS_N_INSNS (2), /* neg. */
1534 COSTS_N_INSNS (2), /* compare. */
1535 COSTS_N_INSNS (4), /* widen. */
1536 COSTS_N_INSNS (4), /* narrow. */
1537 COSTS_N_INSNS (4), /* toint. */
1538 COSTS_N_INSNS (4), /* fromint. */
1539 COSTS_N_INSNS (4) /* roundint. */
1544 COSTS_N_INSNS (1) /* alu. */
/* NOTE(review): Cortex-A15 per-insn cost table.  Interior initializer lines
   are missing (gaps in fused numbering); code left byte-identical —
   restore from upstream before compiling.  */
1548 const struct cpu_cost_table cortexa15_extra_costs
=
1556 COSTS_N_INSNS (1), /* arith_shift. */
1557 COSTS_N_INSNS (1), /* arith_shift_reg. */
1558 COSTS_N_INSNS (1), /* log_shift. */
1559 COSTS_N_INSNS (1), /* log_shift_reg. */
1561 COSTS_N_INSNS (1), /* extend_arith. */
1562 COSTS_N_INSNS (1), /* bfi. */
1567 true /* non_exec_costs_exec. */
1572 COSTS_N_INSNS (2), /* simple. */
1573 COSTS_N_INSNS (3), /* flag_setting. */
1574 COSTS_N_INSNS (2), /* extend. */
1575 COSTS_N_INSNS (2), /* add. */
1576 COSTS_N_INSNS (2), /* extend_add. */
1577 COSTS_N_INSNS (18) /* idiv. */
1581 0, /* simple (N/A). */
1582 0, /* flag_setting (N/A). */
1583 COSTS_N_INSNS (3), /* extend. */
1585 COSTS_N_INSNS (3), /* extend_add. */
1591 COSTS_N_INSNS (3), /* load. */
1592 COSTS_N_INSNS (3), /* load_sign_extend. */
1593 COSTS_N_INSNS (3), /* ldrd. */
1594 COSTS_N_INSNS (4), /* ldm_1st. */
1595 1, /* ldm_regs_per_insn_1st. */
1596 2, /* ldm_regs_per_insn_subsequent. */
1597 COSTS_N_INSNS (4), /* loadf. */
1598 COSTS_N_INSNS (4), /* loadd. */
1599 0, /* load_unaligned. */
1602 COSTS_N_INSNS (1), /* stm_1st. */
1603 1, /* stm_regs_per_insn_1st. */
1604 2, /* stm_regs_per_insn_subsequent. */
1607 0, /* store_unaligned. */
1608 COSTS_N_INSNS (1), /* loadv. */
1609 COSTS_N_INSNS (1) /* storev. */
1614 COSTS_N_INSNS (17), /* div. */
1615 COSTS_N_INSNS (4), /* mult. */
1616 COSTS_N_INSNS (8), /* mult_addsub. */
1617 COSTS_N_INSNS (8), /* fma. */
1618 COSTS_N_INSNS (4), /* addsub. */
1619 COSTS_N_INSNS (2), /* fpconst. */
1620 COSTS_N_INSNS (2), /* neg. */
1621 COSTS_N_INSNS (5), /* compare. */
1622 COSTS_N_INSNS (4), /* widen. */
1623 COSTS_N_INSNS (4), /* narrow. */
1624 COSTS_N_INSNS (4), /* toint. */
1625 COSTS_N_INSNS (4), /* fromint. */
1626 COSTS_N_INSNS (4) /* roundint. */
1630 COSTS_N_INSNS (31), /* div. */
1631 COSTS_N_INSNS (4), /* mult. */
1632 COSTS_N_INSNS (8), /* mult_addsub. */
1633 COSTS_N_INSNS (8), /* fma. */
1634 COSTS_N_INSNS (4), /* addsub. */
1635 COSTS_N_INSNS (2), /* fpconst. */
1636 COSTS_N_INSNS (2), /* neg. */
1637 COSTS_N_INSNS (2), /* compare. */
1638 COSTS_N_INSNS (4), /* widen. */
1639 COSTS_N_INSNS (4), /* narrow. */
1640 COSTS_N_INSNS (4), /* toint. */
1641 COSTS_N_INSNS (4), /* fromint. */
1642 COSTS_N_INSNS (4) /* roundint. */
1647 COSTS_N_INSNS (1) /* alu. */
/* NOTE(review): ARMv7-M per-insn cost table.  Interior initializer lines
   are missing (gaps in fused numbering), including most of the FP widen/
   narrow/convert fields; code left byte-identical — restore from upstream
   before compiling.  */
1651 const struct cpu_cost_table v7m_extra_costs
=
1659 0, /* arith_shift. */
1660 COSTS_N_INSNS (1), /* arith_shift_reg. */
1662 COSTS_N_INSNS (1), /* log_shift_reg. */
1664 COSTS_N_INSNS (1), /* extend_arith. */
1669 COSTS_N_INSNS (1), /* non_exec. */
1670 false /* non_exec_costs_exec. */
1675 COSTS_N_INSNS (1), /* simple. */
1676 COSTS_N_INSNS (1), /* flag_setting. */
1677 COSTS_N_INSNS (2), /* extend. */
1678 COSTS_N_INSNS (1), /* add. */
1679 COSTS_N_INSNS (3), /* extend_add. */
1680 COSTS_N_INSNS (8) /* idiv. */
1684 0, /* simple (N/A). */
1685 0, /* flag_setting (N/A). */
1686 COSTS_N_INSNS (2), /* extend. */
1688 COSTS_N_INSNS (3), /* extend_add. */
1694 COSTS_N_INSNS (2), /* load. */
1695 0, /* load_sign_extend. */
1696 COSTS_N_INSNS (3), /* ldrd. */
1697 COSTS_N_INSNS (2), /* ldm_1st. */
1698 1, /* ldm_regs_per_insn_1st. */
1699 1, /* ldm_regs_per_insn_subsequent. */
1700 COSTS_N_INSNS (2), /* loadf. */
1701 COSTS_N_INSNS (3), /* loadd. */
1702 COSTS_N_INSNS (1), /* load_unaligned. */
1703 COSTS_N_INSNS (2), /* store. */
1704 COSTS_N_INSNS (3), /* strd. */
1705 COSTS_N_INSNS (2), /* stm_1st. */
1706 1, /* stm_regs_per_insn_1st. */
1707 1, /* stm_regs_per_insn_subsequent. */
1708 COSTS_N_INSNS (2), /* storef. */
1709 COSTS_N_INSNS (3), /* stored. */
1710 COSTS_N_INSNS (1), /* store_unaligned. */
1711 COSTS_N_INSNS (1), /* loadv. */
1712 COSTS_N_INSNS (1) /* storev. */
1717 COSTS_N_INSNS (7), /* div. */
1718 COSTS_N_INSNS (2), /* mult. */
1719 COSTS_N_INSNS (5), /* mult_addsub. */
1720 COSTS_N_INSNS (3), /* fma. */
1721 COSTS_N_INSNS (1), /* addsub. */
1733 COSTS_N_INSNS (15), /* div. */
1734 COSTS_N_INSNS (5), /* mult. */
1735 COSTS_N_INSNS (7), /* mult_addsub. */
1736 COSTS_N_INSNS (7), /* fma. */
1737 COSTS_N_INSNS (3), /* addsub. */
1750 COSTS_N_INSNS (1) /* alu. */
1754 const struct tune_params arm_slowmul_tune
=
1756 &generic_extra_costs
, /* Insn extra costs. */
1757 NULL
, /* Sched adj cost. */
1758 arm_default_branch_cost
,
1759 &arm_default_vec_cost
,
1760 3, /* Constant limit. */
1761 5, /* Max cond insns. */
1762 8, /* Memset max inline. */
1763 1, /* Issue rate. */
1764 ARM_PREFETCH_NOT_BENEFICIAL
,
1765 tune_params::PREF_CONST_POOL_TRUE
,
1766 tune_params::PREF_LDRD_FALSE
,
1767 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1768 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1769 tune_params::DISPARAGE_FLAGS_NEITHER
,
1770 tune_params::PREF_NEON_64_FALSE
,
1771 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1772 tune_params::FUSE_NOTHING
,
1773 tune_params::SCHED_AUTOPREF_OFF
1776 const struct tune_params arm_fastmul_tune
=
1778 &generic_extra_costs
, /* Insn extra costs. */
1779 NULL
, /* Sched adj cost. */
1780 arm_default_branch_cost
,
1781 &arm_default_vec_cost
,
1782 1, /* Constant limit. */
1783 5, /* Max cond insns. */
1784 8, /* Memset max inline. */
1785 1, /* Issue rate. */
1786 ARM_PREFETCH_NOT_BENEFICIAL
,
1787 tune_params::PREF_CONST_POOL_TRUE
,
1788 tune_params::PREF_LDRD_FALSE
,
1789 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1790 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1791 tune_params::DISPARAGE_FLAGS_NEITHER
,
1792 tune_params::PREF_NEON_64_FALSE
,
1793 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1794 tune_params::FUSE_NOTHING
,
1795 tune_params::SCHED_AUTOPREF_OFF
1798 /* StrongARM has early execution of branches, so a sequence that is worth
1799 skipping is shorter. Set max_insns_skipped to a lower value. */
1801 const struct tune_params arm_strongarm_tune
=
1803 &generic_extra_costs
, /* Insn extra costs. */
1804 NULL
, /* Sched adj cost. */
1805 arm_default_branch_cost
,
1806 &arm_default_vec_cost
,
1807 1, /* Constant limit. */
1808 3, /* Max cond insns. */
1809 8, /* Memset max inline. */
1810 1, /* Issue rate. */
1811 ARM_PREFETCH_NOT_BENEFICIAL
,
1812 tune_params::PREF_CONST_POOL_TRUE
,
1813 tune_params::PREF_LDRD_FALSE
,
1814 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1815 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1816 tune_params::DISPARAGE_FLAGS_NEITHER
,
1817 tune_params::PREF_NEON_64_FALSE
,
1818 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1819 tune_params::FUSE_NOTHING
,
1820 tune_params::SCHED_AUTOPREF_OFF
1823 const struct tune_params arm_xscale_tune
=
1825 &generic_extra_costs
, /* Insn extra costs. */
1826 xscale_sched_adjust_cost
,
1827 arm_default_branch_cost
,
1828 &arm_default_vec_cost
,
1829 2, /* Constant limit. */
1830 3, /* Max cond insns. */
1831 8, /* Memset max inline. */
1832 1, /* Issue rate. */
1833 ARM_PREFETCH_NOT_BENEFICIAL
,
1834 tune_params::PREF_CONST_POOL_TRUE
,
1835 tune_params::PREF_LDRD_FALSE
,
1836 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1837 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1838 tune_params::DISPARAGE_FLAGS_NEITHER
,
1839 tune_params::PREF_NEON_64_FALSE
,
1840 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1841 tune_params::FUSE_NOTHING
,
1842 tune_params::SCHED_AUTOPREF_OFF
1845 const struct tune_params arm_9e_tune
=
1847 &generic_extra_costs
, /* Insn extra costs. */
1848 NULL
, /* Sched adj cost. */
1849 arm_default_branch_cost
,
1850 &arm_default_vec_cost
,
1851 1, /* Constant limit. */
1852 5, /* Max cond insns. */
1853 8, /* Memset max inline. */
1854 1, /* Issue rate. */
1855 ARM_PREFETCH_NOT_BENEFICIAL
,
1856 tune_params::PREF_CONST_POOL_TRUE
,
1857 tune_params::PREF_LDRD_FALSE
,
1858 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1859 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1860 tune_params::DISPARAGE_FLAGS_NEITHER
,
1861 tune_params::PREF_NEON_64_FALSE
,
1862 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1863 tune_params::FUSE_NOTHING
,
1864 tune_params::SCHED_AUTOPREF_OFF
1867 const struct tune_params arm_marvell_pj4_tune
=
1869 &generic_extra_costs
, /* Insn extra costs. */
1870 NULL
, /* Sched adj cost. */
1871 arm_default_branch_cost
,
1872 &arm_default_vec_cost
,
1873 1, /* Constant limit. */
1874 5, /* Max cond insns. */
1875 8, /* Memset max inline. */
1876 2, /* Issue rate. */
1877 ARM_PREFETCH_NOT_BENEFICIAL
,
1878 tune_params::PREF_CONST_POOL_TRUE
,
1879 tune_params::PREF_LDRD_FALSE
,
1880 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1881 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1882 tune_params::DISPARAGE_FLAGS_NEITHER
,
1883 tune_params::PREF_NEON_64_FALSE
,
1884 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1885 tune_params::FUSE_NOTHING
,
1886 tune_params::SCHED_AUTOPREF_OFF
1889 const struct tune_params arm_v6t2_tune
=
1891 &generic_extra_costs
, /* Insn extra costs. */
1892 NULL
, /* Sched adj cost. */
1893 arm_default_branch_cost
,
1894 &arm_default_vec_cost
,
1895 1, /* Constant limit. */
1896 5, /* Max cond insns. */
1897 8, /* Memset max inline. */
1898 1, /* Issue rate. */
1899 ARM_PREFETCH_NOT_BENEFICIAL
,
1900 tune_params::PREF_CONST_POOL_FALSE
,
1901 tune_params::PREF_LDRD_FALSE
,
1902 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1903 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1904 tune_params::DISPARAGE_FLAGS_NEITHER
,
1905 tune_params::PREF_NEON_64_FALSE
,
1906 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1907 tune_params::FUSE_NOTHING
,
1908 tune_params::SCHED_AUTOPREF_OFF
1912 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1913 const struct tune_params arm_cortex_tune
=
1915 &generic_extra_costs
,
1916 NULL
, /* Sched adj cost. */
1917 arm_default_branch_cost
,
1918 &arm_default_vec_cost
,
1919 1, /* Constant limit. */
1920 5, /* Max cond insns. */
1921 8, /* Memset max inline. */
1922 2, /* Issue rate. */
1923 ARM_PREFETCH_NOT_BENEFICIAL
,
1924 tune_params::PREF_CONST_POOL_FALSE
,
1925 tune_params::PREF_LDRD_FALSE
,
1926 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1927 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1928 tune_params::DISPARAGE_FLAGS_NEITHER
,
1929 tune_params::PREF_NEON_64_FALSE
,
1930 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1931 tune_params::FUSE_NOTHING
,
1932 tune_params::SCHED_AUTOPREF_OFF
1935 const struct tune_params arm_cortex_a8_tune
=
1937 &cortexa8_extra_costs
,
1938 NULL
, /* Sched adj cost. */
1939 arm_default_branch_cost
,
1940 &arm_default_vec_cost
,
1941 1, /* Constant limit. */
1942 5, /* Max cond insns. */
1943 8, /* Memset max inline. */
1944 2, /* Issue rate. */
1945 ARM_PREFETCH_NOT_BENEFICIAL
,
1946 tune_params::PREF_CONST_POOL_FALSE
,
1947 tune_params::PREF_LDRD_FALSE
,
1948 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1949 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1950 tune_params::DISPARAGE_FLAGS_NEITHER
,
1951 tune_params::PREF_NEON_64_FALSE
,
1952 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1953 tune_params::FUSE_NOTHING
,
1954 tune_params::SCHED_AUTOPREF_OFF
1957 const struct tune_params arm_cortex_a7_tune
=
1959 &cortexa7_extra_costs
,
1960 NULL
, /* Sched adj cost. */
1961 arm_default_branch_cost
,
1962 &arm_default_vec_cost
,
1963 1, /* Constant limit. */
1964 5, /* Max cond insns. */
1965 8, /* Memset max inline. */
1966 2, /* Issue rate. */
1967 ARM_PREFETCH_NOT_BENEFICIAL
,
1968 tune_params::PREF_CONST_POOL_FALSE
,
1969 tune_params::PREF_LDRD_FALSE
,
1970 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1972 tune_params::DISPARAGE_FLAGS_NEITHER
,
1973 tune_params::PREF_NEON_64_FALSE
,
1974 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1975 tune_params::FUSE_NOTHING
,
1976 tune_params::SCHED_AUTOPREF_OFF
1979 const struct tune_params arm_cortex_a15_tune
=
1981 &cortexa15_extra_costs
,
1982 NULL
, /* Sched adj cost. */
1983 arm_default_branch_cost
,
1984 &arm_default_vec_cost
,
1985 1, /* Constant limit. */
1986 2, /* Max cond insns. */
1987 8, /* Memset max inline. */
1988 3, /* Issue rate. */
1989 ARM_PREFETCH_NOT_BENEFICIAL
,
1990 tune_params::PREF_CONST_POOL_FALSE
,
1991 tune_params::PREF_LDRD_TRUE
,
1992 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1993 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1994 tune_params::DISPARAGE_FLAGS_ALL
,
1995 tune_params::PREF_NEON_64_FALSE
,
1996 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1997 tune_params::FUSE_NOTHING
,
1998 tune_params::SCHED_AUTOPREF_FULL
2001 const struct tune_params arm_cortex_a35_tune
=
2003 &cortexa53_extra_costs
,
2004 NULL
, /* Sched adj cost. */
2005 arm_default_branch_cost
,
2006 &arm_default_vec_cost
,
2007 1, /* Constant limit. */
2008 5, /* Max cond insns. */
2009 8, /* Memset max inline. */
2010 1, /* Issue rate. */
2011 ARM_PREFETCH_NOT_BENEFICIAL
,
2012 tune_params::PREF_CONST_POOL_FALSE
,
2013 tune_params::PREF_LDRD_FALSE
,
2014 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2015 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2016 tune_params::DISPARAGE_FLAGS_NEITHER
,
2017 tune_params::PREF_NEON_64_FALSE
,
2018 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2019 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2020 tune_params::SCHED_AUTOPREF_OFF
2023 const struct tune_params arm_cortex_a53_tune
=
2025 &cortexa53_extra_costs
,
2026 NULL
, /* Sched adj cost. */
2027 arm_default_branch_cost
,
2028 &arm_default_vec_cost
,
2029 1, /* Constant limit. */
2030 5, /* Max cond insns. */
2031 8, /* Memset max inline. */
2032 2, /* Issue rate. */
2033 ARM_PREFETCH_NOT_BENEFICIAL
,
2034 tune_params::PREF_CONST_POOL_FALSE
,
2035 tune_params::PREF_LDRD_FALSE
,
2036 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2037 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2038 tune_params::DISPARAGE_FLAGS_NEITHER
,
2039 tune_params::PREF_NEON_64_FALSE
,
2040 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2041 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2042 tune_params::SCHED_AUTOPREF_OFF
2045 const struct tune_params arm_cortex_a57_tune
=
2047 &cortexa57_extra_costs
,
2048 NULL
, /* Sched adj cost. */
2049 arm_default_branch_cost
,
2050 &arm_default_vec_cost
,
2051 1, /* Constant limit. */
2052 2, /* Max cond insns. */
2053 8, /* Memset max inline. */
2054 3, /* Issue rate. */
2055 ARM_PREFETCH_NOT_BENEFICIAL
,
2056 tune_params::PREF_CONST_POOL_FALSE
,
2057 tune_params::PREF_LDRD_TRUE
,
2058 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2059 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2060 tune_params::DISPARAGE_FLAGS_ALL
,
2061 tune_params::PREF_NEON_64_FALSE
,
2062 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2063 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2064 tune_params::SCHED_AUTOPREF_FULL
2067 const struct tune_params arm_exynosm1_tune
=
2069 &exynosm1_extra_costs
,
2070 NULL
, /* Sched adj cost. */
2071 arm_default_branch_cost
,
2072 &arm_default_vec_cost
,
2073 1, /* Constant limit. */
2074 2, /* Max cond insns. */
2075 8, /* Memset max inline. */
2076 3, /* Issue rate. */
2077 ARM_PREFETCH_NOT_BENEFICIAL
,
2078 tune_params::PREF_CONST_POOL_FALSE
,
2079 tune_params::PREF_LDRD_TRUE
,
2080 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2081 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2082 tune_params::DISPARAGE_FLAGS_ALL
,
2083 tune_params::PREF_NEON_64_FALSE
,
2084 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2085 tune_params::FUSE_NOTHING
,
2086 tune_params::SCHED_AUTOPREF_OFF
2089 const struct tune_params arm_xgene1_tune
=
2091 &xgene1_extra_costs
,
2092 NULL
, /* Sched adj cost. */
2093 arm_default_branch_cost
,
2094 &arm_default_vec_cost
,
2095 1, /* Constant limit. */
2096 2, /* Max cond insns. */
2097 32, /* Memset max inline. */
2098 4, /* Issue rate. */
2099 ARM_PREFETCH_NOT_BENEFICIAL
,
2100 tune_params::PREF_CONST_POOL_FALSE
,
2101 tune_params::PREF_LDRD_TRUE
,
2102 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2103 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2104 tune_params::DISPARAGE_FLAGS_ALL
,
2105 tune_params::PREF_NEON_64_FALSE
,
2106 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2107 tune_params::FUSE_NOTHING
,
2108 tune_params::SCHED_AUTOPREF_OFF
2111 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2112 less appealing. Set max_insns_skipped to a low value. */
2114 const struct tune_params arm_cortex_a5_tune
=
2116 &cortexa5_extra_costs
,
2117 NULL
, /* Sched adj cost. */
2118 arm_cortex_a5_branch_cost
,
2119 &arm_default_vec_cost
,
2120 1, /* Constant limit. */
2121 1, /* Max cond insns. */
2122 8, /* Memset max inline. */
2123 2, /* Issue rate. */
2124 ARM_PREFETCH_NOT_BENEFICIAL
,
2125 tune_params::PREF_CONST_POOL_FALSE
,
2126 tune_params::PREF_LDRD_FALSE
,
2127 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2128 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2129 tune_params::DISPARAGE_FLAGS_NEITHER
,
2130 tune_params::PREF_NEON_64_FALSE
,
2131 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2132 tune_params::FUSE_NOTHING
,
2133 tune_params::SCHED_AUTOPREF_OFF
2136 const struct tune_params arm_cortex_a9_tune
=
2138 &cortexa9_extra_costs
,
2139 cortex_a9_sched_adjust_cost
,
2140 arm_default_branch_cost
,
2141 &arm_default_vec_cost
,
2142 1, /* Constant limit. */
2143 5, /* Max cond insns. */
2144 8, /* Memset max inline. */
2145 2, /* Issue rate. */
2146 ARM_PREFETCH_BENEFICIAL(4,32,32),
2147 tune_params::PREF_CONST_POOL_FALSE
,
2148 tune_params::PREF_LDRD_FALSE
,
2149 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2150 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2151 tune_params::DISPARAGE_FLAGS_NEITHER
,
2152 tune_params::PREF_NEON_64_FALSE
,
2153 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2154 tune_params::FUSE_NOTHING
,
2155 tune_params::SCHED_AUTOPREF_OFF
2158 const struct tune_params arm_cortex_a12_tune
=
2160 &cortexa12_extra_costs
,
2161 NULL
, /* Sched adj cost. */
2162 arm_default_branch_cost
,
2163 &arm_default_vec_cost
, /* Vectorizer costs. */
2164 1, /* Constant limit. */
2165 2, /* Max cond insns. */
2166 8, /* Memset max inline. */
2167 2, /* Issue rate. */
2168 ARM_PREFETCH_NOT_BENEFICIAL
,
2169 tune_params::PREF_CONST_POOL_FALSE
,
2170 tune_params::PREF_LDRD_TRUE
,
2171 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2172 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2173 tune_params::DISPARAGE_FLAGS_ALL
,
2174 tune_params::PREF_NEON_64_FALSE
,
2175 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2176 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2177 tune_params::SCHED_AUTOPREF_OFF
2180 const struct tune_params arm_cortex_a73_tune
=
2182 &cortexa57_extra_costs
,
2183 NULL
, /* Sched adj cost. */
2184 arm_default_branch_cost
,
2185 &arm_default_vec_cost
, /* Vectorizer costs. */
2186 1, /* Constant limit. */
2187 2, /* Max cond insns. */
2188 8, /* Memset max inline. */
2189 2, /* Issue rate. */
2190 ARM_PREFETCH_NOT_BENEFICIAL
,
2191 tune_params::PREF_CONST_POOL_FALSE
,
2192 tune_params::PREF_LDRD_TRUE
,
2193 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2194 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2195 tune_params::DISPARAGE_FLAGS_ALL
,
2196 tune_params::PREF_NEON_64_FALSE
,
2197 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2198 FUSE_OPS (tune_params::FUSE_AES_AESMC
| tune_params::FUSE_MOVW_MOVT
),
2199 tune_params::SCHED_AUTOPREF_FULL
2202 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2203 cycle to execute each. An LDR from the constant pool also takes two cycles
2204 to execute, but mildly increases pipelining opportunity (consecutive
2205 loads/stores can be pipelined together, saving one cycle), and may also
2206 improve icache utilisation. Hence we prefer the constant pool for such
2209 const struct tune_params arm_v7m_tune
=
2212 NULL
, /* Sched adj cost. */
2213 arm_cortex_m_branch_cost
,
2214 &arm_default_vec_cost
,
2215 1, /* Constant limit. */
2216 2, /* Max cond insns. */
2217 8, /* Memset max inline. */
2218 1, /* Issue rate. */
2219 ARM_PREFETCH_NOT_BENEFICIAL
,
2220 tune_params::PREF_CONST_POOL_TRUE
,
2221 tune_params::PREF_LDRD_FALSE
,
2222 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2223 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2224 tune_params::DISPARAGE_FLAGS_NEITHER
,
2225 tune_params::PREF_NEON_64_FALSE
,
2226 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2227 tune_params::FUSE_NOTHING
,
2228 tune_params::SCHED_AUTOPREF_OFF
2231 /* Cortex-M7 tuning. */
2233 const struct tune_params arm_cortex_m7_tune
=
2236 NULL
, /* Sched adj cost. */
2237 arm_cortex_m7_branch_cost
,
2238 &arm_default_vec_cost
,
2239 0, /* Constant limit. */
2240 1, /* Max cond insns. */
2241 8, /* Memset max inline. */
2242 2, /* Issue rate. */
2243 ARM_PREFETCH_NOT_BENEFICIAL
,
2244 tune_params::PREF_CONST_POOL_TRUE
,
2245 tune_params::PREF_LDRD_FALSE
,
2246 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2247 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2248 tune_params::DISPARAGE_FLAGS_NEITHER
,
2249 tune_params::PREF_NEON_64_FALSE
,
2250 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2251 tune_params::FUSE_NOTHING
,
2252 tune_params::SCHED_AUTOPREF_OFF
2255 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2256 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2258 const struct tune_params arm_v6m_tune
=
2260 &generic_extra_costs
, /* Insn extra costs. */
2261 NULL
, /* Sched adj cost. */
2262 arm_default_branch_cost
,
2263 &arm_default_vec_cost
, /* Vectorizer costs. */
2264 1, /* Constant limit. */
2265 5, /* Max cond insns. */
2266 8, /* Memset max inline. */
2267 1, /* Issue rate. */
2268 ARM_PREFETCH_NOT_BENEFICIAL
,
2269 tune_params::PREF_CONST_POOL_FALSE
,
2270 tune_params::PREF_LDRD_FALSE
,
2271 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2272 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2273 tune_params::DISPARAGE_FLAGS_NEITHER
,
2274 tune_params::PREF_NEON_64_FALSE
,
2275 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2276 tune_params::FUSE_NOTHING
,
2277 tune_params::SCHED_AUTOPREF_OFF
2280 const struct tune_params arm_fa726te_tune
=
2282 &generic_extra_costs
, /* Insn extra costs. */
2283 fa726te_sched_adjust_cost
,
2284 arm_default_branch_cost
,
2285 &arm_default_vec_cost
,
2286 1, /* Constant limit. */
2287 5, /* Max cond insns. */
2288 8, /* Memset max inline. */
2289 2, /* Issue rate. */
2290 ARM_PREFETCH_NOT_BENEFICIAL
,
2291 tune_params::PREF_CONST_POOL_TRUE
,
2292 tune_params::PREF_LDRD_FALSE
,
2293 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2294 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2295 tune_params::DISPARAGE_FLAGS_NEITHER
,
2296 tune_params::PREF_NEON_64_FALSE
,
2297 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2298 tune_params::FUSE_NOTHING
,
2299 tune_params::SCHED_AUTOPREF_OFF
2302 /* Auto-generated CPU, FPU and architecture tables. */
2303 #include "arm-cpu-data.h"
2305 /* The name of the preprocessor macro to define for this architecture. PROFILE
2306 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2307 is thus chosen to be big enough to hold the longest architecture name. */
2309 char arm_arch_name
[] = "__ARM_ARCH_PROFILE__";
2311 /* Supported TLS relocations. */
2319 TLS_DESCSEQ
/* GNU scheme */
2322 /* The maximum number of insns to be used when loading a constant. */
2324 arm_constant_limit (bool size_p
)
2326 return size_p
? 1 : current_tune
->constant_limit
;
2329 /* Emit an insn that's a simple single-set. Both the operands must be known
2331 inline static rtx_insn
*
2332 emit_set_insn (rtx x
, rtx y
)
2334 return emit_insn (gen_rtx_SET (x
, y
));
2337 /* Return the number of bits set in VALUE. */
2339 bit_count (unsigned long value
)
2341 unsigned long count
= 0;
2346 value
&= value
- 1; /* Clear the least-significant set bit. */
2352 /* Return the number of bits set in BMAP. */
2354 bitmap_popcount (const sbitmap bmap
)
2356 unsigned int count
= 0;
2358 sbitmap_iterator sbi
;
2360 EXECUTE_IF_SET_IN_BITMAP (bmap
, 0, n
, sbi
)
2369 } arm_fixed_mode_set
;
2371 /* A small helper for setting fixed-point library libfuncs. */
2374 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2375 const char *funcname
, const char *modename
,
2380 if (num_suffix
== 0)
2381 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2383 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2385 set_optab_libfunc (optable
, mode
, buffer
);
2389 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2390 machine_mode from
, const char *funcname
,
2391 const char *toname
, const char *fromname
)
2394 const char *maybe_suffix_2
= "";
2396 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2397 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2398 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2399 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2400 maybe_suffix_2
= "2";
2402 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2405 set_conv_libfunc (optable
, to
, from
, buffer
);
2408 /* Set up library functions unique to ARM. */
2411 arm_init_libfuncs (void)
2413 /* For Linux, we have access to kernel support for atomic operations. */
2414 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2415 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
2417 /* There are no special library functions unless we are using the
2422 /* The functions below are described in Section 4 of the "Run-Time
2423 ABI for the ARM architecture", Version 1.0. */
2425 /* Double-precision floating-point arithmetic. Table 2. */
2426 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2427 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2428 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2429 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2430 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2432 /* Double-precision comparisons. Table 3. */
2433 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2434 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2435 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2436 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2437 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2438 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2439 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2441 /* Single-precision floating-point arithmetic. Table 4. */
2442 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2443 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2444 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2445 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2446 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2448 /* Single-precision comparisons. Table 5. */
2449 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2450 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2451 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2452 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2453 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2454 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2455 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2457 /* Floating-point to integer conversions. Table 6. */
2458 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2459 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2460 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2461 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2462 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2463 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2464 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2465 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2467 /* Conversions between floating types. Table 7. */
2468 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2469 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2471 /* Integer to floating-point conversions. Table 8. */
2472 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2473 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2474 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2475 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2476 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2477 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2478 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2479 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2481 /* Long long. Table 9. */
2482 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2483 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2484 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2485 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2486 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2487 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2488 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2489 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2491 /* Integer (32/32->32) division. \S 4.3.1. */
2492 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2493 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2495 /* The divmod functions are designed so that they can be used for
2496 plain division, even though they return both the quotient and the
2497 remainder. The quotient is returned in the usual location (i.e.,
2498 r0 for SImode, {r0, r1} for DImode), just as would be expected
2499 for an ordinary division routine. Because the AAPCS calling
2500 conventions specify that all of { r0, r1, r2, r3 } are
2501 callee-saved registers, there is no need to tell the compiler
2502 explicitly that those registers are clobbered by these
2504 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2505 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2507 /* For SImode division the ABI provides div-without-mod routines,
2508 which are faster. */
2509 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2510 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2512 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2513 divmod libcalls instead. */
2514 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2515 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2516 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2517 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2519 /* Half-precision float operations. The compiler handles all operations
2520 with NULL libfuncs by converting the SFmode. */
2521 switch (arm_fp16_format
)
2523 case ARM_FP16_FORMAT_IEEE
:
2524 case ARM_FP16_FORMAT_ALTERNATIVE
:
2527 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2528 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2530 : "__gnu_f2h_alternative"));
2531 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2532 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2534 : "__gnu_h2f_alternative"));
2536 set_conv_libfunc (trunc_optab
, HFmode
, DFmode
,
2537 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2539 : "__gnu_d2h_alternative"));
2542 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2543 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2544 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2545 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2546 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2549 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2550 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2551 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2552 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2553 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2554 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2555 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2562 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2564 const arm_fixed_mode_set fixed_arith_modes
[] =
2567 { E_UQQmode
, "uqq" },
2569 { E_UHQmode
, "uhq" },
2571 { E_USQmode
, "usq" },
2573 { E_UDQmode
, "udq" },
2575 { E_UTQmode
, "utq" },
2577 { E_UHAmode
, "uha" },
2579 { E_USAmode
, "usa" },
2581 { E_UDAmode
, "uda" },
2583 { E_UTAmode
, "uta" }
2585 const arm_fixed_mode_set fixed_conv_modes
[] =
2588 { E_UQQmode
, "uqq" },
2590 { E_UHQmode
, "uhq" },
2592 { E_USQmode
, "usq" },
2594 { E_UDQmode
, "udq" },
2596 { E_UTQmode
, "utq" },
2598 { E_UHAmode
, "uha" },
2600 { E_USAmode
, "usa" },
2602 { E_UDAmode
, "uda" },
2604 { E_UTAmode
, "uta" },
2615 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2617 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2618 "add", fixed_arith_modes
[i
].name
, 3);
2619 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2620 "ssadd", fixed_arith_modes
[i
].name
, 3);
2621 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2622 "usadd", fixed_arith_modes
[i
].name
, 3);
2623 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2624 "sub", fixed_arith_modes
[i
].name
, 3);
2625 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2626 "sssub", fixed_arith_modes
[i
].name
, 3);
2627 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2628 "ussub", fixed_arith_modes
[i
].name
, 3);
2629 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2630 "mul", fixed_arith_modes
[i
].name
, 3);
2631 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2632 "ssmul", fixed_arith_modes
[i
].name
, 3);
2633 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2634 "usmul", fixed_arith_modes
[i
].name
, 3);
2635 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2636 "div", fixed_arith_modes
[i
].name
, 3);
2637 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2638 "udiv", fixed_arith_modes
[i
].name
, 3);
2639 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2640 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2641 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2642 "usdiv", fixed_arith_modes
[i
].name
, 3);
2643 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2644 "neg", fixed_arith_modes
[i
].name
, 2);
2645 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2646 "ssneg", fixed_arith_modes
[i
].name
, 2);
2647 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2648 "usneg", fixed_arith_modes
[i
].name
, 2);
2649 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2650 "ashl", fixed_arith_modes
[i
].name
, 3);
2651 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2652 "ashr", fixed_arith_modes
[i
].name
, 3);
2653 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2654 "lshr", fixed_arith_modes
[i
].name
, 3);
2655 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2656 "ssashl", fixed_arith_modes
[i
].name
, 3);
2657 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2658 "usashl", fixed_arith_modes
[i
].name
, 3);
2659 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2660 "cmp", fixed_arith_modes
[i
].name
, 2);
2663 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2664 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2667 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2668 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2671 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2672 fixed_conv_modes
[j
].mode
, "fract",
2673 fixed_conv_modes
[i
].name
,
2674 fixed_conv_modes
[j
].name
);
2675 arm_set_fixed_conv_libfunc (satfract_optab
,
2676 fixed_conv_modes
[i
].mode
,
2677 fixed_conv_modes
[j
].mode
, "satfract",
2678 fixed_conv_modes
[i
].name
,
2679 fixed_conv_modes
[j
].name
);
2680 arm_set_fixed_conv_libfunc (fractuns_optab
,
2681 fixed_conv_modes
[i
].mode
,
2682 fixed_conv_modes
[j
].mode
, "fractuns",
2683 fixed_conv_modes
[i
].name
,
2684 fixed_conv_modes
[j
].name
);
2685 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2686 fixed_conv_modes
[i
].mode
,
2687 fixed_conv_modes
[j
].mode
, "satfractuns",
2688 fixed_conv_modes
[i
].name
,
2689 fixed_conv_modes
[j
].name
);
2693 if (TARGET_AAPCS_BASED
)
2694 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2697 /* On AAPCS systems, this is the "struct __va_list". */
2698 static GTY(()) tree va_list_type
;
2700 /* Return the type to use as __builtin_va_list. */
2702 arm_build_builtin_va_list (void)
2707 if (!TARGET_AAPCS_BASED
)
2708 return std_build_builtin_va_list ();
2710 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2718 The C Library ABI further reinforces this definition in \S
2721 We must follow this definition exactly. The structure tag
2722 name is visible in C++ mangled names, and thus forms a part
2723 of the ABI. The field name may be used by people who
2724 #include <stdarg.h>. */
2725 /* Create the type. */
2726 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2727 /* Give it the required name. */
2728 va_list_name
= build_decl (BUILTINS_LOCATION
,
2730 get_identifier ("__va_list"),
2732 DECL_ARTIFICIAL (va_list_name
) = 1;
2733 TYPE_NAME (va_list_type
) = va_list_name
;
2734 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2735 /* Create the __ap field. */
2736 ap_field
= build_decl (BUILTINS_LOCATION
,
2738 get_identifier ("__ap"),
2740 DECL_ARTIFICIAL (ap_field
) = 1;
2741 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2742 TYPE_FIELDS (va_list_type
) = ap_field
;
2743 /* Compute its layout. */
2744 layout_type (va_list_type
);
2746 return va_list_type
;
2749 /* Return an expression of type "void *" pointing to the next
2750 available argument in a variable-argument list. VALIST is the
2751 user-level va_list object, of type __builtin_va_list. */
2753 arm_extract_valist_ptr (tree valist
)
2755 if (TREE_TYPE (valist
) == error_mark_node
)
2756 return error_mark_node
;
2758 /* On an AAPCS target, the pointer is stored within "struct
2760 if (TARGET_AAPCS_BASED
)
2762 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2763 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2764 valist
, ap_field
, NULL_TREE
);
2770 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2772 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2774 valist
= arm_extract_valist_ptr (valist
);
2775 std_expand_builtin_va_start (valist
, nextarg
);
2778 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2780 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2783 valist
= arm_extract_valist_ptr (valist
);
2784 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2787 /* Check any incompatible options that the user has specified. */
2789 arm_option_check_internal (struct gcc_options
*opts
)
2791 int flags
= opts
->x_target_flags
;
2793 /* iWMMXt and NEON are incompatible. */
2795 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_neon
))
2796 error ("iWMMXt and NEON are incompatible");
2798 /* Make sure that the processor choice does not conflict with any of the
2799 other command line choices. */
2800 if (TARGET_ARM_P (flags
)
2801 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
))
2802 error ("target CPU does not support ARM mode");
2804 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2805 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM_P (flags
))
2806 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2808 if (TARGET_ARM_P (flags
) && TARGET_CALLEE_INTERWORKING
)
2809 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2811 /* If this target is normally configured to use APCS frames, warn if they
2812 are turned off and debugging is turned on. */
2813 if (TARGET_ARM_P (flags
)
2814 && write_symbols
!= NO_DEBUG
2815 && !TARGET_APCS_FRAME
2816 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2817 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2819 /* iWMMXt unsupported under Thumb mode. */
2820 if (TARGET_THUMB_P (flags
) && TARGET_IWMMXT
)
2821 error ("iWMMXt unsupported under Thumb mode");
2823 if (TARGET_HARD_TP
&& TARGET_THUMB1_P (flags
))
2824 error ("can not use -mtp=cp15 with 16-bit Thumb");
2826 if (TARGET_THUMB_P (flags
) && TARGET_VXWORKS_RTP
&& flag_pic
)
2828 error ("RTP PIC is incompatible with Thumb");
2832 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2834 if ((target_pure_code
|| target_slow_flash_data
)
2835 && (!TARGET_HAVE_MOVT
|| arm_arch_notm
|| flag_pic
|| TARGET_NEON
))
2837 const char *flag
= (target_pure_code
? "-mpure-code" :
2838 "-mslow-flash-data");
2839 error ("%s only supports non-pic code on M-profile targets with the "
2840 "MOVT instruction", flag
);
2845 /* Recompute the global settings depending on target attribute options. */
2848 arm_option_params_internal (void)
2850 /* If we are not using the default (ARM mode) section anchor offset
2851 ranges, then set the correct ranges now. */
2854 /* Thumb-1 LDR instructions cannot have negative offsets.
2855 Permissible positive offset ranges are 5-bit (for byte loads),
2856 6-bit (for halfword loads), or 7-bit (for word loads).
2857 Empirical results suggest a 7-bit anchor range gives the best
2858 overall code size. */
2859 targetm
.min_anchor_offset
= 0;
2860 targetm
.max_anchor_offset
= 127;
2862 else if (TARGET_THUMB2
)
2864 /* The minimum is set such that the total size of the block
2865 for a particular anchor is 248 + 1 + 4095 bytes, which is
2866 divisible by eight, ensuring natural spacing of anchors. */
2867 targetm
.min_anchor_offset
= -248;
2868 targetm
.max_anchor_offset
= 4095;
2872 targetm
.min_anchor_offset
= TARGET_MIN_ANCHOR_OFFSET
;
2873 targetm
.max_anchor_offset
= TARGET_MAX_ANCHOR_OFFSET
;
2876 /* Increase the number of conditional instructions with -Os. */
2877 max_insns_skipped
= optimize_size
? 4 : current_tune
->max_insns_skipped
;
2879 /* For THUMB2, we limit the conditional sequence to one IT block. */
2881 max_insns_skipped
= MIN (max_insns_skipped
, MAX_INSN_PER_IT_BLOCK
);
2884 /* True if -mflip-thumb should next add an attribute for the default
2885 mode, false if it should next add an attribute for the opposite mode. */
2886 static GTY(()) bool thumb_flipper
;
2888 /* Options after initial target override. */
2889 static GTY(()) tree init_optimize
;
2892 arm_override_options_after_change_1 (struct gcc_options
*opts
)
2894 if (opts
->x_align_functions
<= 0)
2895 opts
->x_align_functions
= TARGET_THUMB_P (opts
->x_target_flags
)
2896 && opts
->x_optimize_size
? 2 : 4;
2899 /* Implement targetm.override_options_after_change. */
2902 arm_override_options_after_change (void)
2904 arm_configure_build_target (&arm_active_target
,
2905 TREE_TARGET_OPTION (target_option_default_node
),
2906 &global_options_set
, false);
2908 arm_override_options_after_change_1 (&global_options
);
2911 /* Implement TARGET_OPTION_SAVE. */
2913 arm_option_save (struct cl_target_option
*ptr
, struct gcc_options
*opts
)
2915 ptr
->x_arm_arch_string
= opts
->x_arm_arch_string
;
2916 ptr
->x_arm_cpu_string
= opts
->x_arm_cpu_string
;
2917 ptr
->x_arm_tune_string
= opts
->x_arm_tune_string
;
2920 /* Implement TARGET_OPTION_RESTORE. */
2922 arm_option_restore (struct gcc_options
*opts
, struct cl_target_option
*ptr
)
2924 opts
->x_arm_arch_string
= ptr
->x_arm_arch_string
;
2925 opts
->x_arm_cpu_string
= ptr
->x_arm_cpu_string
;
2926 opts
->x_arm_tune_string
= ptr
->x_arm_tune_string
;
2927 arm_configure_build_target (&arm_active_target
, ptr
, &global_options_set
,
2931 /* Reset options between modes that the user has specified. */
2933 arm_option_override_internal (struct gcc_options
*opts
,
2934 struct gcc_options
*opts_set
)
2936 arm_override_options_after_change_1 (opts
);
2938 if (TARGET_INTERWORK
&& !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
2940 /* The default is to enable interworking, so this warning message would
2941 be confusing to users who have just compiled with, eg, -march=armv3. */
2942 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2943 opts
->x_target_flags
&= ~MASK_INTERWORK
;
2946 if (TARGET_THUMB_P (opts
->x_target_flags
)
2947 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
2949 warning (0, "target CPU does not support THUMB instructions");
2950 opts
->x_target_flags
&= ~MASK_THUMB
;
2953 if (TARGET_APCS_FRAME
&& TARGET_THUMB_P (opts
->x_target_flags
))
2955 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2956 opts
->x_target_flags
&= ~MASK_APCS_FRAME
;
2959 /* Callee super interworking implies thumb interworking. Adding
2960 this to the flags here simplifies the logic elsewhere. */
2961 if (TARGET_THUMB_P (opts
->x_target_flags
) && TARGET_CALLEE_INTERWORKING
)
2962 opts
->x_target_flags
|= MASK_INTERWORK
;
2964 /* need to remember initial values so combinaisons of options like
2965 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2966 cl_optimization
*to
= TREE_OPTIMIZATION (init_optimize
);
2968 if (! opts_set
->x_arm_restrict_it
)
2969 opts
->x_arm_restrict_it
= arm_arch8
;
2971 /* ARM execution state and M profile don't have [restrict] IT. */
2972 if (!TARGET_THUMB2_P (opts
->x_target_flags
) || !arm_arch_notm
)
2973 opts
->x_arm_restrict_it
= 0;
2975 /* Enable -munaligned-access by default for
2976 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2977 i.e. Thumb2 and ARM state only.
2978 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2979 - ARMv8 architecture-base processors.
2981 Disable -munaligned-access by default for
2982 - all pre-ARMv6 architecture-based processors
2983 - ARMv6-M architecture-based processors
2984 - ARMv8-M Baseline processors. */
2986 if (! opts_set
->x_unaligned_access
)
2988 opts
->x_unaligned_access
= (TARGET_32BIT_P (opts
->x_target_flags
)
2989 && arm_arch6
&& (arm_arch_notm
|| arm_arch7
));
2991 else if (opts
->x_unaligned_access
== 1
2992 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
2994 warning (0, "target CPU does not support unaligned accesses");
2995 opts
->x_unaligned_access
= 0;
2998 /* Don't warn since it's on by default in -O2. */
2999 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3000 opts
->x_flag_schedule_insns
= 0;
3002 opts
->x_flag_schedule_insns
= to
->x_flag_schedule_insns
;
3004 /* Disable shrink-wrap when optimizing function for size, since it tends to
3005 generate additional returns. */
3006 if (optimize_function_for_size_p (cfun
)
3007 && TARGET_THUMB2_P (opts
->x_target_flags
))
3008 opts
->x_flag_shrink_wrap
= false;
3010 opts
->x_flag_shrink_wrap
= to
->x_flag_shrink_wrap
;
3012 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3013 - epilogue_insns - does not accurately model the corresponding insns
3014 emitted in the asm file. In particular, see the comment in thumb_exit
3015 'Find out how many of the (return) argument registers we can corrupt'.
3016 As a consequence, the epilogue may clobber registers without fipa-ra
3017 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3018 TODO: Accurately model clobbers for epilogue_insns and reenable
3020 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3021 opts
->x_flag_ipa_ra
= 0;
3023 opts
->x_flag_ipa_ra
= to
->x_flag_ipa_ra
;
3025 /* Thumb2 inline assembly code should always use unified syntax.
3026 This will apply to ARM and Thumb1 eventually. */
3027 opts
->x_inline_asm_unified
= TARGET_THUMB2_P (opts
->x_target_flags
);
3029 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3030 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
;
3034 static sbitmap isa_all_fpubits
;
3035 static sbitmap isa_quirkbits
;
3037 /* Configure a build target TARGET from the user-specified options OPTS and
3038 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3039 architecture have been specified, but the two are not identical. */
3041 arm_configure_build_target (struct arm_build_target
*target
,
3042 struct cl_target_option
*opts
,
3043 struct gcc_options
*opts_set
,
3044 bool warn_compatible
)
3046 const cpu_option
*arm_selected_tune
= NULL
;
3047 const arch_option
*arm_selected_arch
= NULL
;
3048 const cpu_option
*arm_selected_cpu
= NULL
;
3049 const arm_fpu_desc
*arm_selected_fpu
= NULL
;
3050 const char *tune_opts
= NULL
;
3051 const char *arch_opts
= NULL
;
3052 const char *cpu_opts
= NULL
;
3054 bitmap_clear (target
->isa
);
3055 target
->core_name
= NULL
;
3056 target
->arch_name
= NULL
;
3058 if (opts_set
->x_arm_arch_string
)
3060 arm_selected_arch
= arm_parse_arch_option_name (all_architectures
,
3062 opts
->x_arm_arch_string
);
3063 arch_opts
= strchr (opts
->x_arm_arch_string
, '+');
3066 if (opts_set
->x_arm_cpu_string
)
3068 arm_selected_cpu
= arm_parse_cpu_option_name (all_cores
, "-mcpu",
3069 opts
->x_arm_cpu_string
);
3070 cpu_opts
= strchr (opts
->x_arm_cpu_string
, '+');
3071 arm_selected_tune
= arm_selected_cpu
;
3072 /* If taking the tuning from -mcpu, we don't need to rescan the
3073 options for tuning. */
3076 if (opts_set
->x_arm_tune_string
)
3078 arm_selected_tune
= arm_parse_cpu_option_name (all_cores
, "-mtune",
3079 opts
->x_arm_tune_string
);
3080 tune_opts
= strchr (opts
->x_arm_tune_string
, '+');
3083 if (arm_selected_arch
)
3085 arm_initialize_isa (target
->isa
, arm_selected_arch
->common
.isa_bits
);
3086 arm_parse_option_features (target
->isa
, &arm_selected_arch
->common
,
3089 if (arm_selected_cpu
)
3091 auto_sbitmap
cpu_isa (isa_num_bits
);
3092 auto_sbitmap
isa_delta (isa_num_bits
);
3094 arm_initialize_isa (cpu_isa
, arm_selected_cpu
->common
.isa_bits
);
3095 arm_parse_option_features (cpu_isa
, &arm_selected_cpu
->common
,
3097 bitmap_xor (isa_delta
, cpu_isa
, target
->isa
);
3098 /* Ignore any bits that are quirk bits. */
3099 bitmap_and_compl (isa_delta
, isa_delta
, isa_quirkbits
);
3100 /* Ignore (for now) any bits that might be set by -mfpu. */
3101 bitmap_and_compl (isa_delta
, isa_delta
, isa_all_fpubits
);
3103 if (!bitmap_empty_p (isa_delta
))
3105 if (warn_compatible
)
3106 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3107 arm_selected_cpu
->common
.name
,
3108 arm_selected_arch
->common
.name
);
3109 /* -march wins for code generation.
3110 -mcpu wins for default tuning. */
3111 if (!arm_selected_tune
)
3112 arm_selected_tune
= arm_selected_cpu
;
3114 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3115 target
->arch_name
= arm_selected_arch
->common
.name
;
3119 /* Architecture and CPU are essentially the same.
3120 Prefer the CPU setting. */
3121 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3122 target
->core_name
= arm_selected_cpu
->common
.name
;
3123 /* Copy the CPU's capabilities, so that we inherit the
3124 appropriate extensions and quirks. */
3125 bitmap_copy (target
->isa
, cpu_isa
);
3130 /* Pick a CPU based on the architecture. */
3131 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3132 target
->arch_name
= arm_selected_arch
->common
.name
;
3133 /* Note: target->core_name is left unset in this path. */
3136 else if (arm_selected_cpu
)
3138 target
->core_name
= arm_selected_cpu
->common
.name
;
3139 arm_initialize_isa (target
->isa
, arm_selected_cpu
->common
.isa_bits
);
3140 arm_parse_option_features (target
->isa
, &arm_selected_cpu
->common
,
3142 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3144 /* If the user did not specify a processor or architecture, choose
3148 const cpu_option
*sel
;
3149 auto_sbitmap
sought_isa (isa_num_bits
);
3150 bitmap_clear (sought_isa
);
3151 auto_sbitmap
default_isa (isa_num_bits
);
3153 arm_selected_cpu
= arm_parse_cpu_option_name (all_cores
, "default CPU",
3154 TARGET_CPU_DEFAULT
);
3155 cpu_opts
= strchr (TARGET_CPU_DEFAULT
, '+');
3156 gcc_assert (arm_selected_cpu
->common
.name
);
3158 /* RWE: All of the selection logic below (to the end of this
3159 'if' clause) looks somewhat suspect. It appears to be mostly
3160 there to support forcing thumb support when the default CPU
3161 does not have thumb (somewhat dubious in terms of what the
3162 user might be expecting). I think it should be removed once
3163 support for the pre-thumb era cores is removed. */
3164 sel
= arm_selected_cpu
;
3165 arm_initialize_isa (default_isa
, sel
->common
.isa_bits
);
3166 arm_parse_option_features (default_isa
, &arm_selected_cpu
->common
,
3169 /* Now check to see if the user has specified any command line
3170 switches that require certain abilities from the cpu. */
3172 if (TARGET_INTERWORK
|| TARGET_THUMB
)
3174 bitmap_set_bit (sought_isa
, isa_bit_thumb
);
3175 bitmap_set_bit (sought_isa
, isa_bit_mode32
);
3177 /* There are no ARM processors that support both APCS-26 and
3178 interworking. Therefore we forcibly remove MODE26 from
3179 from the isa features here (if it was set), so that the
3180 search below will always be able to find a compatible
3182 bitmap_clear_bit (default_isa
, isa_bit_mode26
);
3185 /* If there are such requirements and the default CPU does not
3186 satisfy them, we need to run over the complete list of
3187 cores looking for one that is satisfactory. */
3188 if (!bitmap_empty_p (sought_isa
)
3189 && !bitmap_subset_p (sought_isa
, default_isa
))
3191 auto_sbitmap
candidate_isa (isa_num_bits
);
3192 /* We're only interested in a CPU with at least the
3193 capabilities of the default CPU and the required
3194 additional features. */
3195 bitmap_ior (default_isa
, default_isa
, sought_isa
);
3197 /* Try to locate a CPU type that supports all of the abilities
3198 of the default CPU, plus the extra abilities requested by
3200 for (sel
= all_cores
; sel
->common
.name
!= NULL
; sel
++)
3202 arm_initialize_isa (candidate_isa
, sel
->common
.isa_bits
);
3203 /* An exact match? */
3204 if (bitmap_equal_p (default_isa
, candidate_isa
))
3208 if (sel
->common
.name
== NULL
)
3210 unsigned current_bit_count
= isa_num_bits
;
3211 const cpu_option
*best_fit
= NULL
;
3213 /* Ideally we would like to issue an error message here
3214 saying that it was not possible to find a CPU compatible
3215 with the default CPU, but which also supports the command
3216 line options specified by the programmer, and so they
3217 ought to use the -mcpu=<name> command line option to
3218 override the default CPU type.
3220 If we cannot find a CPU that has exactly the
3221 characteristics of the default CPU and the given
3222 command line options we scan the array again looking
3223 for a best match. The best match must have at least
3224 the capabilities of the perfect match. */
3225 for (sel
= all_cores
; sel
->common
.name
!= NULL
; sel
++)
3227 arm_initialize_isa (candidate_isa
, sel
->common
.isa_bits
);
3229 if (bitmap_subset_p (default_isa
, candidate_isa
))
3233 bitmap_and_compl (candidate_isa
, candidate_isa
,
3235 count
= bitmap_popcount (candidate_isa
);
3237 if (count
< current_bit_count
)
3240 current_bit_count
= count
;
3244 gcc_assert (best_fit
);
3248 arm_selected_cpu
= sel
;
3251 /* Now we know the CPU, we can finally initialize the target
3253 target
->core_name
= arm_selected_cpu
->common
.name
;
3254 arm_initialize_isa (target
->isa
, arm_selected_cpu
->common
.isa_bits
);
3255 arm_parse_option_features (target
->isa
, &arm_selected_cpu
->common
,
3257 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3260 gcc_assert (arm_selected_cpu
);
3261 gcc_assert (arm_selected_arch
);
3263 if (opts
->x_arm_fpu_index
!= TARGET_FPU_auto
)
3265 arm_selected_fpu
= &all_fpus
[opts
->x_arm_fpu_index
];
3266 auto_sbitmap
fpu_bits (isa_num_bits
);
3268 arm_initialize_isa (fpu_bits
, arm_selected_fpu
->isa_bits
);
3269 bitmap_and_compl (target
->isa
, target
->isa
, isa_all_fpubits
);
3270 bitmap_ior (target
->isa
, target
->isa
, fpu_bits
);
3273 if (!arm_selected_tune
)
3274 arm_selected_tune
= arm_selected_cpu
;
3275 else /* Validate the features passed to -mtune. */
3276 arm_parse_option_features (NULL
, &arm_selected_tune
->common
, tune_opts
);
3278 const cpu_tune
*tune_data
= &all_tunes
[arm_selected_tune
- all_cores
];
3280 /* Finish initializing the target structure. */
3281 target
->arch_pp_name
= arm_selected_arch
->arch
;
3282 target
->base_arch
= arm_selected_arch
->base_arch
;
3283 target
->profile
= arm_selected_arch
->profile
;
3285 target
->tune_flags
= tune_data
->tune_flags
;
3286 target
->tune
= tune_data
->tune
;
3287 target
->tune_core
= tune_data
->scheduler
;
3290 /* Fix up any incompatible options that the user has specified. */
3292 arm_option_override (void)
3294 static const enum isa_feature fpu_bitlist
[]
3295 = { ISA_ALL_FPU_INTERNAL
, isa_nobit
};
3296 static const enum isa_feature quirk_bitlist
[] = { ISA_ALL_QUIRKS
, isa_nobit
};
3297 cl_target_option opts
;
3299 isa_quirkbits
= sbitmap_alloc (isa_num_bits
);
3300 arm_initialize_isa (isa_quirkbits
, quirk_bitlist
);
3302 isa_all_fpubits
= sbitmap_alloc (isa_num_bits
);
3303 arm_initialize_isa (isa_all_fpubits
, fpu_bitlist
);
3305 arm_active_target
.isa
= sbitmap_alloc (isa_num_bits
);
3307 if (!global_options_set
.x_arm_fpu_index
)
3312 ok
= opt_enum_arg_to_value (OPT_mfpu_
, FPUTYPE_AUTO
, &fpu_index
,
3315 arm_fpu_index
= (enum fpu_type
) fpu_index
;
3318 cl_target_option_save (&opts
, &global_options
);
3319 arm_configure_build_target (&arm_active_target
, &opts
, &global_options_set
,
3322 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3323 SUBTARGET_OVERRIDE_OPTIONS
;
3326 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_active_target
.arch_pp_name
);
3327 arm_base_arch
= arm_active_target
.base_arch
;
3329 arm_tune
= arm_active_target
.tune_core
;
3330 tune_flags
= arm_active_target
.tune_flags
;
3331 current_tune
= arm_active_target
.tune
;
3333 /* TBD: Dwarf info for apcs frame is not handled yet. */
3334 if (TARGET_APCS_FRAME
)
3335 flag_shrink_wrap
= false;
3337 /* BPABI targets use linker tricks to allow interworking on cores
3338 without thumb support. */
3339 if (TARGET_INTERWORK
3341 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3343 warning (0, "target CPU does not support interworking" );
3344 target_flags
&= ~MASK_INTERWORK
;
3347 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3349 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3350 target_flags
|= MASK_APCS_FRAME
;
3353 if (TARGET_POKE_FUNCTION_NAME
)
3354 target_flags
|= MASK_APCS_FRAME
;
3356 if (TARGET_APCS_REENT
&& flag_pic
)
3357 error ("-fpic and -mapcs-reent are incompatible");
3359 if (TARGET_APCS_REENT
)
3360 warning (0, "APCS reentrant code not supported. Ignored");
3362 /* Initialize boolean versions of the architectural flags, for use
3363 in the arm.md file. */
3364 arm_arch3m
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv3m
);
3365 arm_arch4
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv4
);
3366 arm_arch4t
= arm_arch4
&& bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3367 arm_arch5
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv5
);
3368 arm_arch5e
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv5e
);
3369 arm_arch5te
= arm_arch5e
3370 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3371 arm_arch6
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv6
);
3372 arm_arch6k
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv6k
);
3373 arm_arch_notm
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
);
3374 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3375 arm_arch7
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv7
);
3376 arm_arch7em
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv7em
);
3377 arm_arch8
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv8
);
3378 arm_arch8_1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv8_1
);
3379 arm_arch8_2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv8_2
);
3380 arm_arch_thumb1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3381 arm_arch_thumb2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb2
);
3382 arm_arch_xscale
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_xscale
);
3383 arm_arch_iwmmxt
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt
);
3384 arm_arch_iwmmxt2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt2
);
3385 arm_arch_thumb_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_tdiv
);
3386 arm_arch_arm_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_adiv
);
3387 arm_arch_crc
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_crc32
);
3388 arm_arch_cmse
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_cmse
);
3389 arm_fp16_inst
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_fp16
);
3390 arm_arch_lpae
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_lpae
);
3393 if (arm_fp16_format
== ARM_FP16_FORMAT_ALTERNATIVE
)
3394 error ("selected fp16 options are incompatible");
3395 arm_fp16_format
= ARM_FP16_FORMAT_IEEE
;
3399 /* Set up some tuning parameters. */
3400 arm_ld_sched
= (tune_flags
& TF_LDSCHED
) != 0;
3401 arm_tune_strongarm
= (tune_flags
& TF_STRONG
) != 0;
3402 arm_tune_wbuf
= (tune_flags
& TF_WBUF
) != 0;
3403 arm_tune_xscale
= (tune_flags
& TF_XSCALE
) != 0;
3404 arm_tune_cortex_a9
= (arm_tune
== TARGET_CPU_cortexa9
) != 0;
3405 arm_m_profile_small_mul
= (tune_flags
& TF_SMALLMUL
) != 0;
3407 /* And finally, set up some quirks. */
3408 arm_arch_no_volatile_ce
3409 = bitmap_bit_p (arm_active_target
.isa
, isa_quirk_no_volatile_ce
);
3411 = arm_arch6k
&& bitmap_bit_p (arm_active_target
.isa
, isa_quirk_ARMv6kz
);
3413 /* V5 code we generate is completely interworking capable, so we turn off
3414 TARGET_INTERWORK here to avoid many tests later on. */
3416 /* XXX However, we must pass the right pre-processor defines to CPP
3417 or GLD can get confused. This is a hack. */
3418 if (TARGET_INTERWORK
)
3419 arm_cpp_interwork
= 1;
3422 target_flags
&= ~MASK_INTERWORK
;
3424 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3425 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3427 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3428 error ("iwmmxt abi requires an iwmmxt capable cpu");
3430 /* If soft-float is specified then don't use FPU. */
3431 if (TARGET_SOFT_FLOAT
)
3432 arm_fpu_attr
= FPU_NONE
;
3434 arm_fpu_attr
= FPU_VFP
;
3436 if (TARGET_AAPCS_BASED
)
3438 if (TARGET_CALLER_INTERWORKING
)
3439 error ("AAPCS does not support -mcaller-super-interworking");
3441 if (TARGET_CALLEE_INTERWORKING
)
3442 error ("AAPCS does not support -mcallee-super-interworking");
3445 /* __fp16 support currently assumes the core has ldrh. */
3446 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3447 sorry ("__fp16 and no ldrh");
3449 if (TARGET_AAPCS_BASED
)
3451 if (arm_abi
== ARM_ABI_IWMMXT
)
3452 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
3453 else if (TARGET_HARD_FLOAT_ABI
)
3455 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
3456 if (!bitmap_bit_p (arm_active_target
.isa
, isa_bit_VFPv2
))
3457 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3460 arm_pcs_default
= ARM_PCS_AAPCS
;
3464 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
)
3465 sorry ("-mfloat-abi=hard and VFP");
3467 if (arm_abi
== ARM_ABI_APCS
)
3468 arm_pcs_default
= ARM_PCS_APCS
;
3470 arm_pcs_default
= ARM_PCS_ATPCS
;
3473 /* For arm2/3 there is no need to do any scheduling if we are doing
3474 software floating-point. */
3475 if (TARGET_SOFT_FLOAT
&& (tune_flags
& TF_NO_MODE32
))
3476 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3478 /* Use the cp15 method if it is available. */
3479 if (target_thread_pointer
== TP_AUTO
)
3481 if (arm_arch6k
&& !TARGET_THUMB1
)
3482 target_thread_pointer
= TP_CP15
;
3484 target_thread_pointer
= TP_SOFT
;
3487 /* Override the default structure alignment for AAPCS ABI. */
3488 if (!global_options_set
.x_arm_structure_size_boundary
)
3490 if (TARGET_AAPCS_BASED
)
3491 arm_structure_size_boundary
= 8;
3495 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3497 if (arm_structure_size_boundary
!= 8
3498 && arm_structure_size_boundary
!= 32
3499 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3501 if (ARM_DOUBLEWORD_ALIGN
)
3503 "structure size boundary can only be set to 8, 32 or 64");
3505 warning (0, "structure size boundary can only be set to 8 or 32");
3506 arm_structure_size_boundary
3507 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
3511 if (TARGET_VXWORKS_RTP
)
3513 if (!global_options_set
.x_arm_pic_data_is_text_relative
)
3514 arm_pic_data_is_text_relative
= 0;
3517 && !arm_pic_data_is_text_relative
3518 && !(global_options_set
.x_target_flags
& MASK_SINGLE_PIC_BASE
))
3519 /* When text & data segments don't have a fixed displacement, the
3520 intended use is with a single, read only, pic base register.
3521 Unless the user explicitly requested not to do that, set
3523 target_flags
|= MASK_SINGLE_PIC_BASE
;
3525 /* If stack checking is disabled, we can use r10 as the PIC register,
3526 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3527 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3529 if (TARGET_VXWORKS_RTP
)
3530 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3531 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3534 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3535 arm_pic_register
= 9;
3537 if (arm_pic_register_string
!= NULL
)
3539 int pic_register
= decode_reg_name (arm_pic_register_string
);
3542 warning (0, "-mpic-register= is useless without -fpic");
3544 /* Prevent the user from choosing an obviously stupid PIC register. */
3545 else if (pic_register
< 0 || call_used_regs
[pic_register
]
3546 || pic_register
== HARD_FRAME_POINTER_REGNUM
3547 || pic_register
== STACK_POINTER_REGNUM
3548 || pic_register
>= PC_REGNUM
3549 || (TARGET_VXWORKS_RTP
3550 && (unsigned int) pic_register
!= arm_pic_register
))
3551 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
3553 arm_pic_register
= pic_register
;
3556 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3557 if (fix_cm3_ldrd
== 2)
3559 if (bitmap_bit_p (arm_active_target
.isa
, isa_quirk_cm3_ldrd
))
3565 /* Hot/Cold partitioning is not currently supported, since we can't
3566 handle literal pool placement in that case. */
3567 if (flag_reorder_blocks_and_partition
)
3569 inform (input_location
,
3570 "-freorder-blocks-and-partition not supported on this architecture");
3571 flag_reorder_blocks_and_partition
= 0;
3572 flag_reorder_blocks
= 1;
3576 /* Hoisting PIC address calculations more aggressively provides a small,
3577 but measurable, size reduction for PIC code. Therefore, we decrease
3578 the bar for unrestricted expression hoisting to the cost of PIC address
3579 calculation, which is 2 instructions. */
3580 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3581 global_options
.x_param_values
,
3582 global_options_set
.x_param_values
);
3584 /* ARM EABI defaults to strict volatile bitfields. */
3585 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3586 && abi_version_at_least(2))
3587 flag_strict_volatile_bitfields
= 1;
3589 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3590 have deemed it beneficial (signified by setting
3591 prefetch.num_slots to 1 or more). */
3592 if (flag_prefetch_loop_arrays
< 0
3595 && current_tune
->prefetch
.num_slots
> 0)
3596 flag_prefetch_loop_arrays
= 1;
3598 /* Set up parameters to be used in prefetching algorithm. Do not
3599 override the defaults unless we are tuning for a core we have
3600 researched values for. */
3601 if (current_tune
->prefetch
.num_slots
> 0)
3602 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3603 current_tune
->prefetch
.num_slots
,
3604 global_options
.x_param_values
,
3605 global_options_set
.x_param_values
);
3606 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3607 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3608 current_tune
->prefetch
.l1_cache_line_size
,
3609 global_options
.x_param_values
,
3610 global_options_set
.x_param_values
);
3611 if (current_tune
->prefetch
.l1_cache_size
>= 0)
3612 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3613 current_tune
->prefetch
.l1_cache_size
,
3614 global_options
.x_param_values
,
3615 global_options_set
.x_param_values
);
3617 /* Use Neon to perform 64-bits operations rather than core
3619 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
3620 if (use_neon_for_64bits
== 1)
3621 prefer_neon_for_64bits
= true;
3623 /* Use the alternative scheduling-pressure algorithm by default. */
3624 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
3625 global_options
.x_param_values
,
3626 global_options_set
.x_param_values
);
3628 /* Look through ready list and all of queue for instructions
3629 relevant for L2 auto-prefetcher. */
3630 int param_sched_autopref_queue_depth
;
3632 switch (current_tune
->sched_autopref
)
3634 case tune_params::SCHED_AUTOPREF_OFF
:
3635 param_sched_autopref_queue_depth
= -1;
3638 case tune_params::SCHED_AUTOPREF_RANK
:
3639 param_sched_autopref_queue_depth
= 0;
3642 case tune_params::SCHED_AUTOPREF_FULL
:
3643 param_sched_autopref_queue_depth
= max_insn_queue_index
+ 1;
3650 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH
,
3651 param_sched_autopref_queue_depth
,
3652 global_options
.x_param_values
,
3653 global_options_set
.x_param_values
);
3655 /* Currently, for slow flash data, we just disable literal pools. We also
3656 disable it for pure-code. */
3657 if (target_slow_flash_data
|| target_pure_code
)
3658 arm_disable_literal_pool
= true;
3660 if (use_cmse
&& !arm_arch_cmse
)
3661 error ("target CPU does not support ARMv8-M Security Extensions");
3663 /* Disable scheduling fusion by default if it's not armv7 processor
3664 or doesn't prefer ldrd/strd. */
3665 if (flag_schedule_fusion
== 2
3666 && (!arm_arch7
|| !current_tune
->prefer_ldrd_strd
))
3667 flag_schedule_fusion
= 0;
3669 /* Need to remember initial options before they are overriden. */
3670 init_optimize
= build_optimization_node (&global_options
);
3672 arm_option_override_internal (&global_options
, &global_options_set
);
3673 arm_option_check_internal (&global_options
);
3674 arm_option_params_internal ();
3676 /* Create the default target_options structure. */
3677 target_option_default_node
= target_option_current_node
3678 = build_target_option_node (&global_options
);
3680 /* Register global variables with the garbage collector. */
3681 arm_add_gc_roots ();
3683 /* Init initial mode for testing. */
3684 thumb_flipper
= TARGET_THUMB
;
3688 arm_add_gc_roots (void)
3690 gcc_obstack_init(&minipool_obstack
);
3691 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
3694 /* A table of known ARM exception types.
3695 For use with the interrupt function attribute. */
3699 const char *const arg
;
3700 const unsigned long return_value
;
3704 static const isr_attribute_arg isr_attribute_args
[] =
3706 { "IRQ", ARM_FT_ISR
},
3707 { "irq", ARM_FT_ISR
},
3708 { "FIQ", ARM_FT_FIQ
},
3709 { "fiq", ARM_FT_FIQ
},
3710 { "ABORT", ARM_FT_ISR
},
3711 { "abort", ARM_FT_ISR
},
3712 { "ABORT", ARM_FT_ISR
},
3713 { "abort", ARM_FT_ISR
},
3714 { "UNDEF", ARM_FT_EXCEPTION
},
3715 { "undef", ARM_FT_EXCEPTION
},
3716 { "SWI", ARM_FT_EXCEPTION
},
3717 { "swi", ARM_FT_EXCEPTION
},
3718 { NULL
, ARM_FT_NORMAL
}
3721 /* Returns the (interrupt) function type of the current
3722 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3724 static unsigned long
3725 arm_isr_value (tree argument
)
3727 const isr_attribute_arg
* ptr
;
3731 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3733 /* No argument - default to IRQ. */
3734 if (argument
== NULL_TREE
)
3737 /* Get the value of the argument. */
3738 if (TREE_VALUE (argument
) == NULL_TREE
3739 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3740 return ARM_FT_UNKNOWN
;
3742 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3744 /* Check it against the list of known arguments. */
3745 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3746 if (streq (arg
, ptr
->arg
))
3747 return ptr
->return_value
;
3749 /* An unrecognized interrupt type. */
3750 return ARM_FT_UNKNOWN
;
3753 /* Computes the type of the current function. */
3755 static unsigned long
3756 arm_compute_func_type (void)
3758 unsigned long type
= ARM_FT_UNKNOWN
;
3762 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
3764 /* Decide if the current function is volatile. Such functions
3765 never return, and many memory cycles can be saved by not storing
3766 register values that will never be needed again. This optimization
3767 was added to speed up context switching in a kernel application. */
3769 && (TREE_NOTHROW (current_function_decl
)
3770 || !(flag_unwind_tables
3772 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3773 && TREE_THIS_VOLATILE (current_function_decl
))
3774 type
|= ARM_FT_VOLATILE
;
3776 if (cfun
->static_chain_decl
!= NULL
)
3777 type
|= ARM_FT_NESTED
;
3779 attr
= DECL_ATTRIBUTES (current_function_decl
);
3781 a
= lookup_attribute ("naked", attr
);
3783 type
|= ARM_FT_NAKED
;
3785 a
= lookup_attribute ("isr", attr
);
3787 a
= lookup_attribute ("interrupt", attr
);
3790 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3792 type
|= arm_isr_value (TREE_VALUE (a
));
3794 if (lookup_attribute ("cmse_nonsecure_entry", attr
))
3795 type
|= ARM_FT_CMSE_ENTRY
;
3800 /* Returns the type of the current function. */
3803 arm_current_func_type (void)
3805 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3806 cfun
->machine
->func_type
= arm_compute_func_type ();
3808 return cfun
->machine
->func_type
;
3812 arm_allocate_stack_slots_for_args (void)
3814 /* Naked functions should not allocate stack slots for arguments. */
3815 return !IS_NAKED (arm_current_func_type ());
3819 arm_warn_func_return (tree decl
)
3821 /* Naked functions are implemented entirely in assembly, including the
3822 return sequence, so suppress warnings about this. */
3823 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3827 /* Output assembler code for a block containing the constant parts
3828 of a trampoline, leaving space for the variable parts.
3830 On the ARM, (if r8 is the static chain regnum, and remembering that
3831 referencing pc adds an offset of 8) the trampoline looks like:
3834 .word static chain value
3835 .word function's address
3836 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3839 arm_asm_trampoline_template (FILE *f
)
3841 fprintf (f
, "\t.syntax unified\n");
3845 fprintf (f
, "\t.arm\n");
3846 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3847 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
3849 else if (TARGET_THUMB2
)
3851 fprintf (f
, "\t.thumb\n");
3852 /* The Thumb-2 trampoline is similar to the arm implementation.
3853 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3854 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
3855 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3856 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
3860 ASM_OUTPUT_ALIGN (f
, 2);
3861 fprintf (f
, "\t.code\t16\n");
3862 fprintf (f
, ".Ltrampoline_start:\n");
3863 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3864 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3865 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3866 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3867 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3868 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
3870 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3871 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3874 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3877 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3879 rtx fnaddr
, mem
, a_tramp
;
3881 emit_block_move (m_tramp
, assemble_trampoline_template (),
3882 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
3884 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3885 emit_move_insn (mem
, chain_value
);
3887 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3888 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3889 emit_move_insn (mem
, fnaddr
);
3891 a_tramp
= XEXP (m_tramp
, 0);
3892 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3893 LCT_NORMAL
, VOIDmode
, a_tramp
, Pmode
,
3894 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3897 /* Thumb trampolines should be entered in thumb mode, so set
3898 the bottom bit of the address. */
3901 arm_trampoline_adjust_address (rtx addr
)
3904 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3905 NULL
, 0, OPTAB_LIB_WIDEN
);
3909 /* Return 1 if it is possible to return using a single instruction.
3910 If SIBLING is non-null, this is a test for a return before a sibling
3911 call. SIBLING is the call insn, so we can examine its register usage. */
3914 use_return_insn (int iscond
, rtx sibling
)
3917 unsigned int func_type
;
3918 unsigned long saved_int_regs
;
3919 unsigned HOST_WIDE_INT stack_adjust
;
3920 arm_stack_offsets
*offsets
;
3922 /* Never use a return instruction before reload has run. */
3923 if (!reload_completed
)
3926 func_type
= arm_current_func_type ();
3928 /* Naked, volatile and stack alignment functions need special
3930 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3933 /* So do interrupt functions that use the frame pointer and Thumb
3934 interrupt functions. */
3935 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
3938 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3939 && !optimize_function_for_size_p (cfun
))
3942 offsets
= arm_get_frame_offsets ();
3943 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3945 /* As do variadic functions. */
3946 if (crtl
->args
.pretend_args_size
3947 || cfun
->machine
->uses_anonymous_args
3948 /* Or if the function calls __builtin_eh_return () */
3949 || crtl
->calls_eh_return
3950 /* Or if the function calls alloca */
3951 || cfun
->calls_alloca
3952 /* Or if there is a stack adjustment. However, if the stack pointer
3953 is saved on the stack, we can use a pre-incrementing stack load. */
3954 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3955 && stack_adjust
== 4))
3956 /* Or if the static chain register was saved above the frame, under the
3957 assumption that the stack pointer isn't saved on the stack. */
3958 || (!(TARGET_APCS_FRAME
&& frame_pointer_needed
)
3959 && arm_compute_static_chain_stack_bytes() != 0))
3962 saved_int_regs
= offsets
->saved_regs_mask
;
3964 /* Unfortunately, the insn
3966 ldmib sp, {..., sp, ...}
3968 triggers a bug on most SA-110 based devices, such that the stack
3969 pointer won't be correctly restored if the instruction takes a
3970 page fault. We work around this problem by popping r3 along with
3971 the other registers, since that is never slower than executing
3972 another instruction.
3974 We test for !arm_arch5 here, because code for any architecture
3975 less than this could potentially be run on one of the buggy
3977 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3979 /* Validate that r3 is a call-clobbered register (always true in
3980 the default abi) ... */
3981 if (!call_used_regs
[3])
3984 /* ... that it isn't being used for a return value ... */
3985 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
3988 /* ... or for a tail-call argument ... */
3991 gcc_assert (CALL_P (sibling
));
3993 if (find_regno_fusage (sibling
, USE
, 3))
3997 /* ... and that there are no call-saved registers in r0-r2
3998 (always true in the default ABI). */
3999 if (saved_int_regs
& 0x7)
4003 /* Can't be done if interworking with Thumb, and any registers have been
4005 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
4008 /* On StrongARM, conditional returns are expensive if they aren't
4009 taken and multiple registers have been stacked. */
4010 if (iscond
&& arm_tune_strongarm
)
4012 /* Conditional return when just the LR is stored is a simple
4013 conditional-load instruction, that's not expensive. */
4014 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
4018 && arm_pic_register
!= INVALID_REGNUM
4019 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
4023 /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
4024 several instructions if anything needs to be popped. */
4025 if (saved_int_regs
&& IS_CMSE_ENTRY (func_type
))
4028 /* If there are saved registers but the LR isn't saved, then we need
4029 two instructions for the return. */
4030 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
4033 /* Can't be done if any of the VFP regs are pushed,
4034 since this also requires an insn. */
4035 if (TARGET_HARD_FLOAT
)
4036 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
4037 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
4040 if (TARGET_REALLY_IWMMXT
)
4041 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
4042 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
4048 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4049 shrink-wrapping if possible. This is the case if we need to emit a
4050 prologue, which we can test by looking at the offsets. */
4052 use_simple_return_p (void)
4054 arm_stack_offsets
*offsets
;
4056 /* Note this function can be called before or after reload. */
4057 if (!reload_completed
)
4058 arm_compute_frame_layout ();
4060 offsets
= arm_get_frame_offsets ();
4061 return offsets
->outgoing_args
!= 0;
4064 /* Return TRUE if int I is a valid immediate ARM constant. */
4067 const_ok_for_arm (HOST_WIDE_INT i
)
4071 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4072 be all zero, or all one. */
4073 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
4074 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
4075 != ((~(unsigned HOST_WIDE_INT
) 0)
4076 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
4079 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
4081 /* Fast return for 0 and small values. We must do this for zero, since
4082 the code below can't handle that one case. */
4083 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
4086 /* Get the number of trailing zeros. */
4087 lowbit
= ffs((int) i
) - 1;
4089 /* Only even shifts are allowed in ARM mode so round down to the
4090 nearest even number. */
4094 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
4099 /* Allow rotated constants in ARM mode. */
4101 && ((i
& ~0xc000003f) == 0
4102 || (i
& ~0xf000000f) == 0
4103 || (i
& ~0xfc000003) == 0))
4106 else if (TARGET_THUMB2
)
4110 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4113 if (i
== v
|| i
== (v
| (v
<< 8)))
4116 /* Allow repeated pattern 0xXY00XY00. */
4122 else if (TARGET_HAVE_MOVT
)
4124 /* Thumb-1 Targets with MOVT. */
4134 /* Return true if I is a valid constant for the operation CODE. */
4136 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
4138 if (const_ok_for_arm (i
))
4144 /* See if we can use movw. */
4145 if (TARGET_HAVE_MOVT
&& (i
& 0xffff0000) == 0)
4148 /* Otherwise, try mvn. */
4149 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4152 /* See if we can use addw or subw. */
4154 && ((i
& 0xfffff000) == 0
4155 || ((-i
) & 0xfffff000) == 0))
4176 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
4178 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
4184 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4188 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4195 /* Return true if I is a valid di mode constant for the operation CODE. */
4197 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
4199 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
4200 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
4201 rtx hi
= GEN_INT (hi_val
);
4202 rtx lo
= GEN_INT (lo_val
);
4212 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
4213 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
4215 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
4222 /* Emit a sequence of insns to handle a large constant.
4223 CODE is the code of the operation required, it can be any of SET, PLUS,
4224 IOR, AND, XOR, MINUS;
4225 MODE is the mode in which the operation is being performed;
4226 VAL is the integer to operate on;
4227 SOURCE is the other operand (a register, or a null-pointer for SET);
4228 SUBTARGETS means it is safe to create scratch registers if that will
4229 either produce a simpler sequence, or we will want to cse the values.
4230 Return value is the number of insns emitted. */
4232 /* ??? Tweak this for thumb2. */
4234 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
4235 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
4239 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
4240 cond
= COND_EXEC_TEST (PATTERN (insn
));
4244 if (subtargets
|| code
== SET
4245 || (REG_P (target
) && REG_P (source
)
4246 && REGNO (target
) != REGNO (source
)))
4248 /* After arm_reorg has been called, we can't fix up expensive
4249 constants by pushing them into memory so we must synthesize
4250 them in-line, regardless of the cost. This is only likely to
4251 be more costly on chips that have load delay slots and we are
4252 compiling without running the scheduler (so no splitting
4253 occurred before the final instruction emission).
4255 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4257 if (!cfun
->machine
->after_arm_reorg
4259 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
4261 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
4266 /* Currently SET is the only monadic value for CODE, all
4267 the rest are diadic. */
4268 if (TARGET_USE_MOVT
)
4269 arm_emit_movpair (target
, GEN_INT (val
));
4271 emit_set_insn (target
, GEN_INT (val
));
4277 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
4279 if (TARGET_USE_MOVT
)
4280 arm_emit_movpair (temp
, GEN_INT (val
));
4282 emit_set_insn (temp
, GEN_INT (val
));
4284 /* For MINUS, the value is subtracted from, since we never
4285 have subtraction of a constant. */
4287 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4289 emit_set_insn (target
,
4290 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
4296 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4300 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4301 ARM/THUMB2 immediates, and add up to VAL.
4302 Thr function return value gives the number of insns required. */
4304 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4305 struct four_ints
*return_sequence
)
4307 int best_consecutive_zeros
= 0;
4311 struct four_ints tmp_sequence
;
4313 /* If we aren't targeting ARM, the best place to start is always at
4314 the bottom, otherwise look more closely. */
4317 for (i
= 0; i
< 32; i
+= 2)
4319 int consecutive_zeros
= 0;
4321 if (!(val
& (3 << i
)))
4323 while ((i
< 32) && !(val
& (3 << i
)))
4325 consecutive_zeros
+= 2;
4328 if (consecutive_zeros
> best_consecutive_zeros
)
4330 best_consecutive_zeros
= consecutive_zeros
;
4331 best_start
= i
- consecutive_zeros
;
4338 /* So long as it won't require any more insns to do so, it's
4339 desirable to emit a small constant (in bits 0...9) in the last
4340 insn. This way there is more chance that it can be combined with
4341 a later addressing insn to form a pre-indexed load or store
4342 operation. Consider:
4344 *((volatile int *)0xe0000100) = 1;
4345 *((volatile int *)0xe0000110) = 2;
4347 We want this to wind up as:
4351 str rB, [rA, #0x100]
4353 str rB, [rA, #0x110]
4355 rather than having to synthesize both large constants from scratch.
4357 Therefore, we calculate how many insns would be required to emit
4358 the constant starting from `best_start', and also starting from
4359 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4360 yield a shorter sequence, we may as well use zero. */
4361 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4363 && ((HOST_WIDE_INT_1U
<< best_start
) < val
))
4365 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4366 if (insns2
<= insns1
)
4368 *return_sequence
= tmp_sequence
;
4376 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4378 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4379 struct four_ints
*return_sequence
, int i
)
4381 int remainder
= val
& 0xffffffff;
4384 /* Try and find a way of doing the job in either two or three
4387 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4388 location. We start at position I. This may be the MSB, or
4389 optimial_immediate_sequence may have positioned it at the largest block
4390 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4391 wrapping around to the top of the word when we drop off the bottom.
4392 In the worst case this code should produce no more than four insns.
4394 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4395 constants, shifted to any arbitrary location. We should always start
4400 unsigned int b1
, b2
, b3
, b4
;
4401 unsigned HOST_WIDE_INT result
;
4404 gcc_assert (insns
< 4);
4409 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4410 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4413 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4414 /* We can use addw/subw for the last 12 bits. */
4418 /* Use an 8-bit shifted/rotated immediate. */
4422 result
= remainder
& ((0x0ff << end
)
4423 | ((i
< end
) ? (0xff >> (32 - end
))
4430 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4431 arbitrary shifts. */
4432 i
-= TARGET_ARM
? 2 : 1;
4436 /* Next, see if we can do a better job with a thumb2 replicated
4439 We do it this way around to catch the cases like 0x01F001E0 where
4440 two 8-bit immediates would work, but a replicated constant would
4443 TODO: 16-bit constants that don't clear all the bits, but still win.
4444 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4447 b1
= (remainder
& 0xff000000) >> 24;
4448 b2
= (remainder
& 0x00ff0000) >> 16;
4449 b3
= (remainder
& 0x0000ff00) >> 8;
4450 b4
= remainder
& 0xff;
4454 /* The 8-bit immediate already found clears b1 (and maybe b2),
4455 but must leave b3 and b4 alone. */
4457 /* First try to find a 32-bit replicated constant that clears
4458 almost everything. We can assume that we can't do it in one,
4459 or else we wouldn't be here. */
4460 unsigned int tmp
= b1
& b2
& b3
& b4
;
4461 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4463 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4464 + (tmp
== b3
) + (tmp
== b4
);
4466 && (matching_bytes
>= 3
4467 || (matching_bytes
== 2
4468 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4470 /* At least 3 of the bytes match, and the fourth has at
4471 least as many bits set, or two of the bytes match
4472 and it will only require one more insn to finish. */
4480 /* Second, try to find a 16-bit replicated constant that can
4481 leave three of the bytes clear. If b2 or b4 is already
4482 zero, then we can. If the 8-bit from above would not
4483 clear b2 anyway, then we still win. */
4484 else if (b1
== b3
&& (!b2
|| !b4
4485 || (remainder
& 0x00ff0000 & ~result
)))
4487 result
= remainder
& 0xff00ff00;
4493 /* The 8-bit immediate already found clears b2 (and maybe b3)
4494 and we don't get here unless b1 is alredy clear, but it will
4495 leave b4 unchanged. */
4497 /* If we can clear b2 and b4 at once, then we win, since the
4498 8-bits couldn't possibly reach that far. */
4501 result
= remainder
& 0x00ff00ff;
4507 return_sequence
->i
[insns
++] = result
;
4508 remainder
&= ~result
;
4510 if (code
== SET
|| code
== MINUS
)
4518 /* Emit an instruction with the indicated PATTERN. If COND is
4519 non-NULL, conditionalize the execution of the instruction on COND
4523 emit_constant_insn (rtx cond
, rtx pattern
)
4526 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
4527 emit_insn (pattern
);
4530 /* As above, but extra parameter GENERATE which, if clear, suppresses
4534 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
4535 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
4536 int subtargets
, int generate
)
4540 int final_invert
= 0;
4542 int set_sign_bit_copies
= 0;
4543 int clear_sign_bit_copies
= 0;
4544 int clear_zero_bit_copies
= 0;
4545 int set_zero_bit_copies
= 0;
4546 int insns
= 0, neg_insns
, inv_insns
;
4547 unsigned HOST_WIDE_INT temp1
, temp2
;
4548 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4549 struct four_ints
*immediates
;
4550 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4552 /* Find out which operations are safe for a given CODE. Also do a quick
4553 check for degenerate cases; these can occur when DImode operations
4566 if (remainder
== 0xffffffff)
4569 emit_constant_insn (cond
,
4570 gen_rtx_SET (target
,
4571 GEN_INT (ARM_SIGN_EXTEND (val
))));
4577 if (reload_completed
&& rtx_equal_p (target
, source
))
4581 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4590 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
4593 if (remainder
== 0xffffffff)
4595 if (reload_completed
&& rtx_equal_p (target
, source
))
4598 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4607 if (reload_completed
&& rtx_equal_p (target
, source
))
4610 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4614 if (remainder
== 0xffffffff)
4617 emit_constant_insn (cond
,
4618 gen_rtx_SET (target
,
4619 gen_rtx_NOT (mode
, source
)));
4626 /* We treat MINUS as (val - source), since (source - val) is always
4627 passed as (source + (-val)). */
4631 emit_constant_insn (cond
,
4632 gen_rtx_SET (target
,
4633 gen_rtx_NEG (mode
, source
)));
4636 if (const_ok_for_arm (val
))
4639 emit_constant_insn (cond
,
4640 gen_rtx_SET (target
,
4641 gen_rtx_MINUS (mode
, GEN_INT (val
),
4652 /* If we can do it in one insn get out quickly. */
4653 if (const_ok_for_op (val
, code
))
4656 emit_constant_insn (cond
,
4657 gen_rtx_SET (target
,
4659 ? gen_rtx_fmt_ee (code
, mode
, source
,
4665 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4667 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4668 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4672 if (mode
== SImode
&& i
== 16)
4673 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4675 emit_constant_insn (cond
,
4676 gen_zero_extendhisi2
4677 (target
, gen_lowpart (HImode
, source
)));
4679 /* Extz only supports SImode, but we can coerce the operands
4681 emit_constant_insn (cond
,
4682 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4683 gen_lowpart (SImode
, source
),
4684 GEN_INT (i
), const0_rtx
));
4690 /* Calculate a few attributes that may be useful for specific
4692 /* Count number of leading zeros. */
4693 for (i
= 31; i
>= 0; i
--)
4695 if ((remainder
& (1 << i
)) == 0)
4696 clear_sign_bit_copies
++;
4701 /* Count number of leading 1's. */
4702 for (i
= 31; i
>= 0; i
--)
4704 if ((remainder
& (1 << i
)) != 0)
4705 set_sign_bit_copies
++;
4710 /* Count number of trailing zero's. */
4711 for (i
= 0; i
<= 31; i
++)
4713 if ((remainder
& (1 << i
)) == 0)
4714 clear_zero_bit_copies
++;
4719 /* Count number of trailing 1's. */
4720 for (i
= 0; i
<= 31; i
++)
4722 if ((remainder
& (1 << i
)) != 0)
4723 set_zero_bit_copies
++;
4731 /* See if we can do this by sign_extending a constant that is known
4732 to be negative. This is a good, way of doing it, since the shift
4733 may well merge into a subsequent insn. */
4734 if (set_sign_bit_copies
> 1)
4736 if (const_ok_for_arm
4737 (temp1
= ARM_SIGN_EXTEND (remainder
4738 << (set_sign_bit_copies
- 1))))
4742 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4743 emit_constant_insn (cond
,
4744 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4745 emit_constant_insn (cond
,
4746 gen_ashrsi3 (target
, new_src
,
4747 GEN_INT (set_sign_bit_copies
- 1)));
4751 /* For an inverted constant, we will need to set the low bits,
4752 these will be shifted out of harm's way. */
4753 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4754 if (const_ok_for_arm (~temp1
))
4758 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4759 emit_constant_insn (cond
,
4760 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4761 emit_constant_insn (cond
,
4762 gen_ashrsi3 (target
, new_src
,
4763 GEN_INT (set_sign_bit_copies
- 1)));
4769 /* See if we can calculate the value as the difference between two
4770 valid immediates. */
4771 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4773 int topshift
= clear_sign_bit_copies
& ~1;
4775 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4776 & (0xff000000 >> topshift
));
4778 /* If temp1 is zero, then that means the 9 most significant
4779 bits of remainder were 1 and we've caused it to overflow.
4780 When topshift is 0 we don't need to do anything since we
4781 can borrow from 'bit 32'. */
4782 if (temp1
== 0 && topshift
!= 0)
4783 temp1
= 0x80000000 >> (topshift
- 1);
4785 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4787 if (const_ok_for_arm (temp2
))
4791 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4792 emit_constant_insn (cond
,
4793 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4794 emit_constant_insn (cond
,
4795 gen_addsi3 (target
, new_src
,
4803 /* See if we can generate this by setting the bottom (or the top)
4804 16 bits, and then shifting these into the other half of the
4805 word. We only look for the simplest cases, to do more would cost
4806 too much. Be careful, however, not to generate this when the
4807 alternative would take fewer insns. */
4808 if (val
& 0xffff0000)
4810 temp1
= remainder
& 0xffff0000;
4811 temp2
= remainder
& 0x0000ffff;
4813 /* Overlaps outside this range are best done using other methods. */
4814 for (i
= 9; i
< 24; i
++)
4816 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4817 && !const_ok_for_arm (temp2
))
4819 rtx new_src
= (subtargets
4820 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4822 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4823 source
, subtargets
, generate
);
4831 gen_rtx_ASHIFT (mode
, source
,
4838 /* Don't duplicate cases already considered. */
4839 for (i
= 17; i
< 24; i
++)
4841 if (((temp1
| (temp1
>> i
)) == remainder
)
4842 && !const_ok_for_arm (temp1
))
4844 rtx new_src
= (subtargets
4845 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4847 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4848 source
, subtargets
, generate
);
4853 gen_rtx_SET (target
,
4856 gen_rtx_LSHIFTRT (mode
, source
,
4867 /* If we have IOR or XOR, and the constant can be loaded in a
4868 single instruction, and we can find a temporary to put it in,
4869 then this can be done in two instructions instead of 3-4. */
4871 /* TARGET can't be NULL if SUBTARGETS is 0 */
4872 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4874 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4878 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4880 emit_constant_insn (cond
,
4881 gen_rtx_SET (sub
, GEN_INT (val
)));
4882 emit_constant_insn (cond
,
4883 gen_rtx_SET (target
,
4884 gen_rtx_fmt_ee (code
, mode
,
4895 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4896 and the remainder 0s for e.g. 0xfff00000)
4897 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4899 This can be done in 2 instructions by using shifts with mov or mvn.
4904 mvn r0, r0, lsr #12 */
4905 if (set_sign_bit_copies
> 8
4906 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
4910 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4911 rtx shift
= GEN_INT (set_sign_bit_copies
);
4917 gen_rtx_ASHIFT (mode
,
4922 gen_rtx_SET (target
,
4924 gen_rtx_LSHIFTRT (mode
, sub
,
4931 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4933 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4935 For eg. r0 = r0 | 0xfff
4940 if (set_zero_bit_copies
> 8
4941 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4945 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4946 rtx shift
= GEN_INT (set_zero_bit_copies
);
4952 gen_rtx_LSHIFTRT (mode
,
4957 gen_rtx_SET (target
,
4959 gen_rtx_ASHIFT (mode
, sub
,
4965 /* This will never be reached for Thumb2 because orn is a valid
4966 instruction. This is for Thumb1 and the ARM 32 bit cases.
4968 x = y | constant (such that ~constant is a valid constant)
4970 x = ~(~y & ~constant).
4972 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4976 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4977 emit_constant_insn (cond
,
4979 gen_rtx_NOT (mode
, source
)));
4982 sub
= gen_reg_rtx (mode
);
4983 emit_constant_insn (cond
,
4985 gen_rtx_AND (mode
, source
,
4987 emit_constant_insn (cond
,
4988 gen_rtx_SET (target
,
4989 gen_rtx_NOT (mode
, sub
)));
4996 /* See if two shifts will do 2 or more insn's worth of work. */
4997 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4999 HOST_WIDE_INT shift_mask
= ((0xffffffff
5000 << (32 - clear_sign_bit_copies
))
5003 if ((remainder
| shift_mask
) != 0xffffffff)
5005 HOST_WIDE_INT new_val
5006 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5010 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5011 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
5012 new_src
, source
, subtargets
, 1);
5017 rtx targ
= subtargets
? NULL_RTX
: target
;
5018 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5019 targ
, source
, subtargets
, 0);
5025 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5026 rtx shift
= GEN_INT (clear_sign_bit_copies
);
5028 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
5029 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
5035 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
5037 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
5039 if ((remainder
| shift_mask
) != 0xffffffff)
5041 HOST_WIDE_INT new_val
5042 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5045 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5047 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5048 new_src
, source
, subtargets
, 1);
5053 rtx targ
= subtargets
? NULL_RTX
: target
;
5055 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5056 targ
, source
, subtargets
, 0);
5062 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5063 rtx shift
= GEN_INT (clear_zero_bit_copies
);
5065 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
5066 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
5078 /* Calculate what the instruction sequences would be if we generated it
5079 normally, negated, or inverted. */
5081 /* AND cannot be split into multiple insns, so invert and use BIC. */
5084 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
5087 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
5092 if (can_invert
|| final_invert
)
5093 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
5098 immediates
= &pos_immediates
;
5100 /* Is the negated immediate sequence more efficient? */
5101 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
5104 immediates
= &neg_immediates
;
5109 /* Is the inverted immediate sequence more efficient?
5110 We must allow for an extra NOT instruction for XOR operations, although
5111 there is some chance that the final 'mvn' will get optimized later. */
5112 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
5115 immediates
= &inv_immediates
;
5123 /* Now output the chosen sequence as instructions. */
5126 for (i
= 0; i
< insns
; i
++)
5128 rtx new_src
, temp1_rtx
;
5130 temp1
= immediates
->i
[i
];
5132 if (code
== SET
|| code
== MINUS
)
5133 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
5134 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
5135 new_src
= gen_reg_rtx (mode
);
5141 else if (can_negate
)
5144 temp1
= trunc_int_for_mode (temp1
, mode
);
5145 temp1_rtx
= GEN_INT (temp1
);
5149 else if (code
== MINUS
)
5150 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
5152 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
5154 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
5159 can_negate
= can_invert
;
5163 else if (code
== MINUS
)
5171 emit_constant_insn (cond
, gen_rtx_SET (target
,
5172 gen_rtx_NOT (mode
, source
)));
5179 /* Canonicalize a comparison so that we are more likely to recognize it.
5180 This can be done for a few constant compares, where we can make the
5181 immediate value easier to load. */
5184 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
5185 bool op0_preserve_value
)
5188 unsigned HOST_WIDE_INT i
, maxval
;
5190 mode
= GET_MODE (*op0
);
5191 if (mode
== VOIDmode
)
5192 mode
= GET_MODE (*op1
);
5194 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
5196 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5197 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5198 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5199 for GTU/LEU in Thumb mode. */
5203 if (*code
== GT
|| *code
== LE
5204 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
5206 /* Missing comparison. First try to use an available
5208 if (CONST_INT_P (*op1
))
5216 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5218 *op1
= GEN_INT (i
+ 1);
5219 *code
= *code
== GT
? GE
: LT
;
5225 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5226 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5228 *op1
= GEN_INT (i
+ 1);
5229 *code
= *code
== GTU
? GEU
: LTU
;
5238 /* If that did not work, reverse the condition. */
5239 if (!op0_preserve_value
)
5241 std::swap (*op0
, *op1
);
5242 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5248 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5249 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5250 to facilitate possible combining with a cmp into 'ands'. */
5252 && GET_CODE (*op0
) == ZERO_EXTEND
5253 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5254 && GET_MODE (XEXP (*op0
, 0)) == QImode
5255 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5256 && subreg_lowpart_p (XEXP (*op0
, 0))
5257 && *op1
== const0_rtx
)
5258 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5261 /* Comparisons smaller than DImode. Only adjust comparisons against
5262 an out-of-range constant. */
5263 if (!CONST_INT_P (*op1
)
5264 || const_ok_for_arm (INTVAL (*op1
))
5265 || const_ok_for_arm (- INTVAL (*op1
)))
5279 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5281 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5282 *code
= *code
== GT
? GE
: LT
;
5290 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5292 *op1
= GEN_INT (i
- 1);
5293 *code
= *code
== GE
? GT
: LE
;
5300 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5301 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5303 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5304 *code
= *code
== GTU
? GEU
: LTU
;
5312 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5314 *op1
= GEN_INT (i
- 1);
5315 *code
= *code
== GEU
? GTU
: LEU
;
5326 /* Define how to find the value returned by a function. */
5329 arm_function_value(const_tree type
, const_tree func
,
5330 bool outgoing ATTRIBUTE_UNUSED
)
5333 int unsignedp ATTRIBUTE_UNUSED
;
5334 rtx r ATTRIBUTE_UNUSED
;
5336 mode
= TYPE_MODE (type
);
5338 if (TARGET_AAPCS_BASED
)
5339 return aapcs_allocate_return_reg (mode
, type
, func
);
5341 /* Promote integer types. */
5342 if (INTEGRAL_TYPE_P (type
))
5343 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5345 /* Promotes small structs returned in a register to full-word size
5346 for big-endian AAPCS. */
5347 if (arm_return_in_msb (type
))
5349 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5350 if (size
% UNITS_PER_WORD
!= 0)
5352 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5353 mode
= int_mode_for_size (size
* BITS_PER_UNIT
, 0).require ();
5357 return arm_libcall_value_1 (mode
);
5360 /* libcall hashtable helpers. */
5362 struct libcall_hasher
: nofree_ptr_hash
<const rtx_def
>
5364 static inline hashval_t
hash (const rtx_def
*);
5365 static inline bool equal (const rtx_def
*, const rtx_def
*);
5366 static inline void remove (rtx_def
*);
5370 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5372 return rtx_equal_p (p1
, p2
);
5376 libcall_hasher::hash (const rtx_def
*p1
)
5378 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5381 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5384 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5386 *htab
->find_slot (libcall
, INSERT
) = libcall
;
5390 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5392 static bool init_done
= false;
5393 static libcall_table_type
*libcall_htab
= NULL
;
5399 libcall_htab
= new libcall_table_type (31);
5400 add_libcall (libcall_htab
,
5401 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5402 add_libcall (libcall_htab
,
5403 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5404 add_libcall (libcall_htab
,
5405 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5406 add_libcall (libcall_htab
,
5407 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5409 add_libcall (libcall_htab
,
5410 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5411 add_libcall (libcall_htab
,
5412 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5413 add_libcall (libcall_htab
,
5414 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5415 add_libcall (libcall_htab
,
5416 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5418 add_libcall (libcall_htab
,
5419 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5420 add_libcall (libcall_htab
,
5421 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5422 add_libcall (libcall_htab
,
5423 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5424 add_libcall (libcall_htab
,
5425 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5426 add_libcall (libcall_htab
,
5427 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5428 add_libcall (libcall_htab
,
5429 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5430 add_libcall (libcall_htab
,
5431 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5432 add_libcall (libcall_htab
,
5433 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5435 /* Values from double-precision helper functions are returned in core
5436 registers if the selected core only supports single-precision
5437 arithmetic, even if we are using the hard-float ABI. The same is
5438 true for single-precision helpers, but we will never be using the
5439 hard-float ABI on a CPU which doesn't support single-precision
5440 operations in hardware. */
5441 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5442 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5443 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5444 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5445 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5446 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5447 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5448 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5449 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5450 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5451 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5452 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5454 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5456 add_libcall (libcall_htab
,
5457 convert_optab_libfunc (trunc_optab
, HFmode
, DFmode
));
5460 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
5464 arm_libcall_value_1 (machine_mode mode
)
5466 if (TARGET_AAPCS_BASED
)
5467 return aapcs_libcall_value (mode
);
5468 else if (TARGET_IWMMXT_ABI
5469 && arm_vector_mode_supported_p (mode
))
5470 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
5472 return gen_rtx_REG (mode
, ARG_REGISTER (1));
5475 /* Define how to find the value returned by a library function
5476 assuming the value has mode MODE. */
5479 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
5481 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
5482 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5484 /* The following libcalls return their result in integer registers,
5485 even though they return a floating point value. */
5486 if (arm_libcall_uses_aapcs_base (libcall
))
5487 return gen_rtx_REG (mode
, ARG_REGISTER(1));
5491 return arm_libcall_value_1 (mode
);
5494 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5497 arm_function_value_regno_p (const unsigned int regno
)
5499 if (regno
== ARG_REGISTER (1)
5501 && TARGET_AAPCS_BASED
5502 && TARGET_HARD_FLOAT
5503 && regno
== FIRST_VFP_REGNUM
)
5504 || (TARGET_IWMMXT_ABI
5505 && regno
== FIRST_IWMMXT_REGNUM
))
5511 /* Determine the amount of memory needed to store the possible return
5512 registers of an untyped call. */
5514 arm_apply_result_size (void)
5520 if (TARGET_HARD_FLOAT_ABI
)
5522 if (TARGET_IWMMXT_ABI
)
5529 /* Decide whether TYPE should be returned in memory (true)
5530 or in a register (false). FNTYPE is the type of the function making
5533 arm_return_in_memory (const_tree type
, const_tree fntype
)
5537 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5539 if (TARGET_AAPCS_BASED
)
5541 /* Simple, non-aggregate types (ie not including vectors and
5542 complex) are always returned in a register (or registers).
5543 We don't care about which register here, so we can short-cut
5544 some of the detail. */
5545 if (!AGGREGATE_TYPE_P (type
)
5546 && TREE_CODE (type
) != VECTOR_TYPE
5547 && TREE_CODE (type
) != COMPLEX_TYPE
)
5550 /* Any return value that is no larger than one word can be
5552 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5555 /* Check any available co-processors to see if they accept the
5556 type as a register candidate (VFP, for example, can return
5557 some aggregates in consecutive registers). These aren't
5558 available if the call is variadic. */
5559 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5562 /* Vector values should be returned using ARM registers, not
5563 memory (unless they're over 16 bytes, which will break since
5564 we only have four call-clobbered registers to play with). */
5565 if (TREE_CODE (type
) == VECTOR_TYPE
)
5566 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5568 /* The rest go in memory. */
5572 if (TREE_CODE (type
) == VECTOR_TYPE
)
5573 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5575 if (!AGGREGATE_TYPE_P (type
) &&
5576 (TREE_CODE (type
) != VECTOR_TYPE
))
5577 /* All simple types are returned in registers. */
5580 if (arm_abi
!= ARM_ABI_APCS
)
5582 /* ATPCS and later return aggregate types in memory only if they are
5583 larger than a word (or are variable size). */
5584 return (size
< 0 || size
> UNITS_PER_WORD
);
5587 /* For the arm-wince targets we choose to be compatible with Microsoft's
5588 ARM and Thumb compilers, which always return aggregates in memory. */
5590 /* All structures/unions bigger than one word are returned in memory.
5591 Also catch the case where int_size_in_bytes returns -1. In this case
5592 the aggregate is either huge or of variable size, and in either case
5593 we will want to return it via memory and not in a register. */
5594 if (size
< 0 || size
> UNITS_PER_WORD
)
5597 if (TREE_CODE (type
) == RECORD_TYPE
)
5601 /* For a struct the APCS says that we only return in a register
5602 if the type is 'integer like' and every addressable element
5603 has an offset of zero. For practical purposes this means
5604 that the structure can have at most one non bit-field element
5605 and that this element must be the first one in the structure. */
5607 /* Find the first field, ignoring non FIELD_DECL things which will
5608 have been created by C++. */
5609 for (field
= TYPE_FIELDS (type
);
5610 field
&& TREE_CODE (field
) != FIELD_DECL
;
5611 field
= DECL_CHAIN (field
))
5615 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5617 /* Check that the first field is valid for returning in a register. */
5619 /* ... Floats are not allowed */
5620 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5623 /* ... Aggregates that are not themselves valid for returning in
5624 a register are not allowed. */
5625 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5628 /* Now check the remaining fields, if any. Only bitfields are allowed,
5629 since they are not addressable. */
5630 for (field
= DECL_CHAIN (field
);
5632 field
= DECL_CHAIN (field
))
5634 if (TREE_CODE (field
) != FIELD_DECL
)
5637 if (!DECL_BIT_FIELD_TYPE (field
))
5644 if (TREE_CODE (type
) == UNION_TYPE
)
5648 /* Unions can be returned in registers if every element is
5649 integral, or can be returned in an integer register. */
5650 for (field
= TYPE_FIELDS (type
);
5652 field
= DECL_CHAIN (field
))
5654 if (TREE_CODE (field
) != FIELD_DECL
)
5657 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5660 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5666 #endif /* not ARM_WINCE */
5668 /* Return all other types in memory. */
5672 const struct pcs_attribute_arg
5676 } pcs_attribute_args
[] =
5678 {"aapcs", ARM_PCS_AAPCS
},
5679 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5681 /* We could recognize these, but changes would be needed elsewhere
5682 * to implement them. */
5683 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5684 {"atpcs", ARM_PCS_ATPCS
},
5685 {"apcs", ARM_PCS_APCS
},
5687 {NULL
, ARM_PCS_UNKNOWN
}
5691 arm_pcs_from_attribute (tree attr
)
5693 const struct pcs_attribute_arg
*ptr
;
5696 /* Get the value of the argument. */
5697 if (TREE_VALUE (attr
) == NULL_TREE
5698 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5699 return ARM_PCS_UNKNOWN
;
5701 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5703 /* Check it against the list of known arguments. */
5704 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5705 if (streq (arg
, ptr
->arg
))
5708 /* An unrecognized interrupt type. */
5709 return ARM_PCS_UNKNOWN
;
5712 /* Get the PCS variant to use for this call. TYPE is the function's type
5713 specification, DECL is the specific declartion. DECL may be null if
5714 the call could be indirect or if this is a library call. */
5716 arm_get_pcs_model (const_tree type
, const_tree decl
)
5718 bool user_convention
= false;
5719 enum arm_pcs user_pcs
= arm_pcs_default
;
5724 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5727 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5728 user_convention
= true;
5731 if (TARGET_AAPCS_BASED
)
5733 /* Detect varargs functions. These always use the base rules
5734 (no argument is ever a candidate for a co-processor
5736 bool base_rules
= stdarg_p (type
);
5738 if (user_convention
)
5740 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5741 sorry ("non-AAPCS derived PCS variant");
5742 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5743 error ("variadic functions must use the base AAPCS variant");
5747 return ARM_PCS_AAPCS
;
5748 else if (user_convention
)
5750 else if (decl
&& flag_unit_at_a_time
)
5752 /* Local functions never leak outside this compilation unit,
5753 so we are free to use whatever conventions are
5755 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5756 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5758 return ARM_PCS_AAPCS_LOCAL
;
5761 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5762 sorry ("PCS variant");
5764 /* For everything else we use the target's default. */
5765 return arm_pcs_default
;
5770 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5771 const_tree fntype ATTRIBUTE_UNUSED
,
5772 rtx libcall ATTRIBUTE_UNUSED
,
5773 const_tree fndecl ATTRIBUTE_UNUSED
)
5775 /* Record the unallocated VFP registers. */
5776 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5777 pcum
->aapcs_vfp_reg_alloc
= 0;
5780 /* Walk down the type tree of TYPE counting consecutive base elements.
5781 If *MODEP is VOIDmode, then set it to the first valid floating point
5782 type. If a non-floating point type is found, or if a floating point
5783 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5784 otherwise return the count in the sub-tree. */
5786 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5791 switch (TREE_CODE (type
))
5794 mode
= TYPE_MODE (type
);
5795 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
)
5798 if (*modep
== VOIDmode
)
5807 mode
= TYPE_MODE (TREE_TYPE (type
));
5808 if (mode
!= DFmode
&& mode
!= SFmode
)
5811 if (*modep
== VOIDmode
)
5820 /* Use V2SImode and V4SImode as representatives of all 64-bit
5821 and 128-bit vector types, whether or not those modes are
5822 supported with the present options. */
5823 size
= int_size_in_bytes (type
);
5836 if (*modep
== VOIDmode
)
5839 /* Vector modes are considered to be opaque: two vectors are
5840 equivalent for the purposes of being homogeneous aggregates
5841 if they are the same size. */
5850 tree index
= TYPE_DOMAIN (type
);
5852 /* Can't handle incomplete types nor sizes that are not
5854 if (!COMPLETE_TYPE_P (type
)
5855 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5858 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5861 || !TYPE_MAX_VALUE (index
)
5862 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5863 || !TYPE_MIN_VALUE (index
)
5864 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5868 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5869 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5871 /* There must be no padding. */
5872 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5884 /* Can't handle incomplete types nor sizes that are not
5886 if (!COMPLETE_TYPE_P (type
)
5887 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5890 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5892 if (TREE_CODE (field
) != FIELD_DECL
)
5895 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5901 /* There must be no padding. */
5902 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5909 case QUAL_UNION_TYPE
:
5911 /* These aren't very interesting except in a degenerate case. */
5916 /* Can't handle incomplete types nor sizes that are not
5918 if (!COMPLETE_TYPE_P (type
)
5919 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5922 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5924 if (TREE_CODE (field
) != FIELD_DECL
)
5927 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5930 count
= count
> sub_count
? count
: sub_count
;
5933 /* There must be no padding. */
5934 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5947 /* Return true if PCS_VARIANT should use VFP registers. */
5949 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5951 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5953 static bool seen_thumb1_vfp
= false;
5955 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5957 sorry ("Thumb-1 hard-float VFP ABI");
5958 /* sorry() is not immediately fatal, so only display this once. */
5959 seen_thumb1_vfp
= true;
5965 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5968 return (TARGET_32BIT
&& TARGET_HARD_FLOAT
&&
5969 (TARGET_VFP_DOUBLE
|| !is_double
));
5972 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5973 suitable for passing or returning in VFP registers for the PCS
5974 variant selected. If it is, then *BASE_MODE is updated to contain
5975 a machine mode describing each element of the argument's type and
5976 *COUNT to hold the number of such elements. */
5978 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5979 machine_mode mode
, const_tree type
,
5980 machine_mode
*base_mode
, int *count
)
5982 machine_mode new_mode
= VOIDmode
;
5984 /* If we have the type information, prefer that to working things
5985 out from the mode. */
5988 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5990 if (ag_count
> 0 && ag_count
<= 4)
5995 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5996 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5997 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
6002 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
6005 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
6011 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
6014 *base_mode
= new_mode
;
6019 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
6020 machine_mode mode
, const_tree type
)
6022 int count ATTRIBUTE_UNUSED
;
6023 machine_mode ag_mode ATTRIBUTE_UNUSED
;
6025 if (!use_vfp_abi (pcs_variant
, false))
6027 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6032 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6035 if (!use_vfp_abi (pcum
->pcs_variant
, false))
6038 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
6039 &pcum
->aapcs_vfp_rmode
,
6040 &pcum
->aapcs_vfp_rcount
);
6043 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6044 for the behaviour of this function. */
6047 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6048 const_tree type ATTRIBUTE_UNUSED
)
6051 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
6052 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
6053 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
6056 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
6057 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
6059 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
6061 || (mode
== TImode
&& ! TARGET_NEON
)
6062 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
6065 int rcount
= pcum
->aapcs_vfp_rcount
;
6067 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
6071 /* Avoid using unsupported vector modes. */
6072 if (rmode
== V2SImode
)
6074 else if (rmode
== V4SImode
)
6081 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
6082 for (i
= 0; i
< rcount
; i
++)
6084 rtx tmp
= gen_rtx_REG (rmode
,
6085 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
6086 tmp
= gen_rtx_EXPR_LIST
6088 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
6089 XVECEXP (par
, 0, i
) = tmp
;
6092 pcum
->aapcs_reg
= par
;
6095 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
6101 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6102 comment there for the behaviour of this function. */
6105 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
6107 const_tree type ATTRIBUTE_UNUSED
)
6109 if (!use_vfp_abi (pcs_variant
, false))
6113 || (GET_MODE_CLASS (mode
) == MODE_INT
6114 && GET_MODE_SIZE (mode
) >= GET_MODE_SIZE (TImode
)
6118 machine_mode ag_mode
;
6123 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6128 if (ag_mode
== V2SImode
)
6130 else if (ag_mode
== V4SImode
)
6136 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
6137 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
6138 for (i
= 0; i
< count
; i
++)
6140 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
6141 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
6142 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
6143 XVECEXP (par
, 0, i
) = tmp
;
6149 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
6153 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
6154 machine_mode mode ATTRIBUTE_UNUSED
,
6155 const_tree type ATTRIBUTE_UNUSED
)
6157 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
6158 pcum
->aapcs_vfp_reg_alloc
= 0;
6162 #define AAPCS_CP(X) \
6164 aapcs_ ## X ## _cum_init, \
6165 aapcs_ ## X ## _is_call_candidate, \
6166 aapcs_ ## X ## _allocate, \
6167 aapcs_ ## X ## _is_return_candidate, \
6168 aapcs_ ## X ## _allocate_return_reg, \
6169 aapcs_ ## X ## _advance \
6172 /* Table of co-processors that can be used to pass arguments in
6173 registers. Idealy no arugment should be a candidate for more than
6174 one co-processor table entry, but the table is processed in order
6175 and stops after the first match. If that entry then fails to put
6176 the argument into a co-processor register, the argument will go on
6180 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6181 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
6183 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6184 BLKmode) is a candidate for this co-processor's registers; this
6185 function should ignore any position-dependent state in
6186 CUMULATIVE_ARGS and only use call-type dependent information. */
6187 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6189 /* Return true if the argument does get a co-processor register; it
6190 should set aapcs_reg to an RTX of the register allocated as is
6191 required for a return from FUNCTION_ARG. */
6192 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6194 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6195 be returned in this co-processor's registers. */
6196 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
6198 /* Allocate and return an RTX element to hold the return type of a call. This
6199 routine must not fail and will only be called if is_return_candidate
6200 returned true with the same parameters. */
6201 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
6203 /* Finish processing this argument and prepare to start processing
6205 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6206 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
6214 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6219 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6220 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
6227 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
6229 /* We aren't passed a decl, so we can't check that a call is local.
6230 However, it isn't clear that that would be a win anyway, since it
6231 might limit some tail-calling opportunities. */
6232 enum arm_pcs pcs_variant
;
6236 const_tree fndecl
= NULL_TREE
;
6238 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6241 fntype
= TREE_TYPE (fntype
);
6244 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6247 pcs_variant
= arm_pcs_default
;
6249 if (pcs_variant
!= ARM_PCS_AAPCS
)
6253 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6254 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
6263 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
6266 /* We aren't passed a decl, so we can't check that a call is local.
6267 However, it isn't clear that that would be a win anyway, since it
6268 might limit some tail-calling opportunities. */
6269 enum arm_pcs pcs_variant
;
6270 int unsignedp ATTRIBUTE_UNUSED
;
6274 const_tree fndecl
= NULL_TREE
;
6276 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6279 fntype
= TREE_TYPE (fntype
);
6282 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6285 pcs_variant
= arm_pcs_default
;
6287 /* Promote integer types. */
6288 if (type
&& INTEGRAL_TYPE_P (type
))
6289 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
6291 if (pcs_variant
!= ARM_PCS_AAPCS
)
6295 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6296 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
6298 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
6302 /* Promotes small structs returned in a register to full-word size
6303 for big-endian AAPCS. */
6304 if (type
&& arm_return_in_msb (type
))
6306 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6307 if (size
% UNITS_PER_WORD
!= 0)
6309 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
6310 mode
= int_mode_for_size (size
* BITS_PER_UNIT
, 0).require ();
6314 return gen_rtx_REG (mode
, R0_REGNUM
);
6318 aapcs_libcall_value (machine_mode mode
)
6320 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
6321 && GET_MODE_SIZE (mode
) <= 4)
6324 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
6327 /* Lay out a function argument using the AAPCS rules. The rule
6328 numbers referred to here are those in the AAPCS. */
6330 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6331 const_tree type
, bool named
)
6336 /* We only need to do this once per argument. */
6337 if (pcum
->aapcs_arg_processed
)
6340 pcum
->aapcs_arg_processed
= true;
6342 /* Special case: if named is false then we are handling an incoming
6343 anonymous argument which is on the stack. */
6347 /* Is this a potential co-processor register candidate? */
6348 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6350 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
6351 pcum
->aapcs_cprc_slot
= slot
;
6353 /* We don't have to apply any of the rules from part B of the
6354 preparation phase, these are handled elsewhere in the
6359 /* A Co-processor register candidate goes either in its own
6360 class of registers or on the stack. */
6361 if (!pcum
->aapcs_cprc_failed
[slot
])
6363 /* C1.cp - Try to allocate the argument to co-processor
6365 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
6368 /* C2.cp - Put the argument on the stack and note that we
6369 can't assign any more candidates in this slot. We also
6370 need to note that we have allocated stack space, so that
6371 we won't later try to split a non-cprc candidate between
6372 core registers and the stack. */
6373 pcum
->aapcs_cprc_failed
[slot
] = true;
6374 pcum
->can_split
= false;
6377 /* We didn't get a register, so this argument goes on the
6379 gcc_assert (pcum
->can_split
== false);
6384 /* C3 - For double-word aligned arguments, round the NCRN up to the
6385 next even number. */
6386 ncrn
= pcum
->aapcs_ncrn
;
6389 int res
= arm_needs_doubleword_align (mode
, type
);
6390 /* Only warn during RTL expansion of call stmts, otherwise we would
6391 warn e.g. during gimplification even on functions that will be
6392 always inlined, and we'd warn multiple times. Don't warn when
6393 called in expand_function_start either, as we warn instead in
6394 arm_function_arg_boundary in that case. */
6395 if (res
< 0 && warn_psabi
&& currently_expanding_gimple_stmt
)
6396 inform (input_location
, "parameter passing for argument of type "
6397 "%qT changed in GCC 7.1", type
);
6402 nregs
= ARM_NUM_REGS2(mode
, type
);
6404 /* Sigh, this test should really assert that nregs > 0, but a GCC
6405 extension allows empty structs and then gives them empty size; it
6406 then allows such a structure to be passed by value. For some of
6407 the code below we have to pretend that such an argument has
6408 non-zero size so that we 'locate' it correctly either in
6409 registers or on the stack. */
6410 gcc_assert (nregs
>= 0);
6412 nregs2
= nregs
? nregs
: 1;
6414 /* C4 - Argument fits entirely in core registers. */
6415 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
6417 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6418 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
6422 /* C5 - Some core registers left and there are no arguments already
6423 on the stack: split this argument between the remaining core
6424 registers and the stack. */
6425 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
6427 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6428 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6429 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
6433 /* C6 - NCRN is set to 4. */
6434 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6436 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
6440 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6441 for a call to a function whose data type is FNTYPE.
6442 For a library call, FNTYPE is NULL. */
6444 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
6446 tree fndecl ATTRIBUTE_UNUSED
)
6448 /* Long call handling. */
6450 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6452 pcum
->pcs_variant
= arm_pcs_default
;
6454 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6456 if (arm_libcall_uses_aapcs_base (libname
))
6457 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
6459 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
6460 pcum
->aapcs_reg
= NULL_RTX
;
6461 pcum
->aapcs_partial
= 0;
6462 pcum
->aapcs_arg_processed
= false;
6463 pcum
->aapcs_cprc_slot
= -1;
6464 pcum
->can_split
= true;
6466 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6470 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6472 pcum
->aapcs_cprc_failed
[i
] = false;
6473 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
6481 /* On the ARM, the offset starts at 0. */
6483 pcum
->iwmmxt_nregs
= 0;
6484 pcum
->can_split
= true;
6486 /* Varargs vectors are treated the same as long long.
6487 named_count avoids having to change the way arm handles 'named' */
6488 pcum
->named_count
= 0;
6491 if (TARGET_REALLY_IWMMXT
&& fntype
)
6495 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
6497 fn_arg
= TREE_CHAIN (fn_arg
))
6498 pcum
->named_count
+= 1;
6500 if (! pcum
->named_count
)
6501 pcum
->named_count
= INT_MAX
;
6505 /* Return 1 if double word alignment is required for argument passing.
6506 Return -1 if double word alignment used to be required for argument
6507 passing before PR77728 ABI fix, but is not required anymore.
6508 Return 0 if double word alignment is not required and wasn't requried
6511 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
6514 return GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
;
6516 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6517 if (!AGGREGATE_TYPE_P (type
))
6518 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
6520 /* Array types: Use member alignment of element type. */
6521 if (TREE_CODE (type
) == ARRAY_TYPE
)
6522 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
6525 /* Record/aggregate types: Use greatest member alignment of any member. */
6526 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6527 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
6529 if (TREE_CODE (field
) == FIELD_DECL
)
6532 /* Before PR77728 fix, we were incorrectly considering also
6533 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6534 Make sure we can warn about that with -Wpsabi. */
6542 /* Determine where to put an argument to a function.
6543 Value is zero to push the argument on the stack,
6544 or a hard register in which to store the argument.
6546 MODE is the argument's machine mode.
6547 TYPE is the data type of the argument (as a tree).
6548 This is null for libcalls where that information may
6550 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6551 the preceding args and about the function being called.
6552 NAMED is nonzero if this argument is a named parameter
6553 (otherwise it is an extra parameter matching an ellipsis).
6555 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6556 other arguments are passed on the stack. If (NAMED == 0) (which happens
6557 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6558 defined), say it is passed in the stack (function_prologue will
6559 indeed make it pass in the stack if necessary). */
6562 arm_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
6563 const_tree type
, bool named
)
6565 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6568 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6569 a call insn (op3 of a call_value insn). */
6570 if (mode
== VOIDmode
)
6573 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6575 aapcs_layout_arg (pcum
, mode
, type
, named
);
6576 return pcum
->aapcs_reg
;
6579 /* Varargs vectors are treated the same as long long.
6580 named_count avoids having to change the way arm handles 'named' */
6581 if (TARGET_IWMMXT_ABI
6582 && arm_vector_mode_supported_p (mode
)
6583 && pcum
->named_count
> pcum
->nargs
+ 1)
6585 if (pcum
->iwmmxt_nregs
<= 9)
6586 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
6589 pcum
->can_split
= false;
6594 /* Put doubleword aligned quantities in even register pairs. */
6595 if ((pcum
->nregs
& 1) && ARM_DOUBLEWORD_ALIGN
)
6597 int res
= arm_needs_doubleword_align (mode
, type
);
6598 if (res
< 0 && warn_psabi
)
6599 inform (input_location
, "parameter passing for argument of type "
6600 "%qT changed in GCC 7.1", type
);
6605 /* Only allow splitting an arg between regs and memory if all preceding
6606 args were allocated to regs. For args passed by reference we only count
6607 the reference pointer. */
6608 if (pcum
->can_split
)
6611 nregs
= ARM_NUM_REGS2 (mode
, type
);
6613 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
6616 return gen_rtx_REG (mode
, pcum
->nregs
);
6620 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
6622 if (!ARM_DOUBLEWORD_ALIGN
)
6623 return PARM_BOUNDARY
;
6625 int res
= arm_needs_doubleword_align (mode
, type
);
6626 if (res
< 0 && warn_psabi
)
6627 inform (input_location
, "parameter passing for argument of type %qT "
6628 "changed in GCC 7.1", type
);
6630 return res
> 0 ? DOUBLEWORD_ALIGNMENT
: PARM_BOUNDARY
;
/* Implement TARGET_ARG_PARTIAL_BYTES: return the number of bytes of the
   argument (PCUM_V, MODE, TYPE, NAMED) that are passed in registers when
   the argument is split between registers and the stack; 0 when no split
   occurs.  NOTE(review): extraction-mangled excerpt; some lines (braces,
   final return) are not visible here.  */
6634 arm_arg_partial_bytes (cumulative_args_t pcum_v
, machine_mode mode
,
6635 tree type
, bool named
)
6637 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
)
;
6638 int nregs
= pcum
->nregs
;
/* AAPCS variants precompute the partial-byte count in aapcs_partial.  */
6640 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6642 aapcs_layout_arg (pcum
, mode
, type
, named
);
6643 return pcum
->aapcs_partial
;
6646 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
/* Split case: some but not all of the argument's registers fit in the
   remaining core argument registers.  */
6649 if (NUM_ARG_REGS
> nregs
6650 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
6652 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
6657 /* Update the data in PCUM to advance over an argument
6658 of mode MODE and data type TYPE.
6659 (TYPE is null for libcalls where that information may not be available.) */
6662 arm_function_arg_advance (cumulative_args_t pcum_v
, machine_mode mode
,
6663 const_tree type
, bool named
)
6665 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
)
;
/* For AAPCS, lay the argument out, then either let the co-processor
   register class advance itself (aapcs_cprc_slot >= 0) or advance the
   generic core-register bookkeeping.  */
6667 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6669 aapcs_layout_arg (pcum
, mode
, type
, named
);
6671 if (pcum
->aapcs_cprc_slot
>= 0)
6673 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6675 pcum
->aapcs_cprc_slot
= -1;
6678 /* Generic stuff. */
6679 pcum
->aapcs_arg_processed
= false;
6680 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6681 pcum
->aapcs_reg
= NULL_RTX
;
6682 pcum
->aapcs_partial
= 0;
/* Legacy (non-AAPCS) path: iWMMXt vector arguments consume an iWMMXt
   register; everything else consumes core registers.  */
6687 if (arm_vector_mode_supported_p (mode
)
6688 && pcum
->named_count
> pcum
->nargs
6689 && TARGET_IWMMXT_ABI
)
6690 pcum
->iwmmxt_nregs
+= 1;
6692 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
6696 /* Variable sized types are passed by reference. This is a GCC
6697 extension to the ARM ABI. */
6700 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
6701 machine_mode mode ATTRIBUTE_UNUSED
,
6702 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6704 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
6707 /* Encode the current state of the #pragma [no_]long_calls. */
6710 OFF
, /* No #pragma [no_]long_calls is in effect. */
6711 LONG
, /* #pragma long_calls is in effect. */
6712 SHORT
/* #pragma no_long_calls is in effect. */
/* Tracks the most recently seen long-calls pragma; consulted by
   arm_set_default_type_attributes when new function types are created.  */
6715 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
6718 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6720 arm_pragma_long_calls
= LONG
;
6724 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6726 arm_pragma_long_calls
= SHORT
;
6730 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6732 arm_pragma_long_calls
= OFF
;
6735 /* Handle an attribute requiring a FUNCTION_DECL;
6736 arguments as in struct attribute_spec.handler. */
6738 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6739 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
/* Reject the attribute (with a warning, not an error) when applied to
   anything other than a function declaration.  */
6741 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6743 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6745 *no_add_attrs
= true;
6751 /* Handle an "interrupt" or "isr" attribute;
6752 arguments as in struct attribute_spec.handler. */
6754 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
/* DECL case: only FUNCTION_DECLs may carry the attribute.  */
6759 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6761 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6763 *no_add_attrs
= true;
6765 /* FIXME: the argument if any is checked for type attributes;
6766 should it be checked for decl ones? */
/* TYPE case: validate the ISR kind argument via arm_isr_value.  */
6770 if (TREE_CODE (*node
) == FUNCTION_TYPE
6771 || TREE_CODE (*node
) == METHOD_TYPE
)
6773 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6775 warning (OPT_Wattributes
, "%qE attribute ignored",
6777 *no_add_attrs
= true;
/* Pointer-to-function case: attach the attribute to a fresh variant of
   the pointed-to function type.  */
6780 else if (TREE_CODE (*node
) == POINTER_TYPE
6781 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6782 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6783 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6785 *node
= build_variant_type_copy (*node
);
6786 TREE_TYPE (*node
) = build_type_attribute_variant
6788 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6789 *no_add_attrs
= true;
6793 /* Possibly pass this attribute on from the type to a decl. */
6794 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6795 | (int) ATTR_FLAG_FUNCTION_NEXT
6796 | (int) ATTR_FLAG_ARRAY_NEXT
))
6798 *no_add_attrs
= true;
6799 return tree_cons (name
, args
, NULL_TREE
);
6803 warning (OPT_Wattributes
, "%qE attribute ignored",
6812 /* Handle a "pcs" attribute; arguments as in struct
6813 attribute_spec.handler. */
6815 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6816 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
/* Ignore (with a warning) any pcs value arm_pcs_from_attribute does not
   recognize.  */
6818 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6820 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6821 *no_add_attrs
= true;
6826 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6827 /* Handle the "notshared" attribute. This attribute is another way of
6828 requesting hidden visibility. ARM's compiler supports
6829 "__declspec(notshared)"; we support the same thing via an
6833 arm_handle_notshared_attribute (tree
*node
,
6834 tree name ATTRIBUTE_UNUSED
,
6835 tree args ATTRIBUTE_UNUSED
,
6836 int flags ATTRIBUTE_UNUSED
,
/* Mark the type's TYPE_NAME decl as hidden-visibility; the attribute
   itself is not recorded (no_add_attrs left false so default handling
   applies -- NOTE(review): surrounding lines are not visible here).  */
6839 tree decl
= TYPE_NAME (*node
)
;
6843 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
6844 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
6845 *no_add_attrs
= false;
6851 /* This function returns true if a function with declaration FNDECL and type
6852 FNTYPE uses the stack to pass arguments or return variables and false
6853 otherwise. This is used for functions with the attributes
6854 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6855 diagnostic messages if the stack is used. NAME is the name of the attribute
6859 cmse_func_args_or_return_in_stack (tree fndecl
, tree name
, tree fntype
)
6861 function_args_iterator args_iter
;
6862 CUMULATIVE_ARGS args_so_far_v
;
6863 cumulative_args_t args_so_far
;
6864 bool first_param
= true;
6865 tree arg_type
, prev_arg_type
= NULL_TREE
, ret_type
;
6867 /* Error out if any argument is passed on the stack. */
6868 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
, fndecl
);
6869 args_so_far
= pack_cumulative_args (&args_so_far_v
);
/* Walk the prototype, advancing the cumulative-args state per argument
   and checking each one is allocated entirely to registers.  */
6870 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
6873 machine_mode arg_mode
= TYPE_MODE (arg_type
)
;
6875 prev_arg_type
= arg_type
;
6876 if (VOID_TYPE_P (arg_type
))
6880 arm_function_arg_advance (args_so_far
, arg_mode
, arg_type
, true);
6881 arg_rtx
= arm_function_arg (args_so_far
, arg_mode
, arg_type
, true);
6883 || arm_arg_partial_bytes (args_so_far
, arg_mode
, arg_type
, true))
6885 error ("%qE attribute not available to functions with arguments "
6886 "passed on the stack", name
);
6889 first_param
= false;
6892 /* Error out for variadic functions since we cannot control how many
6893 arguments will be passed and thus stack could be used. stdarg_p () is not
6894 used for the checking to avoid browsing arguments twice. */
6895 if (prev_arg_type
!= NULL_TREE
&& !VOID_TYPE_P (prev_arg_type
))
6897 error ("%qE attribute not available to functions with variable number "
6898 "of arguments", name
);
6902 /* Error out if return value is passed on the stack. */
6903 ret_type
= TREE_TYPE (fntype
)
;
6904 if (arm_return_in_memory (ret_type
, fntype
))
6906 error ("%qE attribute not available to functions that return value on "
6913 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6914 function will check whether the attribute is allowed here and will add the
6915 attribute to the function declaration tree or otherwise issue a warning. */
6918 arm_handle_cmse_nonsecure_entry (tree
*node
, tree name
,
/* Without -mcmse the attribute is meaningless: warn and drop it.  */
6927 *no_add_attrs
= true;
6928 warning (OPT_Wattributes
, "%qE attribute ignored without -mcmse option.",
6933 /* Ignore attribute for function types. */
6934 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6936 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6938 *no_add_attrs
= true;
6944 /* Warn for static linkage functions. */
6945 if (!TREE_PUBLIC (fndecl
))
6947 warning (OPT_Wattributes
, "%qE attribute has no effect on functions "
6948 "with static linkage", name
);
6949 *no_add_attrs
= true;
/* Finally reject the attribute if any argument or the return value
   would be passed on the stack.  */
6953 *no_add_attrs
|= cmse_func_args_or_return_in_stack (fndecl
, name
,
6954 TREE_TYPE (fndecl
));
6959 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6960 function will check whether the attribute is allowed here and will add the
6961 attribute to the function type tree or otherwise issue a diagnostic. The
6962 reason we check this at declaration time is to only allow the use of the
6963 attribute with declarations of function pointers and not function
6964 declarations. This function checks NODE is of the expected type and issues
6965 diagnostics otherwise using NAME. If it is not of the expected type
6966 *NO_ADD_ATTRS will be set to true. */
6969 arm_handle_cmse_nonsecure_call (tree
*node
, tree name
,
6974 tree decl
= NULL_TREE
, fntype
= NULL_TREE
;
/* Without -mcmse the attribute is meaningless: warn and drop it.  */
6979 *no_add_attrs
= true;
6980 warning (OPT_Wattributes
, "%qE attribute ignored without -mcmse option.",
6985 if (TREE_CODE (*node
) == VAR_DECL
|| TREE_CODE (*node
) == TYPE_DECL
)
6988 fntype
= TREE_TYPE (decl
)
;
/* Strip pointer layers to reach the underlying function type.  */
6991 while (fntype
!= NULL_TREE
&& TREE_CODE (fntype
) == POINTER_TYPE
)
6992 fntype
= TREE_TYPE (fntype
)
;
6994 if (!decl
|| TREE_CODE (fntype
) != FUNCTION_TYPE
)
6996 warning (OPT_Wattributes
, "%qE attribute only applies to base type of a "
6997 "function pointer", name
);
6998 *no_add_attrs
= true;
7002 *no_add_attrs
|= cmse_func_args_or_return_in_stack (NULL
, name
, fntype
);
7007 /* Prevent trees being shared among function types with and without
7008 cmse_nonsecure_call attribute. */
7009 type
= TREE_TYPE (decl
)
;
7011 type
= build_distinct_type_copy (type
);
7012 TREE_TYPE (decl
) = type
;
/* Re-descend through the (copied) pointer layers so the attribute lands
   on a distinct copy of the function type itself.  */
7015 while (TREE_CODE (fntype
) != FUNCTION_TYPE
)
7018 fntype
= TREE_TYPE (fntype
)
;
7019 fntype
= build_distinct_type_copy (fntype
);
7020 TREE_TYPE (type
) = fntype
;
7023 /* Construct a type attribute and add it to the function type. */
7024 tree attrs
= tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE
,
7025 TYPE_ATTRIBUTES (fntype
));
7026 TYPE_ATTRIBUTES (fntype
) = attrs
;
7030 /* Return 0 if the attributes for two types are incompatible, 1 if they
7031 are compatible, and 2 if they are nearly compatible (which causes a
7032 warning to be generated). */
7034 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
7038 /* Check for mismatch of non-default calling convention. */
7039 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
7042 /* Check for mismatched call attributes. */
7043 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7044 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7045 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7046 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7048 /* Only bother to check if an attribute is defined. */
7049 if (l1
| l2
| s1
| s2
)
7051 /* If one type has an attribute, the other must have the same attribute. */
7052 if ((l1
!= l2
) || (s1
!= s2
))
7055 /* Disallow mixed attributes. */
7056 if ((l1
& s2
) || (l2
& s1
))
7060 /* Check for mismatched ISR attribute. */
7061 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
7063 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
7064 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
7066 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
7070 l1
= lookup_attribute ("cmse_nonsecure_call",
7071 TYPE_ATTRIBUTES (type1
)) != NULL
;
7072 l2
= lookup_attribute ("cmse_nonsecure_call",
7073 TYPE_ATTRIBUTES (type2
)) != NULL
;
7081 /* Assigns default attributes to newly defined type. This is used to
7082 set short_call/long_call attributes for function types of
7083 functions defined inside corresponding #pragma scopes. */
7085 arm_set_default_type_attributes (tree type
)
7087 /* Add __attribute__ ((long_call)) to all functions, when
7088 inside #pragma long_calls or __attribute__ ((short_call)),
7089 when inside #pragma no_long_calls. */
7090 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
7092 tree type_attr_list
, attr_name
;
7093 type_attr_list
= TYPE_ATTRIBUTES (type
)
;
/* Choose the attribute matching the currently active pragma state.  */
7095 if (arm_pragma_long_calls
== LONG
)
7096 attr_name
= get_identifier ("long_call");
7097 else if (arm_pragma_long_calls
== SHORT
)
7098 attr_name
= get_identifier ("short_call");
7102 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
7103 TYPE_ATTRIBUTES (type
) = type_attr_list
;
7107 /* Return true if DECL is known to be linked into section SECTION. */
7110 arm_function_in_section_p (tree decl
, section
*section
)
7112 /* We can only be certain about the prevailing symbol definition. */
7113 if (!decl_binds_to_current_def_p (decl
))
7116 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7117 if (!DECL_SECTION_NAME (decl
))
7119 /* Make sure that we will not create a unique section for DECL. */
7120 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
/* Final test: DECL's computed section must be the one asked about.  */
7124 return function_section (decl
) == section
;
7127 /* Return nonzero if a 32-bit "long_call" should be generated for
7128 a call from the current function to DECL. We generate a long_call
7131 a. has an __attribute__((long call))
7132 or b. is within the scope of a #pragma long_calls
7133 or c. the -mlong-calls command line switch has been specified
7135 However we do not generate a long call if the function:
7137 d. has an __attribute__ ((short_call))
7138 or e. is inside the scope of a #pragma no_long_calls
7139 or f. is defined in the same section as the current function. */
7142 arm_is_long_call_p (tree decl
)
7147 return TARGET_LONG_CALLS
;
7149 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
/* Case "d": an explicit short_call attribute wins.  */
7150 if (lookup_attribute ("short_call", attrs
))
7153 /* For "f", be conservative, and only cater for cases in which the
7154 whole of the current function is placed in the same section. */
7155 if (!flag_reorder_blocks_and_partition
7156 && TREE_CODE (decl
) == FUNCTION_DECL
7157 && arm_function_in_section_p (decl
, current_function_section ()))
/* Case "a": an explicit long_call attribute forces a long call.  */
7160 if (lookup_attribute ("long_call", attrs
))
7163 return TARGET_LONG_CALLS
;
7166 /* Return nonzero if it is ok to make a tail-call to DECL. */
7168 arm_function_ok_for_sibcall (tree decl
, tree exp
)
7170 unsigned long func_type
;
/* Explicitly blocked (e.g. by earlier passes) -- never sibcall.  */
7172 if (cfun
->machine
->sibcall_blocked
)
7175 /* Never tailcall something if we are generating code for Thumb-1. */
7179 /* The PIC register is live on entry to VxWorks PLT entries, so we
7180 must make the call before restoring the PIC register. */
7181 if (TARGET_VXWORKS_RTP
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
7184 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7185 may be used both as target of the call and base register for restoring
7186 the VFP registers */
7187 if (TARGET_APCS_FRAME
&& TARGET_ARM
7188 && TARGET_HARD_FLOAT
7189 && decl
&& arm_is_long_call_p (decl
))
7192 /* If we are interworking and the function is not declared static
7193 then we can't tail-call it unless we know that it exists in this
7194 compilation unit (since it might be a Thumb routine). */
7195 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
7196 && !TREE_ASM_WRITTEN (decl
))
7199 func_type
= arm_current_func_type ();
7200 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7201 if (IS_INTERRUPT (func_type
))
7204 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7205 generated for entry functions themselves. */
7206 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7209 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7210 this would complicate matters for later code generation. */
7211 if (TREE_CODE (exp
) == CALL_EXPR
)
7213 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7214 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype
)))
/* Non-void return: the callee's return location must match ours.  */
7218 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
7220 /* Check that the return value locations are the same. For
7221 example that we aren't returning a value from the sibling in
7222 a VFP register but then need to transfer it to a core
7225 tree decl_or_type
= decl
;
7227 /* If it is an indirect function pointer, get the function type. */
7229 decl_or_type
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7231 a
= arm_function_value (TREE_TYPE (exp
), decl_or_type
, false);
7232 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
7234 if (!rtx_equal_p (a
, b
))
7238 /* Never tailcall if function may be called with a misaligned SP. */
7239 if (IS_STACKALIGN (func_type
))
7242 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7243 references should become a NOP. Don't convert such calls into
7245 if (TARGET_AAPCS_BASED
7246 && arm_abi
== ARM_ABI_AAPCS
7248 && DECL_WEAK (decl
))
7251 /* We cannot do a tailcall for an indirect call by descriptor if all the
7252 argument registers are used because the only register left to load the
7253 address is IP and it will already contain the static chain. */
7254 if (!decl
&& CALL_EXPR_BY_DESCRIPTOR (exp
) && !flag_trampolines
)
7256 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7257 CUMULATIVE_ARGS cum
;
7258 cumulative_args_t cum_v
;
7260 arm_init_cumulative_args (&cum
, fntype
, NULL_RTX
, NULL_TREE
);
7261 cum_v
= pack_cumulative_args (&cum
);
/* Simulate laying out every prototype argument, then probe whether one
   more SImode register argument would still fit.  */
7263 for (tree t
= TYPE_ARG_TYPES (fntype
); t
; t
= TREE_CHAIN (t
))
7265 tree type
= TREE_VALUE (t
)
;
7266 if (!VOID_TYPE_P (type
))
7267 arm_function_arg_advance (cum_v
, TYPE_MODE (type
), type
, true);
7270 if (!arm_function_arg (cum_v
, SImode
, integer_type_node
, true))
7274 /* Everything else is ok. */
7279 /* Addressing mode support functions. */
7281 /* Return nonzero if X is a legitimate immediate operand when compiling
7282 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7284 legitimate_pic_operand_p (rtx x
)
/* Bare symbol references (and symbol+offset CONST forms) need to go
   through the GOT under PIC, so they are NOT legitimate immediates.  */
7286 if (GET_CODE (x
) == SYMBOL_REF
7287 || (GET_CODE (x
) == CONST
7288 && GET_CODE (XEXP (x
, 0)) == PLUS
7289 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
7295 /* Record that the current function needs a PIC register. Initialize
7296 cfun->machine->pic_reg if we have not already done so. */
7299 require_pic_register (void)
7301 /* A lot of the logic here is made obscure by the fact that this
7302 routine gets called as part of the rtx cost estimation process.
7303 We don't want those calls to affect any assumptions about the real
7304 function; and further, we can't call entry_of_function() until we
7305 start the real expansion process. */
7306 if (!crtl
->uses_pic_offset_table
)
7308 gcc_assert (can_create_pseudo_p ());
/* A fixed PIC register was requested (and, for Thumb-1, is a low reg):
   use it directly rather than allocating a pseudo.  */
7309 if (arm_pic_register
!= INVALID_REGNUM
7310 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
7312 if (!cfun
->machine
->pic_reg
)
7313 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
7315 /* Play games to avoid marking the function as needing pic
7316 if we are being called as part of the cost-estimation
7318 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7319 crtl
->uses_pic_offset_table
= 1;
7323 rtx_insn
*seq
, *insn
;
7325 if (!cfun
->machine
->pic_reg
)
7326 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
7328 /* Play games to avoid marking the function as needing pic
7329 if we are being called as part of the cost-estimation
7331 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7333 crtl
->uses_pic_offset_table
= 1;
/* Thumb-1 with a high fixed PIC register: materialize it via a move
   before emitting the PIC load sequence.  */
7336 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
7337 && arm_pic_register
> LAST_LO_REGNUM
)
7338 emit_move_insn (cfun
->machine
->pic_reg
,
7339 gen_rtx_REG (Pmode
, arm_pic_register
));
7341 arm_load_pic_register (0UL);
7346 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
7348 INSN_LOCATION (insn
) = prologue_location
;
7350 /* We can be called during expansion of PHI nodes, where
7351 we can't yet emit instructions directly in the final
7352 insn stream. Queue the insns on the entry edge, they will
7353 be committed after everything else is expanded. */
7354 insert_insn_on_edge (seq
,
7355 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
/* Convert ORIG (a symbolic address) into a form usable under PIC,
   loading through the GOT when necessary.  REG, if non-null, is a
   scratch/destination register; MODE is the mode of the eventual memory
   access.  Returns the legitimized address rtx.  NOTE(review):
   extraction-mangled excerpt; some lines are not visible here.  */
7362 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
7364 if (GET_CODE (orig
) == SYMBOL_REF
7365 || GET_CODE (orig
) == LABEL_REF
)
7369 gcc_assert (can_create_pseudo_p ());
7370 reg
= gen_reg_rtx (Pmode
);
7373 /* VxWorks does not impose a fixed gap between segments; the run-time
7374 gap can be different from the object-file gap. We therefore can't
7375 use GOTOFF unless we are absolutely sure that the symbol is in the
7376 same segment as the GOT. Unfortunately, the flexibility of linker
7377 scripts means that we can't be sure of that in general, so assume
7378 that GOTOFF is never valid on VxWorks. */
7379 /* References to weak symbols cannot be resolved locally: they
7380 may be overridden by a non-weak definition at link time. */
7382 if ((GET_CODE (orig
) == LABEL_REF
7383 || (GET_CODE (orig
) == SYMBOL_REF
7384 && SYMBOL_REF_LOCAL_P (orig
)
7385 && (SYMBOL_REF_DECL (orig
)
7386 ? !DECL_WEAK (SYMBOL_REF_DECL (orig
)) : 1)))
7388 && arm_pic_data_is_text_relative
)
7389 insn
= arm_pic_static_addr (orig
, reg
);
7395 /* If this function doesn't have a pic register, create one now. */
7396 require_pic_register ();
7398 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
7400 /* Make the MEM as close to a constant as possible. */
7401 mem
= SET_SRC (pat
)
;
7402 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
7403 MEM_READONLY_P (mem
) = 1;
7404 MEM_NOTRAP_P (mem
) = 1;
7406 insn
= emit_insn (pat
);
7409 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7411 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
7415 else if (GET_CODE (orig
) == CONST
)
/* Already based on the PIC register: nothing more to do.  */
7419 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7420 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
7423 /* Handle the case where we have: const (UNSPEC_TLS). */
7424 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
7425 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
7428 /* Handle the case where we have:
7429 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7431 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7432 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
7433 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
7435 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
7441 gcc_assert (can_create_pseudo_p ());
7442 reg
= gen_reg_rtx (Pmode
);
7445 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
/* Recursively legitimize both halves of the PLUS.  */
7447 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
7448 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
7449 base
== reg
? 0 : reg
);
7451 if (CONST_INT_P (offset
))
7453 /* The base register doesn't really matter, we only want to
7454 test the index for the appropriate mode. */
7455 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
7457 gcc_assert (can_create_pseudo_p ());
7458 offset
= force_reg (Pmode
, offset
);
7461 if (CONST_INT_P (offset
))
7462 return plus_constant (Pmode
, base
, INTVAL (offset
));
/* Wide accesses can't fold the addition into the address; compute it
   into REG explicitly.  */
7465 if (GET_MODE_SIZE (mode
) > 4
7466 && (GET_MODE_CLASS (mode
) == MODE_INT
7467 || TARGET_SOFT_FLOAT
))
7469 emit_insn (gen_addsi3 (reg
, base
, offset
));
7473 return gen_rtx_PLUS (Pmode
, base
, offset
);
7480 /* Find a spare register to use during the prolog of a function. */
7483 thumb_find_work_register (unsigned long pushed_regs_mask
)
7487 /* Check the argument registers first as these are call-used. The
7488 register allocation order means that sometimes r3 might be used
7489 but earlier argument registers might not, so check them all. */
7490 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
7491 if (!df_regs_ever_live_p (reg
))
7494 /* Before going on to check the call-saved registers we can try a couple
7495 more ways of deducing that r3 is available. The first is when we are
7496 pushing anonymous arguments onto the stack and we have less than 4
7497 registers worth of fixed arguments(*). In this case r3 will be part of
7498 the variable argument list and so we can be sure that it will be
7499 pushed right at the start of the function. Hence it will be available
7500 for the rest of the prologue.
7501 (*): ie crtl->args.pretend_args_size is greater than 0. */
7502 if (cfun
->machine
->uses_anonymous_args
7503 && crtl
->args
.pretend_args_size
> 0)
7504 return LAST_ARG_REGNUM
;
7506 /* The other case is when we have fixed arguments but less than 4 registers
7507 worth. In this case r3 might be used in the body of the function, but
7508 it is not being used to convey an argument into the function. In theory
7509 we could just check crtl->args.size to see how many bytes are
7510 being passed in argument registers, but it seems that it is unreliable.
7511 Sometimes it will have the value 0 when in fact arguments are being
7512 passed. (See testcase execute/20021111-1.c for an example). So we also
7513 check the args_info.nregs field as well. The problem with this field is
7514 that it makes no allowances for arguments that are passed to the
7515 function but which are not used. Hence we could miss an opportunity
7516 when a function has an unused argument in r3. But it is better to be
7517 safe than to be sorry. */
7518 if (! cfun
->machine
->uses_anonymous_args
7519 && crtl
->args
.size
>= 0
7520 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
7521 && (TARGET_AAPCS_BASED
7522 ? crtl
->args
.info
.aapcs_ncrn
< 4
7523 : crtl
->args
.info
.nregs
< 4))
7524 return LAST_ARG_REGNUM
;
7526 /* Otherwise look for a call-saved register that is going to be pushed. */
7527 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
7528 if (pushed_regs_mask
& (1 << reg
))
7533 /* Thumb-2 can use high regs. */
7534 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
7535 if (pushed_regs_mask
& (1 << reg
))
7538 /* Something went wrong - thumb_compute_save_reg_mask()
7539 should have arranged for a suitable register to be pushed. */
/* Monotonically increasing id used to create the unique UNSPEC_PIC_LABEL
   labels emitted by arm_load_pic_register / arm_pic_static_addr; GTY so
   it survives garbage collection across functions.  */
7543 static GTY(()) int pic_labelno
;
7545 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7549 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
7551 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
/* Nothing to do unless this function actually needs the PIC base and we
   are not using a single fixed PIC base register.  */
7553 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
7556 gcc_assert (flag_pic
);
7558 pic_reg
= cfun
->machine
->pic_reg
;
/* VxWorks RTP: load the GOT base indirectly through GOTT_BASE/GOTT_INDEX
   rather than pc-relative arithmetic.  */
7559 if (TARGET_VXWORKS_RTP
)
7561 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
7562 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7563 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
7565 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
7567 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
7568 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
7572 /* We use an UNSPEC rather than a LABEL_REF because this label
7573 never appears in the code stream. */
7575 labelno
= GEN_INT (pic_labelno
++);
7576 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7577 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7579 /* On the ARM the PC register contains 'dot + 8' at the time of the
7580 addition, on the Thumb it is 'dot + 4'. */
7581 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7582 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
7584 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7588 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7590 else /* TARGET_THUMB1 */
/* Thumb-1 with a high PIC register: build the value in a low work
   register first, then move it up.  */
7592 if (arm_pic_register
!= INVALID_REGNUM
7593 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
7595 /* We will have pushed the pic register, so we should always be
7596 able to find a work register. */
7597 pic_tmp
= gen_rtx_REG (SImode
,
7598 thumb_find_work_register (saved_regs
));
7599 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
7600 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
7601 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
7603 else if (arm_pic_register
!= INVALID_REGNUM
7604 && arm_pic_register
> LAST_LO_REGNUM
7605 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
7607 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7608 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
7609 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
7612 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7616 /* Need to emit this whether or not we obey regdecls,
7617 since setjmp/longjmp can cause life info to screw up. */
7621 /* Generate code to load the address of a static var when flag_pic is set. */
7623 arm_pic_static_addr (rtx orig
, rtx reg
)
7625 rtx l1
, labelno
, offset_rtx
;
7627 gcc_assert (flag_pic
);
7629 /* We use an UNSPEC rather than a LABEL_REF because this label
7630 never appears in the code stream. */
7631 labelno
= GEN_INT (pic_labelno
++);
7632 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7633 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7635 /* On the ARM the PC register contains 'dot + 8' at the time of the
7636 addition, on the Thumb it is 'dot + 4'. */
7637 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
/* Encode "symbol minus (label + pc-offset)" so the load computes the
   symbol's address pc-relatively.  */
7638 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
7639 UNSPEC_SYMBOL_OFFSET
);
7640 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
7642 return emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
7645 /* Return nonzero if X is valid as an ARM state addressing register. */
7647 arm_address_register_rtx_p (rtx x
, int strict_p
)
/* Strict checking: the hard register must belong to the base class.  */
7657 return ARM_REGNO_OK_FOR_BASE_P (regno
);
/* Non-strict: also accept pseudos and the frame/arg pointers, which may
   still be eliminated or reloaded into valid base registers.  */
7659 return (regno
<= LAST_ARM_REGNUM
7660 || regno
>= FIRST_PSEUDO_REGISTER
7661 || regno
== FRAME_POINTER_REGNUM
7662 || regno
== ARG_POINTER_REGNUM
);
7665 /* Return TRUE if this rtx is the difference of a symbol and a label,
7666 and will reduce to a PC-relative relocation in the object file.
7667 Expressions like this can be left alone when generating PIC, rather
7668 than forced through the GOT. */
7670 pcrel_constant_p (rtx x
)
7672 if (GET_CODE (x
) == MINUS
)
7673 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
7678 /* Return true if X will surely end up in an index register after next
7681 will_be_in_index_register (const_rtx x
)
7683 /* arm.md: calculate_pic_address will split this into a register. */
7684 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
7687 /* Return nonzero if X is a valid ARM state address operand. */
7689 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
7693 enum rtx_code code
= GET_CODE (x
)
;
/* A bare base register is always a valid address.  */
7695 if (arm_address_register_rtx_p (x
, strict_p
))
/* ldrd/strd availability widens which auto-modify forms are legal for
   8-byte modes.  */
7698 use_ldrd
= (TARGET_LDRD
7699 && (mode
== DImode
|| mode
== DFmode
));
7701 if (code
== POST_INC
|| code
== PRE_DEC
7702 || ((code
== PRE_INC
|| code
== POST_DEC
)
7703 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7704 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7706 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7707 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7708 && GET_CODE (XEXP (x
, 1)) == PLUS
7709 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7711 rtx addend
= XEXP (XEXP (x
, 1), 1);
7713 /* Don't allow ldrd post increment by register because it's hard
7714 to fixup invalid register choices. */
7716 && GET_CODE (x
) == POST_MODIFY
7720 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
7721 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
7724 /* After reload constants split into minipools will have addresses
7725 from a LABEL_REF. */
7726 else if (reload_completed
7727 && (code
== LABEL_REF
7729 && GET_CODE (XEXP (x
, 0)) == PLUS
7730 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7731 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
/* TImode and NEON structure modes only allow a bare register address.  */
7734 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7737 else if (code
== PLUS
)
7739 rtx xop0
= XEXP (x
, 0);
7740 rtx xop1
= XEXP (x
, 1);
/* base + index in either operand order, plus the non-strict allowance
   for PIC unspecs that will later be split into a register.  */
7742 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7743 && ((CONST_INT_P (xop1
)
7744 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
7745 || (!strict_p
&& will_be_in_index_register (xop1
))))
7746 || (arm_address_register_rtx_p (xop1
, strict_p
)
7747 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
7751 /* Reload currently can't handle MINUS, so disable this for now */
7752 else if (GET_CODE (x
) == MINUS
)
7754 rtx xop0
= XEXP (x
, 0);
7755 rtx xop1
= XEXP (x
, 1);
7757 return (arm_address_register_rtx_p (xop0
, strict_p
)
7758 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
/* Constant-pool symbol references are valid for non-float modes.  */
7762 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7763 && code
== SYMBOL_REF
7764 && CONSTANT_POOL_ADDRESS_P (x
)
7766 && symbol_mentioned_p (get_pool_constant (x
))
7767 && ! pcrel_constant_p (get_pool_constant (x
))))
7775 can_avoid_literal_pool_for_label_p (rtx x
)
7777 /* Normally we can assign constant values to target registers without
7778 the help of constant pool. But there are cases we have to use constant
7780 1) assign a label to register.
7781 2) sign-extend a 8bit value to 32bit and then assign to register.
7783 Constant pool access in format:
7784 (set (reg r0) (mem (symbol_ref (".LC0"))))
7785 will cause the use of literal pool (later in function arm_reorg).
7786 So here we mark such format as an invalid format, then the compiler
7787 will adjust it into:
7788 (set (reg r0) (symbol_ref (".LC0")))
7789 (set (reg r0) (mem (reg r0))).
7790 No extra register is required, and (mem (reg r0)) won't cause the use
7791 of literal pools. */
7792 if (arm_disable_literal_pool
&& GET_CODE (x
) == SYMBOL_REF
7793 && CONSTANT_POOL_ADDRESS_P (x
))
7799 /* Return nonzero if X is a valid Thumb-2 address operand. */
7801 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7804 enum rtx_code code
= GET_CODE (x
);
7806 if (arm_address_register_rtx_p (x
, strict_p
))
7809 use_ldrd
= (TARGET_LDRD
7810 && (mode
== DImode
|| mode
== DFmode
));
7812 if (code
== POST_INC
|| code
== PRE_DEC
7813 || ((code
== PRE_INC
|| code
== POST_DEC
)
7814 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7815 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7817 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7818 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7819 && GET_CODE (XEXP (x
, 1)) == PLUS
7820 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7822 /* Thumb-2 only has autoincrement by constant. */
7823 rtx addend
= XEXP (XEXP (x
, 1), 1);
7824 HOST_WIDE_INT offset
;
7826 if (!CONST_INT_P (addend
))
7829 offset
= INTVAL(addend
);
7830 if (GET_MODE_SIZE (mode
) <= 4)
7831 return (offset
> -256 && offset
< 256);
7833 return (use_ldrd
&& offset
> -1024 && offset
< 1024
7834 && (offset
& 3) == 0);
7837 /* After reload constants split into minipools will have addresses
7838 from a LABEL_REF. */
7839 else if (reload_completed
7840 && (code
== LABEL_REF
7842 && GET_CODE (XEXP (x
, 0)) == PLUS
7843 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7844 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7847 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7850 else if (code
== PLUS
)
7852 rtx xop0
= XEXP (x
, 0);
7853 rtx xop1
= XEXP (x
, 1);
7855 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7856 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
7857 || (!strict_p
&& will_be_in_index_register (xop1
))))
7858 || (arm_address_register_rtx_p (xop1
, strict_p
)
7859 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
7862 else if (can_avoid_literal_pool_for_label_p (x
))
7865 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7866 && code
== SYMBOL_REF
7867 && CONSTANT_POOL_ADDRESS_P (x
)
7869 && symbol_mentioned_p (get_pool_constant (x
))
7870 && ! pcrel_constant_p (get_pool_constant (x
))))
7876 /* Return nonzero if INDEX is valid for an address index operand in
7879 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
7882 HOST_WIDE_INT range
;
7883 enum rtx_code code
= GET_CODE (index
);
7885 /* Standard coprocessor addressing modes. */
7886 if (TARGET_HARD_FLOAT
7887 && (mode
== SFmode
|| mode
== DFmode
))
7888 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7889 && INTVAL (index
) > -1024
7890 && (INTVAL (index
) & 3) == 0);
7892 /* For quad modes, we restrict the constant offset to be slightly less
7893 than what the instruction format permits. We do this because for
7894 quad mode moves, we will actually decompose them into two separate
7895 double-mode reads or writes. INDEX must therefore be a valid
7896 (double-mode) offset and so should INDEX+8. */
7897 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7898 return (code
== CONST_INT
7899 && INTVAL (index
) < 1016
7900 && INTVAL (index
) > -1024
7901 && (INTVAL (index
) & 3) == 0);
7903 /* We have no such constraint on double mode offsets, so we permit the
7904 full range of the instruction format. */
7905 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7906 return (code
== CONST_INT
7907 && INTVAL (index
) < 1024
7908 && INTVAL (index
) > -1024
7909 && (INTVAL (index
) & 3) == 0);
7911 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7912 return (code
== CONST_INT
7913 && INTVAL (index
) < 1024
7914 && INTVAL (index
) > -1024
7915 && (INTVAL (index
) & 3) == 0);
7917 if (arm_address_register_rtx_p (index
, strict_p
)
7918 && (GET_MODE_SIZE (mode
) <= 4))
7921 if (mode
== DImode
|| mode
== DFmode
)
7923 if (code
== CONST_INT
)
7925 HOST_WIDE_INT val
= INTVAL (index
);
7927 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
7928 If vldr is selected it uses arm_coproc_mem_operand. */
7930 return val
> -256 && val
< 256;
7932 return val
> -4096 && val
< 4092;
7935 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
7938 if (GET_MODE_SIZE (mode
) <= 4
7942 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
7946 rtx xiop0
= XEXP (index
, 0);
7947 rtx xiop1
= XEXP (index
, 1);
7949 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7950 && power_of_two_operand (xiop1
, SImode
))
7951 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7952 && power_of_two_operand (xiop0
, SImode
)));
7954 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
7955 || code
== ASHIFT
|| code
== ROTATERT
)
7957 rtx op
= XEXP (index
, 1);
7959 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7962 && INTVAL (op
) <= 31);
7966 /* For ARM v4 we may be doing a sign-extend operation during the
7972 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
7978 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
7980 return (code
== CONST_INT
7981 && INTVAL (index
) < range
7982 && INTVAL (index
) > -range
);
7985 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7986 index operand. i.e. 1, 2, 4 or 8. */
7988 thumb2_index_mul_operand (rtx op
)
7992 if (!CONST_INT_P (op
))
7996 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
7999 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8001 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
8003 enum rtx_code code
= GET_CODE (index
);
8005 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8006 /* Standard coprocessor addressing modes. */
8007 if (TARGET_HARD_FLOAT
8008 && (mode
== SFmode
|| mode
== DFmode
))
8009 return (code
== CONST_INT
&& INTVAL (index
) < 1024
8010 /* Thumb-2 allows only > -256 index range for it's core register
8011 load/stores. Since we allow SF/DF in core registers, we have
8012 to use the intersection between -256~4096 (core) and -1024~1024
8014 && INTVAL (index
) > -256
8015 && (INTVAL (index
) & 3) == 0);
8017 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
8019 /* For DImode assume values will usually live in core regs
8020 and only allow LDRD addressing modes. */
8021 if (!TARGET_LDRD
|| mode
!= DImode
)
8022 return (code
== CONST_INT
8023 && INTVAL (index
) < 1024
8024 && INTVAL (index
) > -1024
8025 && (INTVAL (index
) & 3) == 0);
8028 /* For quad modes, we restrict the constant offset to be slightly less
8029 than what the instruction format permits. We do this because for
8030 quad mode moves, we will actually decompose them into two separate
8031 double-mode reads or writes. INDEX must therefore be a valid
8032 (double-mode) offset and so should INDEX+8. */
8033 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
8034 return (code
== CONST_INT
8035 && INTVAL (index
) < 1016
8036 && INTVAL (index
) > -1024
8037 && (INTVAL (index
) & 3) == 0);
8039 /* We have no such constraint on double mode offsets, so we permit the
8040 full range of the instruction format. */
8041 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
8042 return (code
== CONST_INT
8043 && INTVAL (index
) < 1024
8044 && INTVAL (index
) > -1024
8045 && (INTVAL (index
) & 3) == 0);
8047 if (arm_address_register_rtx_p (index
, strict_p
)
8048 && (GET_MODE_SIZE (mode
) <= 4))
8051 if (mode
== DImode
|| mode
== DFmode
)
8053 if (code
== CONST_INT
)
8055 HOST_WIDE_INT val
= INTVAL (index
);
8056 /* Thumb-2 ldrd only has reg+const addressing modes.
8057 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8058 If vldr is selected it uses arm_coproc_mem_operand. */
8060 return IN_RANGE (val
, -1020, 1020) && (val
& 3) == 0;
8062 return IN_RANGE (val
, -255, 4095 - 4);
8070 rtx xiop0
= XEXP (index
, 0);
8071 rtx xiop1
= XEXP (index
, 1);
8073 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
8074 && thumb2_index_mul_operand (xiop1
))
8075 || (arm_address_register_rtx_p (xiop1
, strict_p
)
8076 && thumb2_index_mul_operand (xiop0
)));
8078 else if (code
== ASHIFT
)
8080 rtx op
= XEXP (index
, 1);
8082 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
8085 && INTVAL (op
) <= 3);
8088 return (code
== CONST_INT
8089 && INTVAL (index
) < 4096
8090 && INTVAL (index
) > -256);
8093 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8095 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
8105 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
8107 return (regno
<= LAST_LO_REGNUM
8108 || regno
> LAST_VIRTUAL_REGISTER
8109 || regno
== FRAME_POINTER_REGNUM
8110 || (GET_MODE_SIZE (mode
) >= 4
8111 && (regno
== STACK_POINTER_REGNUM
8112 || regno
>= FIRST_PSEUDO_REGISTER
8113 || x
== hard_frame_pointer_rtx
8114 || x
== arg_pointer_rtx
)));
8117 /* Return nonzero if x is a legitimate index register. This is the case
8118 for any base register that can access a QImode object. */
8120 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
8122 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
8125 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8127 The AP may be eliminated to either the SP or the FP, so we use the
8128 least common denominator, e.g. SImode, and offsets from 0 to 64.
8130 ??? Verify whether the above is the right approach.
8132 ??? Also, the FP may be eliminated to the SP, so perhaps that
8133 needs special handling also.
8135 ??? Look at how the mips16 port solves this problem. It probably uses
8136 better ways to solve some of these problems.
8138 Although it is not incorrect, we don't accept QImode and HImode
8139 addresses based on the frame pointer or arg pointer until the
8140 reload pass starts. This is so that eliminating such addresses
8141 into stack based ones won't produce impossible code. */
8143 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
8145 if (TARGET_HAVE_MOVT
&& can_avoid_literal_pool_for_label_p (x
))
8148 /* ??? Not clear if this is right. Experiment. */
8149 if (GET_MODE_SIZE (mode
) < 4
8150 && !(reload_in_progress
|| reload_completed
)
8151 && (reg_mentioned_p (frame_pointer_rtx
, x
)
8152 || reg_mentioned_p (arg_pointer_rtx
, x
)
8153 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
8154 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
8155 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
8156 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
8159 /* Accept any base register. SP only in SImode or larger. */
8160 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
8163 /* This is PC relative data before arm_reorg runs. */
8164 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
8165 && GET_CODE (x
) == SYMBOL_REF
8166 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
8169 /* This is PC relative data after arm_reorg runs. */
8170 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
8172 && (GET_CODE (x
) == LABEL_REF
8173 || (GET_CODE (x
) == CONST
8174 && GET_CODE (XEXP (x
, 0)) == PLUS
8175 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8176 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8179 /* Post-inc indexing only supported for SImode and larger. */
8180 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
8181 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
8184 else if (GET_CODE (x
) == PLUS
)
8186 /* REG+REG address can be any two index registers. */
8187 /* We disallow FRAME+REG addressing since we know that FRAME
8188 will be replaced with STACK, and SP relative addressing only
8189 permits SP+OFFSET. */
8190 if (GET_MODE_SIZE (mode
) <= 4
8191 && XEXP (x
, 0) != frame_pointer_rtx
8192 && XEXP (x
, 1) != frame_pointer_rtx
8193 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8194 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
8195 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
8198 /* REG+const has 5-7 bit offset for non-SP registers. */
8199 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8200 || XEXP (x
, 0) == arg_pointer_rtx
)
8201 && CONST_INT_P (XEXP (x
, 1))
8202 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
8205 /* REG+const has 10-bit offset for SP, but only SImode and
8206 larger is supported. */
8207 /* ??? Should probably check for DI/DFmode overflow here
8208 just like GO_IF_LEGITIMATE_OFFSET does. */
8209 else if (REG_P (XEXP (x
, 0))
8210 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
8211 && GET_MODE_SIZE (mode
) >= 4
8212 && CONST_INT_P (XEXP (x
, 1))
8213 && INTVAL (XEXP (x
, 1)) >= 0
8214 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
8215 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8218 else if (REG_P (XEXP (x
, 0))
8219 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
8220 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
8221 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
8222 && REGNO (XEXP (x
, 0))
8223 <= LAST_VIRTUAL_POINTER_REGISTER
))
8224 && GET_MODE_SIZE (mode
) >= 4
8225 && CONST_INT_P (XEXP (x
, 1))
8226 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8230 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8231 && GET_MODE_SIZE (mode
) == 4
8232 && GET_CODE (x
) == SYMBOL_REF
8233 && CONSTANT_POOL_ADDRESS_P (x
)
8235 && symbol_mentioned_p (get_pool_constant (x
))
8236 && ! pcrel_constant_p (get_pool_constant (x
))))
8242 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8243 instruction of mode MODE. */
8245 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
8247 switch (GET_MODE_SIZE (mode
))
8250 return val
>= 0 && val
< 32;
8253 return val
>= 0 && val
< 64 && (val
& 1) == 0;
8257 && (val
+ GET_MODE_SIZE (mode
)) <= 128
8263 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
8266 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
8267 else if (TARGET_THUMB2
)
8268 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
8269 else /* if (TARGET_THUMB1) */
8270 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
8273 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8275 Given an rtx X being reloaded into a reg required to be
8276 in class CLASS, return the class of reg to actually use.
8277 In general this is just CLASS, but for the Thumb core registers and
8278 immediate constants we prefer a LO_REGS class or a subset. */
8281 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
8287 if (rclass
== GENERAL_REGS
)
8294 /* Build the SYMBOL_REF for __tls_get_addr. */
8296 static GTY(()) rtx tls_get_addr_libfunc
;
8299 get_tls_get_addr (void)
8301 if (!tls_get_addr_libfunc
)
8302 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
8303 return tls_get_addr_libfunc
;
8307 arm_load_tp (rtx target
)
8310 target
= gen_reg_rtx (SImode
);
8314 /* Can return in any reg. */
8315 emit_insn (gen_load_tp_hard (target
));
8319 /* Always returned in r0. Immediately copy the result into a pseudo,
8320 otherwise other uses of r0 (e.g. setting up function arguments) may
8321 clobber the value. */
8325 emit_insn (gen_load_tp_soft ());
8327 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
8328 emit_move_insn (target
, tmp
);
8334 load_tls_operand (rtx x
, rtx reg
)
8338 if (reg
== NULL_RTX
)
8339 reg
= gen_reg_rtx (SImode
);
8341 tmp
= gen_rtx_CONST (SImode
, x
);
8343 emit_move_insn (reg
, tmp
);
8349 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
8351 rtx label
, labelno
, sum
;
8353 gcc_assert (reloc
!= TLS_DESCSEQ
);
8356 labelno
= GEN_INT (pic_labelno
++);
8357 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8358 label
= gen_rtx_CONST (VOIDmode
, label
);
8360 sum
= gen_rtx_UNSPEC (Pmode
,
8361 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
8362 GEN_INT (TARGET_ARM
? 8 : 4)),
8364 reg
= load_tls_operand (sum
, reg
);
8367 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
8369 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8371 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
8372 LCT_PURE
, /* LCT_CONST? */
8375 rtx_insn
*insns
= get_insns ();
8382 arm_tls_descseq_addr (rtx x
, rtx reg
)
8384 rtx labelno
= GEN_INT (pic_labelno
++);
8385 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8386 rtx sum
= gen_rtx_UNSPEC (Pmode
,
8387 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
8388 gen_rtx_CONST (VOIDmode
, label
),
8389 GEN_INT (!TARGET_ARM
)),
8391 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
8393 emit_insn (gen_tlscall (x
, labelno
));
8395 reg
= gen_reg_rtx (SImode
);
8397 gcc_assert (REGNO (reg
) != R0_REGNUM
);
8399 emit_move_insn (reg
, reg0
);
8405 legitimize_tls_address (rtx x
, rtx reg
)
8407 rtx dest
, tp
, label
, labelno
, sum
, ret
, eqv
, addend
;
8409 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
8413 case TLS_MODEL_GLOBAL_DYNAMIC
:
8414 if (TARGET_GNU2_TLS
)
8416 reg
= arm_tls_descseq_addr (x
, reg
);
8418 tp
= arm_load_tp (NULL_RTX
);
8420 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8424 /* Original scheme */
8425 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
8426 dest
= gen_reg_rtx (Pmode
);
8427 emit_libcall_block (insns
, dest
, ret
, x
);
8431 case TLS_MODEL_LOCAL_DYNAMIC
:
8432 if (TARGET_GNU2_TLS
)
8434 reg
= arm_tls_descseq_addr (x
, reg
);
8436 tp
= arm_load_tp (NULL_RTX
);
8438 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8442 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
8444 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8445 share the LDM result with other LD model accesses. */
8446 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
8448 dest
= gen_reg_rtx (Pmode
);
8449 emit_libcall_block (insns
, dest
, ret
, eqv
);
8451 /* Load the addend. */
8452 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
8453 GEN_INT (TLS_LDO32
)),
8455 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
8456 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
8460 case TLS_MODEL_INITIAL_EXEC
:
8461 labelno
= GEN_INT (pic_labelno
++);
8462 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8463 label
= gen_rtx_CONST (VOIDmode
, label
);
8464 sum
= gen_rtx_UNSPEC (Pmode
,
8465 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
8466 GEN_INT (TARGET_ARM
? 8 : 4)),
8468 reg
= load_tls_operand (sum
, reg
);
8471 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
8472 else if (TARGET_THUMB2
)
8473 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
8476 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8477 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
8480 tp
= arm_load_tp (NULL_RTX
);
8482 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8484 case TLS_MODEL_LOCAL_EXEC
:
8485 tp
= arm_load_tp (NULL_RTX
);
8487 reg
= gen_rtx_UNSPEC (Pmode
,
8488 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
8490 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
8492 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8499 /* Try machine-dependent ways of modifying an illegitimate address
8500 to be legitimate. If we find one, return the new, valid address. */
8502 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8504 if (arm_tls_referenced_p (x
))
8508 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
8510 addend
= XEXP (XEXP (x
, 0), 1);
8511 x
= XEXP (XEXP (x
, 0), 0);
8514 if (GET_CODE (x
) != SYMBOL_REF
)
8517 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
8519 x
= legitimize_tls_address (x
, NULL_RTX
);
8523 x
= gen_rtx_PLUS (SImode
, x
, addend
);
8532 /* TODO: legitimize_address for Thumb2. */
8535 return thumb_legitimize_address (x
, orig_x
, mode
);
8538 if (GET_CODE (x
) == PLUS
)
8540 rtx xop0
= XEXP (x
, 0);
8541 rtx xop1
= XEXP (x
, 1);
8543 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
8544 xop0
= force_reg (SImode
, xop0
);
8546 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
8547 && !symbol_mentioned_p (xop1
))
8548 xop1
= force_reg (SImode
, xop1
);
8550 if (ARM_BASE_REGISTER_RTX_P (xop0
)
8551 && CONST_INT_P (xop1
))
8553 HOST_WIDE_INT n
, low_n
;
8557 /* VFP addressing modes actually allow greater offsets, but for
8558 now we just stick with the lowest common denominator. */
8559 if (mode
== DImode
|| mode
== DFmode
)
8571 low_n
= ((mode
) == TImode
? 0
8572 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
8576 base_reg
= gen_reg_rtx (SImode
);
8577 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
8578 emit_move_insn (base_reg
, val
);
8579 x
= plus_constant (Pmode
, base_reg
, low_n
);
8581 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8582 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8585 /* XXX We don't allow MINUS any more -- see comment in
8586 arm_legitimate_address_outer_p (). */
8587 else if (GET_CODE (x
) == MINUS
)
8589 rtx xop0
= XEXP (x
, 0);
8590 rtx xop1
= XEXP (x
, 1);
8592 if (CONSTANT_P (xop0
))
8593 xop0
= force_reg (SImode
, xop0
);
8595 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
8596 xop1
= force_reg (SImode
, xop1
);
8598 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8599 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
8602 /* Make sure to take full advantage of the pre-indexed addressing mode
8603 with absolute addresses which often allows for the base register to
8604 be factorized for multiple adjacent memory references, and it might
8605 even allows for the mini pool to be avoided entirely. */
8606 else if (CONST_INT_P (x
) && optimize
> 0)
8609 HOST_WIDE_INT mask
, base
, index
;
8612 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8613 use a 8-bit index. So let's use a 12-bit index for SImode only and
8614 hope that arm_gen_constant will enable ldrb to use more bits. */
8615 bits
= (mode
== SImode
) ? 12 : 8;
8616 mask
= (1 << bits
) - 1;
8617 base
= INTVAL (x
) & ~mask
;
8618 index
= INTVAL (x
) & mask
;
8619 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
8621 /* It'll most probably be more efficient to generate the base
8622 with more bits set and use a negative index instead. */
8626 base_reg
= force_reg (SImode
, GEN_INT (base
));
8627 x
= plus_constant (Pmode
, base_reg
, index
);
8632 /* We need to find and carefully transform any SYMBOL and LABEL
8633 references; so go back to the original address expression. */
8634 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8636 if (new_x
!= orig_x
)
8644 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8645 to be legitimate. If we find one, return the new, valid address. */
8647 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8649 if (GET_CODE (x
) == PLUS
8650 && CONST_INT_P (XEXP (x
, 1))
8651 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
8652 || INTVAL (XEXP (x
, 1)) < 0))
8654 rtx xop0
= XEXP (x
, 0);
8655 rtx xop1
= XEXP (x
, 1);
8656 HOST_WIDE_INT offset
= INTVAL (xop1
);
8658 /* Try and fold the offset into a biasing of the base register and
8659 then offsetting that. Don't do this when optimizing for space
8660 since it can cause too many CSEs. */
8661 if (optimize_size
&& offset
>= 0
8662 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
8664 HOST_WIDE_INT delta
;
8667 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
8668 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
8669 delta
= 31 * GET_MODE_SIZE (mode
);
8671 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
8673 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
8675 x
= plus_constant (Pmode
, xop0
, delta
);
8677 else if (offset
< 0 && offset
> -256)
8678 /* Small negative offsets are best done with a subtract before the
8679 dereference, forcing these into a register normally takes two
8681 x
= force_operand (x
, NULL_RTX
);
8684 /* For the remaining cases, force the constant into a register. */
8685 xop1
= force_reg (SImode
, xop1
);
8686 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8689 else if (GET_CODE (x
) == PLUS
8690 && s_register_operand (XEXP (x
, 1), SImode
)
8691 && !s_register_operand (XEXP (x
, 0), SImode
))
8693 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
8695 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
8700 /* We need to find and carefully transform any SYMBOL and LABEL
8701 references; so go back to the original address expression. */
8702 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8704 if (new_x
!= orig_x
)
8711 /* Return TRUE if X contains any TLS symbol references. */
8714 arm_tls_referenced_p (rtx x
)
8716 if (! TARGET_HAVE_TLS
)
8719 subrtx_iterator::array_type array
;
8720 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
8722 const_rtx x
= *iter
;
8723 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
8725 /* ARM currently does not provide relocations to encode TLS variables
8726 into AArch32 instructions, only data, so there is no way to
8727 currently implement these if a literal pool is disabled. */
8728 if (arm_disable_literal_pool
)
8729 sorry ("accessing thread-local storage is not currently supported "
8730 "with -mpure-code or -mslow-flash-data");
8735 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8736 TLS offsets, not real symbol references. */
8737 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
8738 iter
.skip_subrtxes ();
8743 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8745 On the ARM, allow any integer (invalid ones are removed later by insn
8746 patterns), nice doubles and symbol_refs which refer to the function's
8749 When generating pic allow anything. */
8752 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
8754 return flag_pic
|| !label_mentioned_p (x
);
8758 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8760 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8761 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8762 for ARMv8-M Baseline or later the result is valid. */
8763 if (TARGET_HAVE_MOVT
&& GET_CODE (x
) == HIGH
)
8766 return (CONST_INT_P (x
)
8767 || CONST_DOUBLE_P (x
)
8768 || CONSTANT_ADDRESS_P (x
)
8769 || (TARGET_HAVE_MOVT
&& GET_CODE (x
) == SYMBOL_REF
)
8774 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
8776 return (!arm_cannot_force_const_mem (mode
, x
)
8778 ? arm_legitimate_constant_p_1 (mode
, x
)
8779 : thumb_legitimate_constant_p (mode
, x
)));
8782 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8785 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8789 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
8791 split_const (x
, &base
, &offset
);
8792 if (GET_CODE (base
) == SYMBOL_REF
8793 && !offset_within_block_p (base
, INTVAL (offset
)))
8796 return arm_tls_referenced_p (x
);
/* True when X is a register, or a SUBREG of a register.  */
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

/* The underlying register of X, looking through one SUBREG if present.  */
#define REG_OR_SUBREG_RTX(X) \
  (REG_P (X) ? (X) : SUBREG_REG (X))
8807 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8809 machine_mode mode
= GET_MODE (x
);
8818 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8825 return COSTS_N_INSNS (1);
8828 if (arm_arch6m
&& arm_m_profile_small_mul
)
8829 return COSTS_N_INSNS (32);
8831 if (CONST_INT_P (XEXP (x
, 1)))
8834 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8841 return COSTS_N_INSNS (2) + cycles
;
8843 return COSTS_N_INSNS (1) + 16;
8846 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8848 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8849 return (COSTS_N_INSNS (words
)
8850 + 4 * ((MEM_P (SET_SRC (x
)))
8851 + MEM_P (SET_DEST (x
))));
8856 if (UINTVAL (x
) < 256
8857 /* 16-bit constant. */
8858 || (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000)))
8860 if (thumb_shiftable_const (INTVAL (x
)))
8861 return COSTS_N_INSNS (2);
8862 return COSTS_N_INSNS (3);
8864 else if ((outer
== PLUS
|| outer
== COMPARE
)
8865 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8867 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8868 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8869 return COSTS_N_INSNS (1);
8870 else if (outer
== AND
)
8873 /* This duplicates the tests in the andsi3 expander. */
8874 for (i
= 9; i
<= 31; i
++)
8875 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8876 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8877 return COSTS_N_INSNS (2);
8879 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8880 || outer
== LSHIFTRT
)
8882 return COSTS_N_INSNS (2);
8888 return COSTS_N_INSNS (3);
8906 /* XXX another guess. */
8907 /* Memory costs quite a lot for the first word, but subsequent words
8908 load at the equivalent of a single insn each. */
8909 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8910 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8915 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8921 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8922 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8928 return total
+ COSTS_N_INSNS (1);
8930 /* Assume a two-shift sequence. Increase the cost slightly so
8931 we prefer actual shifts over an extend operation. */
8932 return total
+ 1 + COSTS_N_INSNS (2);
8939 /* Estimates the size cost of thumb1 instructions.
8940 For now most of the code is copied from thumb1_rtx_costs. We need more
8941 fine grain tuning when we have more related test cases. */
8943 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8945 machine_mode mode
= GET_MODE (x
);
8954 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8958 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8959 defined by RTL expansion, especially for the expansion of
8961 if ((GET_CODE (XEXP (x
, 0)) == MULT
8962 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8963 || (GET_CODE (XEXP (x
, 1)) == MULT
8964 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8965 return COSTS_N_INSNS (2);
8970 return COSTS_N_INSNS (1);
8973 if (CONST_INT_P (XEXP (x
, 1)))
8975 /* Thumb1 mul instruction can't operate on const. We must Load it
8976 into a register first. */
8977 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8978 /* For the targets which have a very small and high-latency multiply
8979 unit, we prefer to synthesize the mult with up to 5 instructions,
8980 giving a good balance between size and performance. */
8981 if (arm_arch6m
&& arm_m_profile_small_mul
)
8982 return COSTS_N_INSNS (5);
8984 return COSTS_N_INSNS (1) + const_size
;
8986 return COSTS_N_INSNS (1);
8989 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8991 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8992 cost
= COSTS_N_INSNS (words
);
8993 if (satisfies_constraint_J (SET_SRC (x
))
8994 || satisfies_constraint_K (SET_SRC (x
))
8995 /* Too big an immediate for a 2-byte mov, using MOVT. */
8996 || (CONST_INT_P (SET_SRC (x
))
8997 && UINTVAL (SET_SRC (x
)) >= 256
8999 && satisfies_constraint_j (SET_SRC (x
)))
9000 /* thumb1_movdi_insn. */
9001 || ((words
> 1) && MEM_P (SET_SRC (x
))))
9002 cost
+= COSTS_N_INSNS (1);
9008 if (UINTVAL (x
) < 256)
9009 return COSTS_N_INSNS (1);
9010 /* movw is 4byte long. */
9011 if (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000))
9012 return COSTS_N_INSNS (2);
9013 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9014 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
9015 return COSTS_N_INSNS (2);
9016 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9017 if (thumb_shiftable_const (INTVAL (x
)))
9018 return COSTS_N_INSNS (2);
9019 return COSTS_N_INSNS (3);
9021 else if ((outer
== PLUS
|| outer
== COMPARE
)
9022 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
9024 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
9025 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
9026 return COSTS_N_INSNS (1);
9027 else if (outer
== AND
)
9030 /* This duplicates the tests in the andsi3 expander. */
9031 for (i
= 9; i
<= 31; i
++)
9032 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
9033 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
9034 return COSTS_N_INSNS (2);
9036 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
9037 || outer
== LSHIFTRT
)
9039 return COSTS_N_INSNS (2);
9045 return COSTS_N_INSNS (3);
9059 return COSTS_N_INSNS (1);
9062 return (COSTS_N_INSNS (1)
9064 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9065 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9066 ? COSTS_N_INSNS (1) : 0));
9070 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9075 /* XXX still guessing. */
9076 switch (GET_MODE (XEXP (x
, 0)))
9079 return (1 + (mode
== DImode
? 4 : 0)
9080 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9083 return (4 + (mode
== DImode
? 4 : 0)
9084 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9087 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9098 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9099 operand, then return the operand that is being shifted. If the shift
9100 is not by a constant, then set SHIFT_REG to point to the operand.
9101 Return NULL if OP is not a shifter operand. */
9103 shifter_op_p (rtx op
, rtx
*shift_reg
)
9105 enum rtx_code code
= GET_CODE (op
);
9107 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9108 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9109 return XEXP (op
, 0);
9110 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9111 return XEXP (op
, 0);
9112 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9113 || code
== ASHIFTRT
)
9115 if (!CONST_INT_P (XEXP (op
, 1)))
9116 *shift_reg
= XEXP (op
, 1);
9117 return XEXP (op
, 0);
9124 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9126 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9127 rtx_code code
= GET_CODE (x
);
9128 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
9130 switch (XINT (x
, 1))
9132 case UNSPEC_UNALIGNED_LOAD
:
9133 /* We can only do unaligned loads into the integer unit, and we can't
9135 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9137 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9138 + extra_cost
->ldst
.load_unaligned
);
9141 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9142 ADDR_SPACE_GENERIC
, speed_p
);
9146 case UNSPEC_UNALIGNED_STORE
:
9147 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9149 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9150 + extra_cost
->ldst
.store_unaligned
);
9152 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
9154 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9155 ADDR_SPACE_GENERIC
, speed_p
);
9166 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9170 *cost
= COSTS_N_INSNS (2);
9176 /* Cost of a libcall. We assume one insn per argument, an amount for the
9177 call (one insn for -Os) and then one for processing the result. */
9178 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9180 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9183 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9184 if (shift_op != NULL \
9185 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9190 *cost += extra_cost->alu.arith_shift_reg; \
9191 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9192 ASHIFT, 1, speed_p); \
9195 *cost += extra_cost->alu.arith_shift; \
9197 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9198 ASHIFT, 0, speed_p) \
9199 + rtx_cost (XEXP (x, 1 - IDX), \
9200 GET_MODE (shift_op), \
9207 /* RTX costs. Make an estimate of the cost of executing the operation
9208 X, which is contained with an operation with code OUTER_CODE.
9209 SPEED_P indicates whether the cost desired is the performance cost,
9210 or the size cost. The estimate is stored in COST and the return
9211 value is TRUE if the cost calculation is final, or FALSE if the
9212 caller should recurse through the operands of X to add additional
9215 We currently make no attempt to model the size savings of Thumb-2
9216 16-bit instructions. At the normal points in compilation where
9217 this code is called we have no measure of whether the condition
9218 flags are live or not, and thus no realistic way to determine what
9219 the size will eventually be. */
9221 arm_rtx_costs_internal (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9222 const struct cpu_cost_table
*extra_cost
,
9223 int *cost
, bool speed_p
)
9225 machine_mode mode
= GET_MODE (x
);
9227 *cost
= COSTS_N_INSNS (1);
9232 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9234 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9242 /* SET RTXs don't have a mode so we get it from the destination. */
9243 mode
= GET_MODE (SET_DEST (x
));
9245 if (REG_P (SET_SRC (x
))
9246 && REG_P (SET_DEST (x
)))
9248 /* Assume that most copies can be done with a single insn,
9249 unless we don't have HW FP, in which case everything
9250 larger than word mode will require two insns. */
9251 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9252 && GET_MODE_SIZE (mode
) > 4)
9255 /* Conditional register moves can be encoded
9256 in 16 bits in Thumb mode. */
9257 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9263 if (CONST_INT_P (SET_SRC (x
)))
9265 /* Handle CONST_INT here, since the value doesn't have a mode
9266 and we would otherwise be unable to work out the true cost. */
9267 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
9270 /* Slightly lower the cost of setting a core reg to a constant.
9271 This helps break up chains and allows for better scheduling. */
9272 if (REG_P (SET_DEST (x
))
9273 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9276 /* Immediate moves with an immediate in the range [0, 255] can be
9277 encoded in 16 bits in Thumb mode. */
9278 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9279 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9281 goto const_int_cost
;
9287 /* A memory access costs 1 insn if the mode is small, or the address is
9288 a single register, otherwise it costs one insn per word. */
9289 if (REG_P (XEXP (x
, 0)))
9290 *cost
= COSTS_N_INSNS (1);
9292 && GET_CODE (XEXP (x
, 0)) == PLUS
9293 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9294 /* This will be split into two instructions.
9295 See arm.md:calculate_pic_address. */
9296 *cost
= COSTS_N_INSNS (2);
9298 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9300 /* For speed optimizations, add the costs of the address and
9301 accessing memory. */
9304 *cost
+= (extra_cost
->ldst
.load
9305 + arm_address_cost (XEXP (x
, 0), mode
,
9306 ADDR_SPACE_GENERIC
, speed_p
));
9308 *cost
+= extra_cost
->ldst
.load
;
9314 /* Calculations of LDM costs are complex. We assume an initial cost
9315 (ldm_1st) which will load the number of registers mentioned in
9316 ldm_regs_per_insn_1st registers; then each additional
9317 ldm_regs_per_insn_subsequent registers cost one more insn. The
9318 formula for N regs is thus:
9320 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9321 + ldm_regs_per_insn_subsequent - 1)
9322 / ldm_regs_per_insn_subsequent).
9324 Additional costs may also be added for addressing. A similar
9325 formula is used for STM. */
9327 bool is_ldm
= load_multiple_operation (x
, SImode
);
9328 bool is_stm
= store_multiple_operation (x
, SImode
);
9330 if (is_ldm
|| is_stm
)
9334 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9335 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9336 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9337 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9338 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9339 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9340 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9342 *cost
+= regs_per_insn_1st
9343 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9344 + regs_per_insn_sub
- 1)
9345 / regs_per_insn_sub
);
9354 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9355 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9356 *cost
+= COSTS_N_INSNS (speed_p
9357 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
9358 else if (mode
== SImode
&& TARGET_IDIV
)
9359 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
9361 *cost
= LIBCALL_COST (2);
9363 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9364 possible udiv is prefered. */
9365 *cost
+= (code
== DIV
? COSTS_N_INSNS (1) : 0);
9366 return false; /* All arguments must be in registers. */
9369 /* MOD by a power of 2 can be expanded as:
9371 and r0, r0, #(n - 1)
9372 and r1, r1, #(n - 1)
9373 rsbpl r0, r1, #0. */
9374 if (CONST_INT_P (XEXP (x
, 1))
9375 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
9378 *cost
+= COSTS_N_INSNS (3);
9381 *cost
+= 2 * extra_cost
->alu
.logical
9382 + extra_cost
->alu
.arith
;
9388 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9389 possible udiv is prefered. */
9390 *cost
= LIBCALL_COST (2) + (code
== MOD
? COSTS_N_INSNS (1) : 0);
9391 return false; /* All arguments must be in registers. */
9394 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9396 *cost
+= (COSTS_N_INSNS (1)
9397 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9399 *cost
+= extra_cost
->alu
.shift_reg
;
9407 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9409 *cost
+= (COSTS_N_INSNS (2)
9410 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9412 *cost
+= 2 * extra_cost
->alu
.shift
;
9415 else if (mode
== SImode
)
9417 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9418 /* Slightly disparage register shifts at -Os, but not by much. */
9419 if (!CONST_INT_P (XEXP (x
, 1)))
9420 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9421 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9424 else if (GET_MODE_CLASS (mode
) == MODE_INT
9425 && GET_MODE_SIZE (mode
) < 4)
9429 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9430 /* Slightly disparage register shifts at -Os, but not by
9432 if (!CONST_INT_P (XEXP (x
, 1)))
9433 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9434 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9436 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9438 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9440 /* Can use SBFX/UBFX. */
9442 *cost
+= extra_cost
->alu
.bfx
;
9443 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9447 *cost
+= COSTS_N_INSNS (1);
9448 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9451 if (CONST_INT_P (XEXP (x
, 1)))
9452 *cost
+= 2 * extra_cost
->alu
.shift
;
9454 *cost
+= (extra_cost
->alu
.shift
9455 + extra_cost
->alu
.shift_reg
);
9458 /* Slightly disparage register shifts. */
9459 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9464 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
9465 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9468 if (CONST_INT_P (XEXP (x
, 1)))
9469 *cost
+= (2 * extra_cost
->alu
.shift
9470 + extra_cost
->alu
.log_shift
);
9472 *cost
+= (extra_cost
->alu
.shift
9473 + extra_cost
->alu
.shift_reg
9474 + extra_cost
->alu
.log_shift_reg
);
9480 *cost
= LIBCALL_COST (2);
9489 *cost
+= extra_cost
->alu
.rev
;
9496 /* No rev instruction available. Look at arm_legacy_rev
9497 and thumb_legacy_rev for the form of RTL used then. */
9500 *cost
+= COSTS_N_INSNS (9);
9504 *cost
+= 6 * extra_cost
->alu
.shift
;
9505 *cost
+= 3 * extra_cost
->alu
.logical
;
9510 *cost
+= COSTS_N_INSNS (4);
9514 *cost
+= 2 * extra_cost
->alu
.shift
;
9515 *cost
+= extra_cost
->alu
.arith_shift
;
9516 *cost
+= 2 * extra_cost
->alu
.logical
;
9524 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9525 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9527 if (GET_CODE (XEXP (x
, 0)) == MULT
9528 || GET_CODE (XEXP (x
, 1)) == MULT
)
9530 rtx mul_op0
, mul_op1
, sub_op
;
9533 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9535 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9537 mul_op0
= XEXP (XEXP (x
, 0), 0);
9538 mul_op1
= XEXP (XEXP (x
, 0), 1);
9539 sub_op
= XEXP (x
, 1);
9543 mul_op0
= XEXP (XEXP (x
, 1), 0);
9544 mul_op1
= XEXP (XEXP (x
, 1), 1);
9545 sub_op
= XEXP (x
, 0);
9548 /* The first operand of the multiply may be optionally
9550 if (GET_CODE (mul_op0
) == NEG
)
9551 mul_op0
= XEXP (mul_op0
, 0);
9553 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9554 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9555 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
9561 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9567 rtx shift_by_reg
= NULL
;
9571 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9572 if (shift_op
== NULL
)
9574 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9575 non_shift_op
= XEXP (x
, 0);
9578 non_shift_op
= XEXP (x
, 1);
9580 if (shift_op
!= NULL
)
9582 if (shift_by_reg
!= NULL
)
9585 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9586 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
9589 *cost
+= extra_cost
->alu
.arith_shift
;
9591 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
9592 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
9597 && GET_CODE (XEXP (x
, 1)) == MULT
)
9601 *cost
+= extra_cost
->mult
[0].add
;
9602 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
9603 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
9604 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
9608 if (CONST_INT_P (XEXP (x
, 0)))
9610 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9611 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9613 *cost
= COSTS_N_INSNS (insns
);
9615 *cost
+= insns
* extra_cost
->alu
.arith
;
9616 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9620 *cost
+= extra_cost
->alu
.arith
;
9625 if (GET_MODE_CLASS (mode
) == MODE_INT
9626 && GET_MODE_SIZE (mode
) < 4)
9628 rtx shift_op
, shift_reg
;
9631 /* We check both sides of the MINUS for shifter operands since,
9632 unlike PLUS, it's not commutative. */
9634 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9635 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9637 /* Slightly disparage, as we might need to widen the result. */
9640 *cost
+= extra_cost
->alu
.arith
;
9642 if (CONST_INT_P (XEXP (x
, 0)))
9644 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9653 *cost
+= COSTS_N_INSNS (1);
9655 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9657 rtx op1
= XEXP (x
, 1);
9660 *cost
+= 2 * extra_cost
->alu
.arith
;
9662 if (GET_CODE (op1
) == ZERO_EXTEND
)
9663 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
9666 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
9667 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9671 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9674 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9675 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
9677 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
9680 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9681 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9684 *cost
+= (extra_cost
->alu
.arith
9685 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9686 ? extra_cost
->alu
.arith
9687 : extra_cost
->alu
.arith_shift
));
9688 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
9689 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9690 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9695 *cost
+= 2 * extra_cost
->alu
.arith
;
9701 *cost
= LIBCALL_COST (2);
9705 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9706 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9708 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9710 rtx mul_op0
, mul_op1
, add_op
;
9713 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9715 mul_op0
= XEXP (XEXP (x
, 0), 0);
9716 mul_op1
= XEXP (XEXP (x
, 0), 1);
9717 add_op
= XEXP (x
, 1);
9719 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9720 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9721 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
9727 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9730 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9732 *cost
= LIBCALL_COST (2);
9736 /* Narrow modes can be synthesized in SImode, but the range
9737 of useful sub-operations is limited. Check for shift operations
9738 on one of the operands. Only left shifts can be used in the
9740 if (GET_MODE_CLASS (mode
) == MODE_INT
9741 && GET_MODE_SIZE (mode
) < 4)
9743 rtx shift_op
, shift_reg
;
9746 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9748 if (CONST_INT_P (XEXP (x
, 1)))
9750 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9751 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9753 *cost
= COSTS_N_INSNS (insns
);
9755 *cost
+= insns
* extra_cost
->alu
.arith
;
9756 /* Slightly penalize a narrow operation as the result may
9758 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9762 /* Slightly penalize a narrow operation as the result may
9766 *cost
+= extra_cost
->alu
.arith
;
9773 rtx shift_op
, shift_reg
;
9776 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9777 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9779 /* UXTA[BH] or SXTA[BH]. */
9781 *cost
+= extra_cost
->alu
.extend_arith
;
9782 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9784 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
9789 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9790 if (shift_op
!= NULL
)
9795 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9796 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9799 *cost
+= extra_cost
->alu
.arith_shift
;
9801 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9802 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9805 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9807 rtx mul_op
= XEXP (x
, 0);
9809 if (TARGET_DSP_MULTIPLY
9810 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9811 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9812 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9813 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9814 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9815 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9816 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9817 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9818 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9819 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9820 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9821 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9826 *cost
+= extra_cost
->mult
[0].extend_add
;
9827 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
9828 SIGN_EXTEND
, 0, speed_p
)
9829 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
9830 SIGN_EXTEND
, 0, speed_p
)
9831 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9836 *cost
+= extra_cost
->mult
[0].add
;
9837 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
9838 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
9839 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9842 if (CONST_INT_P (XEXP (x
, 1)))
9844 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9845 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9847 *cost
= COSTS_N_INSNS (insns
);
9849 *cost
+= insns
* extra_cost
->alu
.arith
;
9850 *cost
+= rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9854 *cost
+= extra_cost
->alu
.arith
;
9862 && GET_CODE (XEXP (x
, 0)) == MULT
9863 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
9864 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
9865 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
9866 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
9869 *cost
+= extra_cost
->mult
[1].extend_add
;
9870 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
9871 ZERO_EXTEND
, 0, speed_p
)
9872 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
9873 ZERO_EXTEND
, 0, speed_p
)
9874 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9878 *cost
+= COSTS_N_INSNS (1);
9880 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9881 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9884 *cost
+= (extra_cost
->alu
.arith
9885 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9886 ? extra_cost
->alu
.arith
9887 : extra_cost
->alu
.arith_shift
));
9889 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9891 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9896 *cost
+= 2 * extra_cost
->alu
.arith
;
9901 *cost
= LIBCALL_COST (2);
9904 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
9907 *cost
+= extra_cost
->alu
.rev
;
9915 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9916 rtx op0
= XEXP (x
, 0);
9917 rtx shift_op
, shift_reg
;
9921 || (code
== IOR
&& TARGET_THUMB2
)))
9922 op0
= XEXP (op0
, 0);
9925 shift_op
= shifter_op_p (op0
, &shift_reg
);
9926 if (shift_op
!= NULL
)
9931 *cost
+= extra_cost
->alu
.log_shift_reg
;
9932 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9935 *cost
+= extra_cost
->alu
.log_shift
;
9937 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9938 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9942 if (CONST_INT_P (XEXP (x
, 1)))
9944 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
9945 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9948 *cost
= COSTS_N_INSNS (insns
);
9950 *cost
+= insns
* extra_cost
->alu
.logical
;
9951 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
9956 *cost
+= extra_cost
->alu
.logical
;
9957 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
9958 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9964 rtx op0
= XEXP (x
, 0);
9965 enum rtx_code subcode
= GET_CODE (op0
);
9967 *cost
+= COSTS_N_INSNS (1);
9971 || (code
== IOR
&& TARGET_THUMB2
)))
9972 op0
= XEXP (op0
, 0);
9974 if (GET_CODE (op0
) == ZERO_EXTEND
)
9977 *cost
+= 2 * extra_cost
->alu
.logical
;
9979 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
9981 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
9984 else if (GET_CODE (op0
) == SIGN_EXTEND
)
9987 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
9989 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
9991 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
9996 *cost
+= 2 * extra_cost
->alu
.logical
;
10002 *cost
= LIBCALL_COST (2);
10006 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10007 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10009 rtx op0
= XEXP (x
, 0);
10011 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
10012 op0
= XEXP (op0
, 0);
10015 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10017 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
10018 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
10021 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10023 *cost
= LIBCALL_COST (2);
10027 if (mode
== SImode
)
10029 if (TARGET_DSP_MULTIPLY
10030 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10031 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10032 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10033 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10034 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10035 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10036 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10037 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10038 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10039 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10040 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10041 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10044 /* SMUL[TB][TB]. */
10046 *cost
+= extra_cost
->mult
[0].extend
;
10047 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
10048 SIGN_EXTEND
, 0, speed_p
);
10049 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
10050 SIGN_EXTEND
, 1, speed_p
);
10054 *cost
+= extra_cost
->mult
[0].simple
;
10058 if (mode
== DImode
)
10061 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10062 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10063 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10064 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10067 *cost
+= extra_cost
->mult
[1].extend
;
10068 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
10069 ZERO_EXTEND
, 0, speed_p
)
10070 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10071 ZERO_EXTEND
, 0, speed_p
));
10075 *cost
= LIBCALL_COST (2);
10080 *cost
= LIBCALL_COST (2);
10084 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10085 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10087 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10090 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
10095 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10099 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10101 *cost
= LIBCALL_COST (1);
10105 if (mode
== SImode
)
10107 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10109 *cost
+= COSTS_N_INSNS (1);
10110 /* Assume the non-flag-changing variant. */
10112 *cost
+= (extra_cost
->alu
.log_shift
10113 + extra_cost
->alu
.arith_shift
);
10114 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
10118 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10119 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10121 *cost
+= COSTS_N_INSNS (1);
10122 /* No extra cost for MOV imm and MVN imm. */
10123 /* If the comparison op is using the flags, there's no further
10124 cost, otherwise we need to add the cost of the comparison. */
10125 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10126 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10127 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10129 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
10130 *cost
+= (COSTS_N_INSNS (1)
10131 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
10133 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
10136 *cost
+= extra_cost
->alu
.arith
;
10142 *cost
+= extra_cost
->alu
.arith
;
10146 if (GET_MODE_CLASS (mode
) == MODE_INT
10147 && GET_MODE_SIZE (mode
) < 4)
10149 /* Slightly disparage, as we might need an extend operation. */
10152 *cost
+= extra_cost
->alu
.arith
;
10156 if (mode
== DImode
)
10158 *cost
+= COSTS_N_INSNS (1);
10160 *cost
+= 2 * extra_cost
->alu
.arith
;
10165 *cost
= LIBCALL_COST (1);
10169 if (mode
== SImode
)
10172 rtx shift_reg
= NULL
;
10174 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10178 if (shift_reg
!= NULL
)
10181 *cost
+= extra_cost
->alu
.log_shift_reg
;
10182 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10185 *cost
+= extra_cost
->alu
.log_shift
;
10186 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
10191 *cost
+= extra_cost
->alu
.logical
;
10194 if (mode
== DImode
)
10196 *cost
+= COSTS_N_INSNS (1);
10202 *cost
+= LIBCALL_COST (1);
10207 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10209 *cost
+= COSTS_N_INSNS (3);
10212 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
10213 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
10215 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
10216 /* Assume that if one arm of the if_then_else is a register,
10217 that it will be tied with the result and eliminate the
10218 conditional insn. */
10219 if (REG_P (XEXP (x
, 1)))
10221 else if (REG_P (XEXP (x
, 2)))
10227 if (extra_cost
->alu
.non_exec_costs_exec
)
10228 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10230 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10233 *cost
+= op1cost
+ op2cost
;
10239 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10243 machine_mode op0mode
;
10244 /* We'll mostly assume that the cost of a compare is the cost of the
10245 LHS. However, there are some notable exceptions. */
10247 /* Floating point compares are never done as side-effects. */
10248 op0mode
= GET_MODE (XEXP (x
, 0));
10249 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10250 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10253 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10255 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10257 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
10263 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10265 *cost
= LIBCALL_COST (2);
10269 /* DImode compares normally take two insns. */
10270 if (op0mode
== DImode
)
10272 *cost
+= COSTS_N_INSNS (1);
10274 *cost
+= 2 * extra_cost
->alu
.arith
;
10278 if (op0mode
== SImode
)
10283 if (XEXP (x
, 1) == const0_rtx
10284 && !(REG_P (XEXP (x
, 0))
10285 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10286 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10288 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10290 /* Multiply operations that set the flags are often
10291 significantly more expensive. */
10293 && GET_CODE (XEXP (x
, 0)) == MULT
10294 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10295 *cost
+= extra_cost
->mult
[0].flag_setting
;
10298 && GET_CODE (XEXP (x
, 0)) == PLUS
10299 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10300 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10302 *cost
+= extra_cost
->mult
[0].flag_setting
;
10307 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10308 if (shift_op
!= NULL
)
10310 if (shift_reg
!= NULL
)
10312 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
10315 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10318 *cost
+= extra_cost
->alu
.arith_shift
;
10319 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
10320 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
10325 *cost
+= extra_cost
->alu
.arith
;
10326 if (CONST_INT_P (XEXP (x
, 1))
10327 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10329 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10337 *cost
= LIBCALL_COST (2);
10360 if (outer_code
== SET
)
10362 /* Is it a store-flag operation? */
10363 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10364 && XEXP (x
, 1) == const0_rtx
)
10366 /* Thumb also needs an IT insn. */
10367 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
10370 if (XEXP (x
, 1) == const0_rtx
)
10375 /* LSR Rd, Rn, #31. */
10377 *cost
+= extra_cost
->alu
.shift
;
10387 *cost
+= COSTS_N_INSNS (1);
10391 /* RSBS T1, Rn, Rn, LSR #31
10393 *cost
+= COSTS_N_INSNS (1);
10395 *cost
+= extra_cost
->alu
.arith_shift
;
10399 /* RSB Rd, Rn, Rn, ASR #1
10400 LSR Rd, Rd, #31. */
10401 *cost
+= COSTS_N_INSNS (1);
10403 *cost
+= (extra_cost
->alu
.arith_shift
10404 + extra_cost
->alu
.shift
);
10410 *cost
+= COSTS_N_INSNS (1);
10412 *cost
+= extra_cost
->alu
.shift
;
10416 /* Remaining cases are either meaningless or would take
10417 three insns anyway. */
10418 *cost
= COSTS_N_INSNS (3);
10421 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10426 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10427 if (CONST_INT_P (XEXP (x
, 1))
10428 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10430 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10437 /* Not directly inside a set. If it involves the condition code
10438 register it must be the condition for a branch, cond_exec or
10439 I_T_E operation. Since the comparison is performed elsewhere
10440 this is just the control part which has no additional
10442 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10443 && XEXP (x
, 1) == const0_rtx
)
10451 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10452 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10455 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10459 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10461 *cost
= LIBCALL_COST (1);
10465 if (mode
== SImode
)
10468 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10472 *cost
= LIBCALL_COST (1);
10476 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10477 && MEM_P (XEXP (x
, 0)))
10479 if (mode
== DImode
)
10480 *cost
+= COSTS_N_INSNS (1);
10485 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10486 *cost
+= extra_cost
->ldst
.load
;
10488 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10490 if (mode
== DImode
)
10491 *cost
+= extra_cost
->alu
.shift
;
10496 /* Widening from less than 32-bits requires an extend operation. */
10497 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10499 /* We have SXTB/SXTH. */
10500 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10502 *cost
+= extra_cost
->alu
.extend
;
10504 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10506 /* Needs two shifts. */
10507 *cost
+= COSTS_N_INSNS (1);
10508 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10510 *cost
+= 2 * extra_cost
->alu
.shift
;
10513 /* Widening beyond 32-bits requires one more insn. */
10514 if (mode
== DImode
)
10516 *cost
+= COSTS_N_INSNS (1);
10518 *cost
+= extra_cost
->alu
.shift
;
10525 || GET_MODE (XEXP (x
, 0)) == SImode
10526 || GET_MODE (XEXP (x
, 0)) == QImode
)
10527 && MEM_P (XEXP (x
, 0)))
10529 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10531 if (mode
== DImode
)
10532 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10537 /* Widening from less than 32-bits requires an extend operation. */
10538 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10540 /* UXTB can be a shorter instruction in Thumb2, but it might
10541 be slower than the AND Rd, Rn, #255 alternative. When
10542 optimizing for speed it should never be slower to use
10543 AND, and we don't really model 16-bit vs 32-bit insns
10546 *cost
+= extra_cost
->alu
.logical
;
10548 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10550 /* We have UXTB/UXTH. */
10551 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10553 *cost
+= extra_cost
->alu
.extend
;
10555 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10557 /* Needs two shifts. It's marginally preferable to use
10558 shifts rather than two BIC instructions as the second
10559 shift may merge with a subsequent insn as a shifter
10561 *cost
= COSTS_N_INSNS (2);
10562 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10564 *cost
+= 2 * extra_cost
->alu
.shift
;
10567 /* Widening beyond 32-bits requires one more insn. */
10568 if (mode
== DImode
)
10570 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10577 /* CONST_INT has no mode, so we cannot tell for sure how many
10578 insns are really going to be needed. The best we can do is
10579 look at the value passed. If it fits in SImode, then assume
10580 that's the mode it will be used for. Otherwise assume it
10581 will be used in DImode. */
10582 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10587 /* Avoid blowing up in arm_gen_constant (). */
10588 if (!(outer_code
== PLUS
10589 || outer_code
== AND
10590 || outer_code
== IOR
10591 || outer_code
== XOR
10592 || outer_code
== MINUS
))
10596 if (mode
== SImode
)
10598 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10599 INTVAL (x
), NULL
, NULL
,
10605 *cost
+= COSTS_N_INSNS (arm_gen_constant
10606 (outer_code
, SImode
, NULL
,
10607 trunc_int_for_mode (INTVAL (x
), SImode
),
10609 + arm_gen_constant (outer_code
, SImode
, NULL
,
10610 INTVAL (x
) >> 32, NULL
,
10622 if (arm_arch_thumb2
&& !flag_pic
)
10623 *cost
+= COSTS_N_INSNS (1);
10625 *cost
+= extra_cost
->ldst
.load
;
10628 *cost
+= COSTS_N_INSNS (1);
10632 *cost
+= COSTS_N_INSNS (1);
10634 *cost
+= extra_cost
->alu
.arith
;
10640 *cost
= COSTS_N_INSNS (4);
10645 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10646 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10648 if (vfp3_const_double_rtx (x
))
10651 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10657 if (mode
== DFmode
)
10658 *cost
+= extra_cost
->ldst
.loadd
;
10660 *cost
+= extra_cost
->ldst
.loadf
;
10663 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
10667 *cost
= COSTS_N_INSNS (4);
10673 && TARGET_HARD_FLOAT
10674 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10675 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10676 *cost
= COSTS_N_INSNS (1);
10678 *cost
= COSTS_N_INSNS (4);
10683 /* When optimizing for size, we prefer constant pool entries to
10684 MOVW/MOVT pairs, so bump the cost of these slightly. */
10691 *cost
+= extra_cost
->alu
.clz
;
10695 if (XEXP (x
, 1) == const0_rtx
)
10698 *cost
+= extra_cost
->alu
.log_shift
;
10699 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10702 /* Fall through. */
10706 *cost
+= COSTS_N_INSNS (1);
10710 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10711 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10712 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10713 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10714 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10715 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10716 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10717 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10721 *cost
+= extra_cost
->mult
[1].extend
;
10722 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
10723 ZERO_EXTEND
, 0, speed_p
)
10724 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
10725 ZERO_EXTEND
, 0, speed_p
));
10728 *cost
= LIBCALL_COST (1);
10731 case UNSPEC_VOLATILE
:
10733 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10736 /* Reading the PC is like reading any other register. Writing it
10737 is more expensive, but we take that into account elsewhere. */
10742 /* TODO: Simple zero_extract of bottom bits using AND. */
10743 /* Fall through. */
10747 && CONST_INT_P (XEXP (x
, 1))
10748 && CONST_INT_P (XEXP (x
, 2)))
10751 *cost
+= extra_cost
->alu
.bfx
;
10752 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10755 /* Without UBFX/SBFX, need to resort to shift operations. */
10756 *cost
+= COSTS_N_INSNS (1);
10758 *cost
+= 2 * extra_cost
->alu
.shift
;
10759 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
10763 if (TARGET_HARD_FLOAT
)
10766 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10768 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10770 /* Pre v8, widening HF->DF is a two-step process, first
10771 widening to SFmode. */
10772 *cost
+= COSTS_N_INSNS (1);
10774 *cost
+= extra_cost
->fp
[0].widen
;
10776 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10780 *cost
= LIBCALL_COST (1);
10783 case FLOAT_TRUNCATE
:
10784 if (TARGET_HARD_FLOAT
)
10787 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10788 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10790 /* Vector modes? */
10792 *cost
= LIBCALL_COST (1);
10796 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
10798 rtx op0
= XEXP (x
, 0);
10799 rtx op1
= XEXP (x
, 1);
10800 rtx op2
= XEXP (x
, 2);
10803 /* vfms or vfnma. */
10804 if (GET_CODE (op0
) == NEG
)
10805 op0
= XEXP (op0
, 0);
10807 /* vfnms or vfnma. */
10808 if (GET_CODE (op2
) == NEG
)
10809 op2
= XEXP (op2
, 0);
10811 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
10812 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
10813 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
10816 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
10821 *cost
= LIBCALL_COST (3);
10826 if (TARGET_HARD_FLOAT
)
10828 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10829 a vcvt fixed-point conversion. */
10830 if (code
== FIX
&& mode
== SImode
10831 && GET_CODE (XEXP (x
, 0)) == FIX
10832 && GET_MODE (XEXP (x
, 0)) == SFmode
10833 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10834 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10838 *cost
+= extra_cost
->fp
[0].toint
;
10840 *cost
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10845 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10847 mode
= GET_MODE (XEXP (x
, 0));
10849 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
10850 /* Strip of the 'cost' of rounding towards zero. */
10851 if (GET_CODE (XEXP (x
, 0)) == FIX
)
10852 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
10855 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10856 /* ??? Increase the cost to deal with transferring from
10857 FP -> CORE registers? */
10860 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10864 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
10867 /* Vector costs? */
10869 *cost
= LIBCALL_COST (1);
10873 case UNSIGNED_FLOAT
:
10874 if (TARGET_HARD_FLOAT
)
10876 /* ??? Increase the cost to deal with transferring from CORE
10877 -> FP registers? */
10879 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
10882 *cost
= LIBCALL_COST (1);
10890 /* Just a guess. Guess number of instructions in the asm
10891 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10892 though (see PR60663). */
10893 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
10894 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
10896 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
10900 if (mode
!= VOIDmode
)
10901 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
10903 *cost
= COSTS_N_INSNS (4); /* Who knows? */
10908 #undef HANDLE_NARROW_SHIFT_ARITH
10910 /* RTX costs entry point. */
10913 arm_rtx_costs (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
, int outer_code
,
10914 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
10917 int code
= GET_CODE (x
);
10918 gcc_assert (current_tune
->insn_extra_cost
);
10920 result
= arm_rtx_costs_internal (x
, (enum rtx_code
) code
,
10921 (enum rtx_code
) outer_code
,
10922 current_tune
->insn_extra_cost
,
10925 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
10927 print_rtl_single (dump_file
, x
);
10928 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
10929 *total
, result
? "final" : "partial");
10934 /* All address computations that can be done are free, but rtx cost returns
10935 the same for practically all of them. So we weight the different types
10936 of address here in the order (most pref first):
10937 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10939 arm_arm_address_cost (rtx x
)
10941 enum rtx_code c
= GET_CODE (x
);
10943 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
10945 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
10950 if (CONST_INT_P (XEXP (x
, 1)))
10953 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
10963 arm_thumb_address_cost (rtx x
)
10965 enum rtx_code c
= GET_CODE (x
);
10970 && REG_P (XEXP (x
, 0))
10971 && CONST_INT_P (XEXP (x
, 1)))
10978 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
10979 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
10981 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
10984 /* Adjust cost hook for XScale. */
10986 xscale_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
10989 /* Some true dependencies can have a higher cost depending
10990 on precisely how certain input operands are used. */
10992 && recog_memoized (insn
) >= 0
10993 && recog_memoized (dep
) >= 0)
10995 int shift_opnum
= get_attr_shift (insn
);
10996 enum attr_type attr_type
= get_attr_type (dep
);
10998 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10999 operand for INSN. If we have a shifted input operand and the
11000 instruction we depend on is another ALU instruction, then we may
11001 have to account for an additional stall. */
11002 if (shift_opnum
!= 0
11003 && (attr_type
== TYPE_ALU_SHIFT_IMM
11004 || attr_type
== TYPE_ALUS_SHIFT_IMM
11005 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11006 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11007 || attr_type
== TYPE_ALU_SHIFT_REG
11008 || attr_type
== TYPE_ALUS_SHIFT_REG
11009 || attr_type
== TYPE_LOGIC_SHIFT_REG
11010 || attr_type
== TYPE_LOGICS_SHIFT_REG
11011 || attr_type
== TYPE_MOV_SHIFT
11012 || attr_type
== TYPE_MVN_SHIFT
11013 || attr_type
== TYPE_MOV_SHIFT_REG
11014 || attr_type
== TYPE_MVN_SHIFT_REG
))
11016 rtx shifted_operand
;
11019 /* Get the shifted operand. */
11020 extract_insn (insn
);
11021 shifted_operand
= recog_data
.operand
[shift_opnum
];
11023 /* Iterate over all the operands in DEP. If we write an operand
11024 that overlaps with SHIFTED_OPERAND, then we have increase the
11025 cost of this dependency. */
11026 extract_insn (dep
);
11027 preprocess_constraints (dep
);
11028 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11030 /* We can ignore strict inputs. */
11031 if (recog_data
.operand_type
[opno
] == OP_IN
)
11034 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11046 /* Adjust cost hook for Cortex A9. */
11048 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11058 case REG_DEP_OUTPUT
:
11059 if (recog_memoized (insn
) >= 0
11060 && recog_memoized (dep
) >= 0)
11062 if (GET_CODE (PATTERN (insn
)) == SET
)
11065 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11067 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11069 enum attr_type attr_type_insn
= get_attr_type (insn
);
11070 enum attr_type attr_type_dep
= get_attr_type (dep
);
11072 /* By default all dependencies of the form
11075 have an extra latency of 1 cycle because
11076 of the input and output dependency in this
11077 case. However this gets modeled as an true
11078 dependency and hence all these checks. */
11079 if (REG_P (SET_DEST (PATTERN (insn
)))
11080 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
11082 /* FMACS is a special case where the dependent
11083 instruction can be issued 3 cycles before
11084 the normal latency in case of an output
11086 if ((attr_type_insn
== TYPE_FMACS
11087 || attr_type_insn
== TYPE_FMACD
)
11088 && (attr_type_dep
== TYPE_FMACS
11089 || attr_type_dep
== TYPE_FMACD
))
11091 if (dep_type
== REG_DEP_OUTPUT
)
11092 *cost
= insn_default_latency (dep
) - 3;
11094 *cost
= insn_default_latency (dep
);
11099 if (dep_type
== REG_DEP_OUTPUT
)
11100 *cost
= insn_default_latency (dep
) + 1;
11102 *cost
= insn_default_latency (dep
);
11112 gcc_unreachable ();
11118 /* Adjust cost hook for FA726TE. */
11120 fa726te_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11123 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11124 have penalty of 3. */
11125 if (dep_type
== REG_DEP_TRUE
11126 && recog_memoized (insn
) >= 0
11127 && recog_memoized (dep
) >= 0
11128 && get_attr_conds (dep
) == CONDS_SET
)
11130 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11131 if (get_attr_conds (insn
) == CONDS_USE
11132 && get_attr_type (insn
) != TYPE_BRANCH
)
11138 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11139 || get_attr_conds (insn
) == CONDS_USE
)
11149 /* Implement TARGET_REGISTER_MOVE_COST.
11151 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11152 it is typically more expensive than a single memory access. We set
11153 the cost to less than two memory accesses so that floating
11154 point to integer conversion does not go through memory. */
11157 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
11158 reg_class_t from
, reg_class_t to
)
11162 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11163 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11165 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11166 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11168 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11175 if (from
== HI_REGS
|| to
== HI_REGS
)
11182 /* Implement TARGET_MEMORY_MOVE_COST. */
11185 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
11186 bool in ATTRIBUTE_UNUSED
)
11192 if (GET_MODE_SIZE (mode
) < 4)
11195 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11199 /* Vectorizer cost model implementation. */
11201 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11203 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11205 int misalign ATTRIBUTE_UNUSED
)
11209 switch (type_of_cost
)
11212 return current_tune
->vec_costs
->scalar_stmt_cost
;
11215 return current_tune
->vec_costs
->scalar_load_cost
;
11218 return current_tune
->vec_costs
->scalar_store_cost
;
11221 return current_tune
->vec_costs
->vec_stmt_cost
;
11224 return current_tune
->vec_costs
->vec_align_load_cost
;
11227 return current_tune
->vec_costs
->vec_store_cost
;
11229 case vec_to_scalar
:
11230 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11232 case scalar_to_vec
:
11233 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11235 case unaligned_load
:
11236 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11238 case unaligned_store
:
11239 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11241 case cond_branch_taken
:
11242 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11244 case cond_branch_not_taken
:
11245 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11248 case vec_promote_demote
:
11249 return current_tune
->vec_costs
->vec_stmt_cost
;
11251 case vec_construct
:
11252 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11253 return elements
/ 2 + 1;
11256 gcc_unreachable ();
11260 /* Implement targetm.vectorize.add_stmt_cost. */
11263 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11264 struct _stmt_vec_info
*stmt_info
, int misalign
,
11265 enum vect_cost_model_location where
)
11267 unsigned *cost
= (unsigned *) data
;
11268 unsigned retval
= 0;
11270 if (flag_vect_cost_model
)
11272 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11273 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11275 /* Statements in an inner loop relative to the loop being
11276 vectorized are weighted more heavily. The value here is
11277 arbitrary and could potentially be improved with analysis. */
11278 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11279 count
*= 50; /* FIXME. */
11281 retval
= (unsigned) (count
* stmt_cost
);
11282 cost
[where
] += retval
;
11288 /* Return true if and only if this insn can dual-issue only as older. */
11290 cortexa7_older_only (rtx_insn
*insn
)
11292 if (recog_memoized (insn
) < 0)
11295 switch (get_attr_type (insn
))
11297 case TYPE_ALU_DSP_REG
:
11298 case TYPE_ALU_SREG
:
11299 case TYPE_ALUS_SREG
:
11300 case TYPE_LOGIC_REG
:
11301 case TYPE_LOGICS_REG
:
11303 case TYPE_ADCS_REG
:
11308 case TYPE_SHIFT_IMM
:
11309 case TYPE_SHIFT_REG
:
11310 case TYPE_LOAD_BYTE
:
11313 case TYPE_FFARITHS
:
11315 case TYPE_FFARITHD
:
11333 case TYPE_F_STORES
:
11340 /* Return true if and only if this insn can dual-issue as younger. */
11342 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
11344 if (recog_memoized (insn
) < 0)
11347 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11351 switch (get_attr_type (insn
))
11354 case TYPE_ALUS_IMM
:
11355 case TYPE_LOGIC_IMM
:
11356 case TYPE_LOGICS_IMM
:
11361 case TYPE_MOV_SHIFT
:
11362 case TYPE_MOV_SHIFT_REG
:
11372 /* Look for an instruction that can dual issue only as an older
11373 instruction, and move it in front of any instructions that can
11374 dual-issue as younger, while preserving the relative order of all
11375 other instructions in the ready list. This is a hueuristic to help
11376 dual-issue in later cycles, by postponing issue of more flexible
11377 instructions. This heuristic may affect dual issue opportunities
11378 in the current cycle. */
11380 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
11381 int *n_readyp
, int clock
)
11384 int first_older_only
= -1, first_younger
= -1;
11388 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11392 /* Traverse the ready list from the head (the instruction to issue
11393 first), and looking for the first instruction that can issue as
11394 younger and the first instruction that can dual-issue only as
11396 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11398 rtx_insn
*insn
= ready
[i
];
11399 if (cortexa7_older_only (insn
))
11401 first_older_only
= i
;
11403 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11406 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11410 /* Nothing to reorder because either no younger insn found or insn
11411 that can dual-issue only as older appears before any insn that
11412 can dual-issue as younger. */
11413 if (first_younger
== -1)
11416 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11420 /* Nothing to reorder because no older-only insn in the ready list. */
11421 if (first_older_only
== -1)
11424 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11428 /* Move first_older_only insn before first_younger. */
11430 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11431 INSN_UID(ready
[first_older_only
]),
11432 INSN_UID(ready
[first_younger
]));
11433 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
11434 for (i
= first_older_only
; i
< first_younger
; i
++)
11436 ready
[i
] = ready
[i
+1];
11439 ready
[i
] = first_older_only_insn
;
11443 /* Implement TARGET_SCHED_REORDER. */
11445 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
11450 case TARGET_CPU_cortexa7
:
11451 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11454 /* Do nothing for other cores. */
11458 return arm_issue_rate ();
11461 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11462 It corrects the value of COST based on the relationship between
11463 INSN and DEP through the dependence LINK. It returns the new
11464 value. There is a per-core adjust_cost hook to adjust scheduler costs
11465 and the per-core hook can choose to completely override the generic
11466 adjust_cost function. Only put bits of code into arm_adjust_cost that
11467 are common across all cores. */
11469 arm_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
, int cost
,
11474 /* When generating Thumb-1 code, we want to place flag-setting operations
11475 close to a conditional branch which depends on them, so that we can
11476 omit the comparison. */
11479 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11480 && recog_memoized (dep
) >= 0
11481 && get_attr_conds (dep
) == CONDS_SET
)
11484 if (current_tune
->sched_adjust_cost
!= NULL
)
11486 if (!current_tune
->sched_adjust_cost (insn
, dep_type
, dep
, &cost
))
11490 /* XXX Is this strictly true? */
11491 if (dep_type
== REG_DEP_ANTI
11492 || dep_type
== REG_DEP_OUTPUT
)
11495 /* Call insns don't incur a stall, even if they follow a load. */
11500 if ((i_pat
= single_set (insn
)) != NULL
11501 && MEM_P (SET_SRC (i_pat
))
11502 && (d_pat
= single_set (dep
)) != NULL
11503 && MEM_P (SET_DEST (d_pat
)))
11505 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11506 /* This is a load after a store, there is no conflict if the load reads
11507 from a cached area. Assume that loads from the stack, and from the
11508 constant pool are cached, and that others will miss. This is a
11511 if ((GET_CODE (src_mem
) == SYMBOL_REF
11512 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11513 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11514 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11515 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
11523 arm_max_conditional_execute (void)
11525 return max_insns_skipped
;
11529 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
11532 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
11534 return (optimize
> 0) ? 2 : 0;
11538 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
11540 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11543 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11544 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11545 sequences of non-executed instructions in IT blocks probably take the same
11546 amount of time as executed instructions (and the IT instruction itself takes
11547 space in icache). This function was experimentally determined to give good
11548 results on a popular embedded benchmark. */
11551 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
11553 return (TARGET_32BIT
&& speed_p
) ? 1
11554 : arm_default_branch_cost (speed_p
, predictable_p
);
11558 arm_cortex_m7_branch_cost (bool speed_p
, bool predictable_p
)
11560 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11563 static bool fp_consts_inited
= false;
11565 static REAL_VALUE_TYPE value_fp0
;
11568 init_fp_table (void)
11572 r
= REAL_VALUE_ATOF ("0", DFmode
);
11574 fp_consts_inited
= true;
11577 /* Return TRUE if rtx X is a valid immediate FP constant. */
11579 arm_const_double_rtx (rtx x
)
11581 const REAL_VALUE_TYPE
*r
;
11583 if (!fp_consts_inited
)
11586 r
= CONST_DOUBLE_REAL_VALUE (x
);
11587 if (REAL_VALUE_MINUS_ZERO (*r
))
11590 if (real_equal (r
, &value_fp0
))
11596 /* VFPv3 has a fairly wide range of representable immediates, formed from
11597 "quarter-precision" floating-point values. These can be evaluated using this
11598 formula (with ^ for exponentiation):
11602 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11603 16 <= n <= 31 and 0 <= r <= 7.
11605 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11607 - A (most-significant) is the sign bit.
11608 - BCD are the exponent (encoded as r XOR 3).
11609 - EFGH are the mantissa (encoded as n - 16).
11612 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11613 fconst[sd] instruction, or -1 if X isn't suitable. */
11615 vfp3_const_double_index (rtx x
)
11617 REAL_VALUE_TYPE r
, m
;
11618 int sign
, exponent
;
11619 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
11620 unsigned HOST_WIDE_INT mask
;
11621 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
11624 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
11627 r
= *CONST_DOUBLE_REAL_VALUE (x
);
11629 /* We can't represent these things, so detect them first. */
11630 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
11633 /* Extract sign, exponent and mantissa. */
11634 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
11635 r
= real_value_abs (&r
);
11636 exponent
= REAL_EXP (&r
);
11637 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11638 highest (sign) bit, with a fixed binary point at bit point_pos.
11639 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11640 bits for the mantissa, this may fail (low bits would be lost). */
11641 real_ldexp (&m
, &r
, point_pos
- exponent
);
11642 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
11643 mantissa
= w
.elt (0);
11644 mant_hi
= w
.elt (1);
11646 /* If there are bits set in the low part of the mantissa, we can't
11647 represent this value. */
11651 /* Now make it so that mantissa contains the most-significant bits, and move
11652 the point_pos to indicate that the least-significant bits have been
11654 point_pos
-= HOST_BITS_PER_WIDE_INT
;
11655 mantissa
= mant_hi
;
11657 /* We can permit four significant bits of mantissa only, plus a high bit
11658 which is always 1. */
11659 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
11660 if ((mantissa
& mask
) != 0)
11663 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11664 mantissa
>>= point_pos
- 5;
11666 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11667 floating-point immediate zero with Neon using an integer-zero load, but
11668 that case is handled elsewhere.) */
11672 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
11674 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11675 normalized significands are in the range [1, 2). (Our mantissa is shifted
11676 left 4 places at this point relative to normalized IEEE754 values). GCC
11677 internally uses [0.5, 1) (see real.c), so the exponent returned from
11678 REAL_EXP must be altered. */
11679 exponent
= 5 - exponent
;
11681 if (exponent
< 0 || exponent
> 7)
11684 /* Sign, mantissa and exponent are now in the correct form to plug into the
11685 formula described in the comment above. */
11686 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
11689 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11691 vfp3_const_double_rtx (rtx x
)
11696 return vfp3_const_double_index (x
) != -1;
11699 /* Recognize immediates which can be used in various Neon instructions. Legal
11700 immediates are described by the following table (for VMVN variants, the
11701 bitwise inverse of the constant shown is recognized. In either case, VMOV
11702 is output and the correct instruction to use for a given constant is chosen
11703 by the assembler). The constant shown is replicated across all elements of
11704 the destination vector.
11706 insn elems variant constant (binary)
11707 ---- ----- ------- -----------------
11708 vmov i32 0 00000000 00000000 00000000 abcdefgh
11709 vmov i32 1 00000000 00000000 abcdefgh 00000000
11710 vmov i32 2 00000000 abcdefgh 00000000 00000000
11711 vmov i32 3 abcdefgh 00000000 00000000 00000000
11712 vmov i16 4 00000000 abcdefgh
11713 vmov i16 5 abcdefgh 00000000
11714 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11715 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11716 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11717 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11718 vmvn i16 10 00000000 abcdefgh
11719 vmvn i16 11 abcdefgh 00000000
11720 vmov i32 12 00000000 00000000 abcdefgh 11111111
11721 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11722 vmov i32 14 00000000 abcdefgh 11111111 11111111
11723 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11724 vmov i8 16 abcdefgh
11725 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11726 eeeeeeee ffffffff gggggggg hhhhhhhh
11727 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11728 vmov f32 19 00000000 00000000 00000000 00000000
11730 For case 18, B = !b. Representable values are exactly those accepted by
11731 vfp3_const_double_index, but are output as floating-point numbers rather
11734 For case 19, we will change it to vmov.i32 when assembling.
11736 Variants 0-5 (inclusive) may also be used as immediates for the second
11737 operand of VORR/VBIC instructions.
11739 The INVERSE argument causes the bitwise inverse of the given operand to be
11740 recognized instead (used for recognizing legal immediates for the VAND/VORN
11741 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11742 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11743 output, rather than the real insns vbic/vorr).
11745 INVERSE makes no difference to the recognition of float vectors.
11747 The return value is the variant of immediate as shown in the above table, or
11748 -1 if the given value doesn't match any of the listed patterns.
11751 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
11752 rtx
*modconst
, int *elementwidth
)
11754 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11756 for (i = 0; i < idx; i += (STRIDE)) \
11761 immtype = (CLASS); \
11762 elsize = (ELSIZE); \
11766 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
11767 unsigned int innersize
;
11768 unsigned char bytes
[16];
11769 int immtype
= -1, matches
;
11770 unsigned int invmask
= inverse
? 0xff : 0;
11771 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
11774 n_elts
= CONST_VECTOR_NUNITS (op
);
11778 if (mode
== VOIDmode
)
11782 innersize
= GET_MODE_UNIT_SIZE (mode
);
11784 /* Vectors of float constants. */
11785 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
11787 rtx el0
= CONST_VECTOR_ELT (op
, 0);
11789 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
11792 /* FP16 vectors cannot be represented. */
11793 if (GET_MODE_INNER (mode
) == HFmode
)
11796 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11797 are distinct in this context. */
11798 if (!const_vec_duplicate_p (op
))
11802 *modconst
= CONST_VECTOR_ELT (op
, 0);
11807 if (el0
== CONST0_RTX (GET_MODE (el0
)))
11813 /* The tricks done in the code below apply for little-endian vector layout.
11814 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11815 FIXME: Implement logic for big-endian vectors. */
11816 if (BYTES_BIG_ENDIAN
&& vector
&& !const_vec_duplicate_p (op
))
11819 /* Splat vector constant out into a byte vector. */
11820 for (i
= 0; i
< n_elts
; i
++)
11822 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
11823 unsigned HOST_WIDE_INT elpart
;
11825 gcc_assert (CONST_INT_P (el
));
11826 elpart
= INTVAL (el
);
11828 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
11830 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
11831 elpart
>>= BITS_PER_UNIT
;
11835 /* Sanity check. */
11836 gcc_assert (idx
== GET_MODE_SIZE (mode
));
11840 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
11841 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11843 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11844 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11846 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11847 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11849 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11850 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
11852 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
11854 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
11856 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
11857 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11859 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11860 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11862 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11863 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11865 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11866 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
11868 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
11870 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
11872 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11873 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11875 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11876 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11878 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11879 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11881 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11882 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11884 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
11886 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
11887 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
11895 *elementwidth
= elsize
;
11899 unsigned HOST_WIDE_INT imm
= 0;
11901 /* Un-invert bytes of recognized vector, if necessary. */
11903 for (i
= 0; i
< idx
; i
++)
11904 bytes
[i
] ^= invmask
;
11908 /* FIXME: Broken on 32-bit H_W_I hosts. */
11909 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
11911 for (i
= 0; i
< 8; i
++)
11912 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
11913 << (i
* BITS_PER_UNIT
);
11915 *modconst
= GEN_INT (imm
);
11919 unsigned HOST_WIDE_INT imm
= 0;
11921 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
11922 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
11924 *modconst
= GEN_INT (imm
);
11932 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11933 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11934 float elements), and a modified constant (whatever should be output for a
11935 VMOV) in *MODCONST. */
11938 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
11939 rtx
*modconst
, int *elementwidth
)
11943 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
11949 *modconst
= tmpconst
;
11952 *elementwidth
= tmpwidth
;
11957 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11958 the immediate is valid, write a constant suitable for using as an operand
11959 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11960 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11963 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
11964 rtx
*modconst
, int *elementwidth
)
11968 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
11970 if (retval
< 0 || retval
> 5)
11974 *modconst
= tmpconst
;
11977 *elementwidth
= tmpwidth
;
11982 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11983 the immediate is valid, write a constant suitable for using as an operand
11984 to VSHR/VSHL to *MODCONST and the corresponding element width to
11985 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
11986 because they have different limitations. */
11989 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
11990 rtx
*modconst
, int *elementwidth
,
11993 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
11994 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
11995 unsigned HOST_WIDE_INT last_elt
= 0;
11996 unsigned HOST_WIDE_INT maxshift
;
11998 /* Split vector constant out into a byte vector. */
11999 for (i
= 0; i
< n_elts
; i
++)
12001 rtx el
= CONST_VECTOR_ELT (op
, i
);
12002 unsigned HOST_WIDE_INT elpart
;
12004 if (CONST_INT_P (el
))
12005 elpart
= INTVAL (el
);
12006 else if (CONST_DOUBLE_P (el
))
12009 gcc_unreachable ();
12011 if (i
!= 0 && elpart
!= last_elt
)
12017 /* Shift less than element size. */
12018 maxshift
= innersize
* 8;
12022 /* Left shift immediate value can be from 0 to <size>-1. */
12023 if (last_elt
>= maxshift
)
12028 /* Right shift immediate value can be from 1 to <size>. */
12029 if (last_elt
== 0 || last_elt
> maxshift
)
12034 *elementwidth
= innersize
* 8;
12037 *modconst
= CONST_VECTOR_ELT (op
, 0);
12042 /* Return a string suitable for output of Neon immediate logic operation
12046 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
12047 int inverse
, int quad
)
12049 int width
, is_valid
;
12050 static char templ
[40];
12052 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12054 gcc_assert (is_valid
!= 0);
12057 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12059 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12064 /* Return a string suitable for output of Neon immediate shift operation
12065 (VSHR or VSHL) MNEM. */
12068 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12069 machine_mode mode
, int quad
,
12072 int width
, is_valid
;
12073 static char templ
[40];
12075 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12076 gcc_assert (is_valid
!= 0);
12079 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12081 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12086 /* Output a sequence of pairwise operations to implement a reduction.
12087 NOTE: We do "too much work" here, because pairwise operations work on two
12088 registers-worth of operands in one go. Unfortunately we can't exploit those
12089 extra calculations to do the full operation in fewer steps, I don't think.
12090 Although all vector elements of the result but the first are ignored, we
12091 actually calculate the same result in each of the elements. An alternative
12092 such as initially loading a vector with zero to use as each of the second
12093 operands would use up an additional register and take an extra instruction,
12094 for no particular gain. */
12097 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
12098 rtx (*reduc
) (rtx
, rtx
, rtx
))
12100 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
12103 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12105 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12106 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12111 /* If VALS is a vector constant that can be loaded into a register
12112 using VDUP, generate instructions to do so and return an RTX to
12113 assign to the register. Otherwise return NULL_RTX. */
12116 neon_vdup_constant (rtx vals
)
12118 machine_mode mode
= GET_MODE (vals
);
12119 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12122 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12125 if (!const_vec_duplicate_p (vals
, &x
))
12126 /* The elements are not all the same. We could handle repeating
12127 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12128 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12132 /* We can load this constant by using VDUP and a constant in a
12133 single ARM register. This will be cheaper than a vector
12136 x
= copy_to_mode_reg (inner_mode
, x
);
12137 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12140 /* Generate code to load VALS, which is a PARALLEL containing only
12141 constants (for vec_init) or CONST_VECTOR, efficiently into a
12142 register. Returns an RTX to copy into the register, or NULL_RTX
12143 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12146 neon_make_constant (rtx vals
)
12148 machine_mode mode
= GET_MODE (vals
);
12150 rtx const_vec
= NULL_RTX
;
12151 int n_elts
= GET_MODE_NUNITS (mode
);
12155 if (GET_CODE (vals
) == CONST_VECTOR
)
12157 else if (GET_CODE (vals
) == PARALLEL
)
12159 /* A CONST_VECTOR must contain only CONST_INTs and
12160 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12161 Only store valid constants in a CONST_VECTOR. */
12162 for (i
= 0; i
< n_elts
; ++i
)
12164 rtx x
= XVECEXP (vals
, 0, i
);
12165 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12168 if (n_const
== n_elts
)
12169 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12172 gcc_unreachable ();
12174 if (const_vec
!= NULL
12175 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12176 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12178 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12179 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12180 pipeline cycle; creating the constant takes one or two ARM
12181 pipeline cycles. */
12183 else if (const_vec
!= NULL_RTX
)
12184 /* Load from constant pool. On Cortex-A8 this takes two cycles
12185 (for either double or quad vectors). We can not take advantage
12186 of single-cycle VLD1 because we need a PC-relative addressing
12190 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12191 We can not construct an initializer. */
12195 /* Initialize vector TARGET to VALS. */
12198 neon_expand_vector_init (rtx target
, rtx vals
)
12200 machine_mode mode
= GET_MODE (target
);
12201 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12202 int n_elts
= GET_MODE_NUNITS (mode
);
12203 int n_var
= 0, one_var
= -1;
12204 bool all_same
= true;
12208 for (i
= 0; i
< n_elts
; ++i
)
12210 x
= XVECEXP (vals
, 0, i
);
12211 if (!CONSTANT_P (x
))
12212 ++n_var
, one_var
= i
;
12214 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12220 rtx constant
= neon_make_constant (vals
);
12221 if (constant
!= NULL_RTX
)
12223 emit_move_insn (target
, constant
);
12228 /* Splat a single non-constant element if we can. */
12229 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12231 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12232 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
12236 /* One field is non-constant. Load constant then overwrite varying
12237 field. This is more efficient than using the stack. */
12240 rtx copy
= copy_rtx (vals
);
12241 rtx index
= GEN_INT (one_var
);
12243 /* Load constant part of vector, substitute neighboring value for
12244 varying element. */
12245 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12246 neon_expand_vector_init (target
, copy
);
12248 /* Insert variable. */
12249 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12253 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12256 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12259 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12262 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12265 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12268 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12271 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12274 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12277 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12280 gcc_unreachable ();
12285 /* Construct the vector in memory one field at a time
12286 and load the whole vector. */
12287 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12288 for (i
= 0; i
< n_elts
; i
++)
12289 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12290 i
* GET_MODE_SIZE (inner_mode
)),
12291 XVECEXP (vals
, 0, i
));
12292 emit_move_insn (target
, mem
);
12295 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12296 ERR if it doesn't. EXP indicates the source location, which includes the
12297 inlining history for intrinsics. */
12300 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12301 const_tree exp
, const char *desc
)
12303 HOST_WIDE_INT lane
;
12305 gcc_assert (CONST_INT_P (operand
));
12307 lane
= INTVAL (operand
);
12309 if (lane
< low
|| lane
>= high
)
12312 error ("%K%s %wd out of range %wd - %wd",
12313 exp
, desc
, lane
, low
, high
- 1);
12315 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
12319 /* Bounds-check lanes. */
12322 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12325 bounds_check (operand
, low
, high
, exp
, "lane");
12328 /* Bounds-check constants. */
12331 arm_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12333 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
12337 neon_element_bits (machine_mode mode
)
12339 return GET_MODE_UNIT_BITSIZE (mode
);
12343 /* Predicates for `match_operand' and `match_operator'. */
12345 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12346 WB is true if full writeback address modes are allowed and is false
12347 if limited writeback address modes (POST_INC and PRE_DEC) are
12351 arm_coproc_mem_operand (rtx op
, bool wb
)
12355 /* Reject eliminable registers. */
12356 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12357 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12358 || reg_mentioned_p (arg_pointer_rtx
, op
)
12359 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12360 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12361 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12362 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12365 /* Constants are converted into offsets from labels. */
12369 ind
= XEXP (op
, 0);
12371 if (reload_completed
12372 && (GET_CODE (ind
) == LABEL_REF
12373 || (GET_CODE (ind
) == CONST
12374 && GET_CODE (XEXP (ind
, 0)) == PLUS
12375 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12376 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12379 /* Match: (mem (reg)). */
12381 return arm_address_register_rtx_p (ind
, 0);
12383 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12384 acceptable in any case (subject to verification by
12385 arm_address_register_rtx_p). We need WB to be true to accept
12386 PRE_INC and POST_DEC. */
12387 if (GET_CODE (ind
) == POST_INC
12388 || GET_CODE (ind
) == PRE_DEC
12390 && (GET_CODE (ind
) == PRE_INC
12391 || GET_CODE (ind
) == POST_DEC
)))
12392 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12395 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12396 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12397 && GET_CODE (XEXP (ind
, 1)) == PLUS
12398 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12399 ind
= XEXP (ind
, 1);
12404 if (GET_CODE (ind
) == PLUS
12405 && REG_P (XEXP (ind
, 0))
12406 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12407 && CONST_INT_P (XEXP (ind
, 1))
12408 && INTVAL (XEXP (ind
, 1)) > -1024
12409 && INTVAL (XEXP (ind
, 1)) < 1024
12410 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12416 /* Return TRUE if OP is a memory operand which we can load or store a vector
12417 to/from. TYPE is one of the following values:
12418 0 - Vector load/stor (vldr)
12419 1 - Core registers (ldm)
12420 2 - Element/structure loads (vld1)
12423 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12427 /* Reject eliminable registers. */
12428 if (strict
&& ! (reload_in_progress
|| reload_completed
)
12429 && (reg_mentioned_p (frame_pointer_rtx
, op
)
12430 || reg_mentioned_p (arg_pointer_rtx
, op
)
12431 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12432 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12433 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12434 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12437 /* Constants are converted into offsets from labels. */
12441 ind
= XEXP (op
, 0);
12443 if (reload_completed
12444 && (GET_CODE (ind
) == LABEL_REF
12445 || (GET_CODE (ind
) == CONST
12446 && GET_CODE (XEXP (ind
, 0)) == PLUS
12447 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12448 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12451 /* Match: (mem (reg)). */
12453 return arm_address_register_rtx_p (ind
, 0);
12455 /* Allow post-increment with Neon registers. */
12456 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12457 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12458 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12460 /* Allow post-increment by register for VLDn */
12461 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
12462 && GET_CODE (XEXP (ind
, 1)) == PLUS
12463 && REG_P (XEXP (XEXP (ind
, 1), 1)))
12470 && GET_CODE (ind
) == PLUS
12471 && REG_P (XEXP (ind
, 0))
12472 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12473 && CONST_INT_P (XEXP (ind
, 1))
12474 && INTVAL (XEXP (ind
, 1)) > -1024
12475 /* For quad modes, we restrict the constant offset to be slightly less
12476 than what the instruction format permits. We have no such constraint
12477 on double mode offsets. (This must match arm_legitimate_index_p.) */
12478 && (INTVAL (XEXP (ind
, 1))
12479 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12480 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12486 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12489 neon_struct_mem_operand (rtx op
)
12493 /* Reject eliminable registers. */
12494 if (! (reload_in_progress
|| reload_completed
)
12495 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12496 || reg_mentioned_p (arg_pointer_rtx
, op
)
12497 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12498 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12499 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12500 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12503 /* Constants are converted into offsets from labels. */
12507 ind
= XEXP (op
, 0);
12509 if (reload_completed
12510 && (GET_CODE (ind
) == LABEL_REF
12511 || (GET_CODE (ind
) == CONST
12512 && GET_CODE (XEXP (ind
, 0)) == PLUS
12513 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12514 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12517 /* Match: (mem (reg)). */
12519 return arm_address_register_rtx_p (ind
, 0);
12521 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12522 if (GET_CODE (ind
) == POST_INC
12523 || GET_CODE (ind
) == PRE_DEC
)
12524 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12529 /* Return true if X is a register that will be eliminated later on. */
12531 arm_eliminable_register (rtx x
)
12533 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
12534 || REGNO (x
) == ARG_POINTER_REGNUM
12535 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
12536 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
12539 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12540 coprocessor registers. Otherwise return NO_REGS. */
12543 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
12545 if (mode
== HFmode
)
12547 if (!TARGET_NEON_FP16
&& !TARGET_VFP_FP16INST
)
12548 return GENERAL_REGS
;
12549 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
12551 return GENERAL_REGS
;
12554 /* The neon move patterns handle all legitimate vector and struct
12557 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
12558 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
12559 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
12560 || VALID_NEON_STRUCT_MODE (mode
)))
12563 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
12566 return GENERAL_REGS
;
12569 /* Values which must be returned in the most-significant end of the return
12573 arm_return_in_msb (const_tree valtype
)
12575 return (TARGET_AAPCS_BASED
12576 && BYTES_BIG_ENDIAN
12577 && (AGGREGATE_TYPE_P (valtype
)
12578 || TREE_CODE (valtype
) == COMPLEX_TYPE
12579 || FIXED_POINT_TYPE_P (valtype
)));
12582 /* Return TRUE if X references a SYMBOL_REF. */
12584 symbol_mentioned_p (rtx x
)
12589 if (GET_CODE (x
) == SYMBOL_REF
)
12592 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12593 are constant offsets, not symbols. */
12594 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12597 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12599 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12605 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12606 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
12609 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
12616 /* Return TRUE if X references a LABEL_REF. */
12618 label_mentioned_p (rtx x
)
12623 if (GET_CODE (x
) == LABEL_REF
)
12626 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12627 instruction, but they are constant offsets, not symbols. */
12628 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12631 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12632 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12638 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12639 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
12642 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
12650 tls_mentioned_p (rtx x
)
12652 switch (GET_CODE (x
))
12655 return tls_mentioned_p (XEXP (x
, 0));
12658 if (XINT (x
, 1) == UNSPEC_TLS
)
12661 /* Fall through. */
12667 /* Must not copy any rtx that uses a pc-relative address.
12668 Also, disallow copying of load-exclusive instructions that
12669 may appear after splitting of compare-and-swap-style operations
12670 so as to prevent those loops from being transformed away from their
12671 canonical forms (see PR 69904). */
12674 arm_cannot_copy_insn_p (rtx_insn
*insn
)
12676 /* The tls call insn cannot be copied, as it is paired with a data
12678 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
12681 subrtx_iterator::array_type array
;
12682 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
12684 const_rtx x
= *iter
;
12685 if (GET_CODE (x
) == UNSPEC
12686 && (XINT (x
, 1) == UNSPEC_PIC_BASE
12687 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
12691 rtx set
= single_set (insn
);
12694 rtx src
= SET_SRC (set
);
12695 if (GET_CODE (src
) == ZERO_EXTEND
)
12696 src
= XEXP (src
, 0);
12698 /* Catch the load-exclusive and load-acquire operations. */
12699 if (GET_CODE (src
) == UNSPEC_VOLATILE
12700 && (XINT (src
, 1) == VUNSPEC_LL
12701 || XINT (src
, 1) == VUNSPEC_LAX
))
12708 minmax_code (rtx x
)
12710 enum rtx_code code
= GET_CODE (x
);
12723 gcc_unreachable ();
12727 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12730 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
12731 int *mask
, bool *signed_sat
)
12733 /* The high bound must be a power of two minus one. */
12734 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
12738 /* The low bound is either zero (for usat) or one less than the
12739 negation of the high bound (for ssat). */
12740 if (INTVAL (lo_bound
) == 0)
12745 *signed_sat
= false;
12750 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
12755 *signed_sat
= true;
12763 /* Return 1 if memory locations are adjacent. */
12765 adjacent_mem_locations (rtx a
, rtx b
)
12767 /* We don't guarantee to preserve the order of these memory refs. */
12768 if (volatile_refs_p (a
) || volatile_refs_p (b
))
12771 if ((REG_P (XEXP (a
, 0))
12772 || (GET_CODE (XEXP (a
, 0)) == PLUS
12773 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
12774 && (REG_P (XEXP (b
, 0))
12775 || (GET_CODE (XEXP (b
, 0)) == PLUS
12776 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
12778 HOST_WIDE_INT val0
= 0, val1
= 0;
12782 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
12784 reg0
= XEXP (XEXP (a
, 0), 0);
12785 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
12788 reg0
= XEXP (a
, 0);
12790 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
12792 reg1
= XEXP (XEXP (b
, 0), 0);
12793 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
12796 reg1
= XEXP (b
, 0);
12798 /* Don't accept any offset that will require multiple
12799 instructions to handle, since this would cause the
12800 arith_adjacentmem pattern to output an overlong sequence. */
12801 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
12804 /* Don't allow an eliminable register: register elimination can make
12805 the offset too large. */
12806 if (arm_eliminable_register (reg0
))
12809 val_diff
= val1
- val0
;
12813 /* If the target has load delay slots, then there's no benefit
12814 to using an ldm instruction unless the offset is zero and
12815 we are optimizing for size. */
12816 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
12817 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
12818 && (val_diff
== 4 || val_diff
== -4));
12821 return ((REGNO (reg0
) == REGNO (reg1
))
12822 && (val_diff
== 4 || val_diff
== -4));
12828 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12829 for load operations, false for store operations. CONSECUTIVE is true
12830 if the register numbers in the operation must be consecutive in the register
12831 bank. RETURN_PC is true if value is to be loaded in PC.
12832 The pattern we are trying to match for load is:
12833 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12834 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12837 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12840 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12841 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12842 3. If consecutive is TRUE, then for kth register being loaded,
12843 REGNO (R_dk) = REGNO (R_d0) + k.
12844 The pattern for store is similar. */
12846 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
12847 bool consecutive
, bool return_pc
)
12849 HOST_WIDE_INT count
= XVECLEN (op
, 0);
12850 rtx reg
, mem
, addr
;
12852 unsigned first_regno
;
12853 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
12855 bool addr_reg_in_reglist
= false;
12856 bool update
= false;
12861 /* If not in SImode, then registers must be consecutive
12862 (e.g., VLDM instructions for DFmode). */
12863 gcc_assert ((mode
== SImode
) || consecutive
);
12864 /* Setting return_pc for stores is illegal. */
12865 gcc_assert (!return_pc
|| load
);
12867 /* Set up the increments and the regs per val based on the mode. */
12868 reg_increment
= GET_MODE_SIZE (mode
);
12869 regs_per_val
= reg_increment
/ 4;
12870 offset_adj
= return_pc
? 1 : 0;
12873 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
12874 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
12877 /* Check if this is a write-back. */
12878 elt
= XVECEXP (op
, 0, offset_adj
);
12879 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
12885 /* The offset adjustment must be the number of registers being
12886 popped times the size of a single register. */
12887 if (!REG_P (SET_DEST (elt
))
12888 || !REG_P (XEXP (SET_SRC (elt
), 0))
12889 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
12890 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
12891 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
12892 ((count
- 1 - offset_adj
) * reg_increment
))
12896 i
= i
+ offset_adj
;
12897 base
= base
+ offset_adj
;
12898 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12899 success depends on the type: VLDM can do just one reg,
12900 LDM must do at least two. */
12901 if ((count
<= i
) && (mode
== SImode
))
12904 elt
= XVECEXP (op
, 0, i
- 1);
12905 if (GET_CODE (elt
) != SET
)
12910 reg
= SET_DEST (elt
);
12911 mem
= SET_SRC (elt
);
12915 reg
= SET_SRC (elt
);
12916 mem
= SET_DEST (elt
);
12919 if (!REG_P (reg
) || !MEM_P (mem
))
12922 regno
= REGNO (reg
);
12923 first_regno
= regno
;
12924 addr
= XEXP (mem
, 0);
12925 if (GET_CODE (addr
) == PLUS
)
12927 if (!CONST_INT_P (XEXP (addr
, 1)))
12930 offset
= INTVAL (XEXP (addr
, 1));
12931 addr
= XEXP (addr
, 0);
12937 /* Don't allow SP to be loaded unless it is also the base register. It
12938 guarantees that SP is reset correctly when an LDM instruction
12939 is interrupted. Otherwise, we might end up with a corrupt stack. */
12940 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12943 for (; i
< count
; i
++)
12945 elt
= XVECEXP (op
, 0, i
);
12946 if (GET_CODE (elt
) != SET
)
12951 reg
= SET_DEST (elt
);
12952 mem
= SET_SRC (elt
);
12956 reg
= SET_SRC (elt
);
12957 mem
= SET_DEST (elt
);
12961 || GET_MODE (reg
) != mode
12962 || REGNO (reg
) <= regno
12965 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
12966 /* Don't allow SP to be loaded unless it is also the base register. It
12967 guarantees that SP is reset correctly when an LDM instruction
12968 is interrupted. Otherwise, we might end up with a corrupt stack. */
12969 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12971 || GET_MODE (mem
) != mode
12972 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
12973 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
12974 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
12975 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
12976 offset
+ (i
- base
) * reg_increment
))
12977 && (!REG_P (XEXP (mem
, 0))
12978 || offset
+ (i
- base
) * reg_increment
!= 0)))
12981 regno
= REGNO (reg
);
12982 if (regno
== REGNO (addr
))
12983 addr_reg_in_reglist
= true;
12988 if (update
&& addr_reg_in_reglist
)
12991 /* For Thumb-1, address register is always modified - either by write-back
12992 or by explicit load. If the pattern does not describe an update,
12993 then the address register must be in the list of loaded registers. */
12995 return update
|| addr_reg_in_reglist
;
13001 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13002 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13003 instruction. ADD_OFFSET is nonzero if the base address register needs
13004 to be modified with an add instruction before we can use it. */
13007 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
13008 int nops
, HOST_WIDE_INT add_offset
)
13010 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13011 if the offset isn't small enough. The reason 2 ldrs are faster
13012 is because these ARMs are able to do more than one cache access
13013 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13014 whilst the ARM8 has a double bandwidth cache. This means that
13015 these cores can do both an instruction fetch and a data fetch in
13016 a single cycle, so the trick of calculating the address into a
13017 scratch register (one of the result regs) and then doing a load
13018 multiple actually becomes slower (and no smaller in code size).
13019 That is the transformation
13021 ldr rd1, [rbase + offset]
13022 ldr rd2, [rbase + offset + 4]
13026 add rd1, rbase, offset
13027 ldmia rd1, {rd1, rd2}
13029 produces worse code -- '3 cycles + any stalls on rd2' instead of
13030 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13031 access per cycle, the first sequence could never complete in less
13032 than 6 cycles, whereas the ldm sequence would only take 5 and
13033 would make better use of sequential accesses if not hitting the
13036 We cheat here and test 'arm_ld_sched' which we currently know to
13037 only be true for the ARM8, ARM9 and StrongARM. If this ever
13038 changes, then the test below needs to be reworked. */
13039 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
13042 /* XScale has load-store double instructions, but they have stricter
13043 alignment requirements than load-store multiple, so we cannot
13046 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13047 the pipeline until completion.
13055 An ldr instruction takes 1-3 cycles, but does not block the
13064 Best case ldr will always win. However, the more ldr instructions
13065 we issue, the less likely we are to be able to schedule them well.
13066 Using ldr instructions also increases code size.
13068 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13069 for counts of 3 or 4 regs. */
13070 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13075 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13076 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13077 an array ORDER which describes the sequence to use when accessing the
13078 offsets that produces an ascending order. In this sequence, each
13079 offset must be larger by exactly 4 than the previous one. ORDER[0]
13080 must have been filled in with the lowest offset by the caller.
13081 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13082 we use to verify that ORDER produces an ascending order of registers.
13083 Return true if it was possible to construct such an order, false if
13087 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13088 int *unsorted_regs
)
13091 for (i
= 1; i
< nops
; i
++)
13095 order
[i
] = order
[i
- 1];
13096 for (j
= 0; j
< nops
; j
++)
13097 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13099 /* We must find exactly one offset that is higher than the
13100 previous one by 4. */
13101 if (order
[i
] != order
[i
- 1])
13105 if (order
[i
] == order
[i
- 1])
13107 /* The register numbers must be ascending. */
13108 if (unsorted_regs
!= NULL
13109 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13115 /* Used to determine in a peephole whether a sequence of load
13116 instructions can be changed into a load-multiple instruction.
13117 NOPS is the number of separate load instructions we are examining. The
13118 first NOPS entries in OPERANDS are the destination registers, the
13119 next NOPS entries are memory operands. If this function is
13120 successful, *BASE is set to the common base register of the memory
13121 accesses; *LOAD_OFFSET is set to the first memory location's offset
13122 from that base register.
13123 REGS is an array filled in with the destination register numbers.
13124 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13125 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13126 the sequence of registers in REGS matches the loads from ascending memory
13127 locations, and the function verifies that the register numbers are
13128 themselves ascending. If CHECK_REGS is false, the register numbers
13129 are stored in the order they are found in the operands. */
13131 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13132 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13134 int unsorted_regs
[MAX_LDM_STM_OPS
];
13135 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13136 int order
[MAX_LDM_STM_OPS
];
13137 rtx base_reg_rtx
= NULL
;
13141 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13142 easily extended if required. */
13143 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13145 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13147 /* Loop over the operands and check that the memory references are
13148 suitable (i.e. immediate offsets from the same base register). At
13149 the same time, extract the target register, and the memory
13151 for (i
= 0; i
< nops
; i
++)
13156 /* Convert a subreg of a mem into the mem itself. */
13157 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13158 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13160 gcc_assert (MEM_P (operands
[nops
+ i
]));
13162 /* Don't reorder volatile memory references; it doesn't seem worth
13163 looking for the case where the order is ok anyway. */
13164 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13167 offset
= const0_rtx
;
13169 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13170 || (GET_CODE (reg
) == SUBREG
13171 && REG_P (reg
= SUBREG_REG (reg
))))
13172 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13173 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13174 || (GET_CODE (reg
) == SUBREG
13175 && REG_P (reg
= SUBREG_REG (reg
))))
13176 && (CONST_INT_P (offset
13177 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13181 base_reg
= REGNO (reg
);
13182 base_reg_rtx
= reg
;
13183 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13186 else if (base_reg
!= (int) REGNO (reg
))
13187 /* Not addressed from the same base register. */
13190 unsorted_regs
[i
] = (REG_P (operands
[i
])
13191 ? REGNO (operands
[i
])
13192 : REGNO (SUBREG_REG (operands
[i
])));
13194 /* If it isn't an integer register, or if it overwrites the
13195 base register but isn't the last insn in the list, then
13196 we can't do this. */
13197 if (unsorted_regs
[i
] < 0
13198 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13199 || unsorted_regs
[i
] > 14
13200 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13203 /* Don't allow SP to be loaded unless it is also the base
13204 register. It guarantees that SP is reset correctly when
13205 an LDM instruction is interrupted. Otherwise, we might
13206 end up with a corrupt stack. */
13207 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13210 unsorted_offsets
[i
] = INTVAL (offset
);
13211 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13215 /* Not a suitable memory address. */
13219 /* All the useful information has now been extracted from the
13220 operands into unsorted_regs and unsorted_offsets; additionally,
13221 order[0] has been set to the lowest offset in the list. Sort
13222 the offsets into order, verifying that they are adjacent, and
13223 check that the register numbers are ascending. */
13224 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13225 check_regs
? unsorted_regs
: NULL
))
13229 memcpy (saved_order
, order
, sizeof order
);
13235 for (i
= 0; i
< nops
; i
++)
13236 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13238 *load_offset
= unsorted_offsets
[order
[0]];
13242 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13245 if (unsorted_offsets
[order
[0]] == 0)
13246 ldm_case
= 1; /* ldmia */
13247 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13248 ldm_case
= 2; /* ldmib */
13249 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13250 ldm_case
= 3; /* ldmda */
13251 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13252 ldm_case
= 4; /* ldmdb */
13253 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13254 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13259 if (!multiple_operation_profitable_p (false, nops
,
13261 ? unsorted_offsets
[order
[0]] : 0))
13267 /* Used to determine in a peephole whether a sequence of store instructions can
13268 be changed into a store-multiple instruction.
13269 NOPS is the number of separate store instructions we are examining.
13270 NOPS_TOTAL is the total number of instructions recognized by the peephole
13272 The first NOPS entries in OPERANDS are the source registers, the next
13273 NOPS entries are memory operands. If this function is successful, *BASE is
13274 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13275 to the first memory location's offset from that base register. REGS is an
13276 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13277 likewise filled with the corresponding rtx's.
13278 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13279 numbers to an ascending order of stores.
13280 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13281 from ascending memory locations, and the function verifies that the register
13282 numbers are themselves ascending. If CHECK_REGS is false, the register
13283 numbers are stored in the order they are found in the operands. */
13285 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13286 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13287 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13289 int unsorted_regs
[MAX_LDM_STM_OPS
];
13290 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13291 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13292 int order
[MAX_LDM_STM_OPS
];
13294 rtx base_reg_rtx
= NULL
;
13297 /* Write back of base register is currently only supported for Thumb 1. */
13298 int base_writeback
= TARGET_THUMB1
;
13300 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13301 easily extended if required. */
13302 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13304 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13306 /* Loop over the operands and check that the memory references are
13307 suitable (i.e. immediate offsets from the same base register). At
13308 the same time, extract the target register, and the memory
13310 for (i
= 0; i
< nops
; i
++)
13315 /* Convert a subreg of a mem into the mem itself. */
13316 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13317 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13319 gcc_assert (MEM_P (operands
[nops
+ i
]));
13321 /* Don't reorder volatile memory references; it doesn't seem worth
13322 looking for the case where the order is ok anyway. */
13323 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13326 offset
= const0_rtx
;
13328 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13329 || (GET_CODE (reg
) == SUBREG
13330 && REG_P (reg
= SUBREG_REG (reg
))))
13331 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13332 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13333 || (GET_CODE (reg
) == SUBREG
13334 && REG_P (reg
= SUBREG_REG (reg
))))
13335 && (CONST_INT_P (offset
13336 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13338 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13339 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13340 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13344 base_reg
= REGNO (reg
);
13345 base_reg_rtx
= reg
;
13346 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13349 else if (base_reg
!= (int) REGNO (reg
))
13350 /* Not addressed from the same base register. */
13353 /* If it isn't an integer register, then we can't do this. */
13354 if (unsorted_regs
[i
] < 0
13355 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13356 /* The effects are unpredictable if the base register is
13357 both updated and stored. */
13358 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13359 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13360 || unsorted_regs
[i
] > 14)
13363 unsorted_offsets
[i
] = INTVAL (offset
);
13364 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13368 /* Not a suitable memory address. */
13372 /* All the useful information has now been extracted from the
13373 operands into unsorted_regs and unsorted_offsets; additionally,
13374 order[0] has been set to the lowest offset in the list. Sort
13375 the offsets into order, verifying that they are adjacent, and
13376 check that the register numbers are ascending. */
13377 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13378 check_regs
? unsorted_regs
: NULL
))
13382 memcpy (saved_order
, order
, sizeof order
);
13388 for (i
= 0; i
< nops
; i
++)
13390 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13392 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13395 *load_offset
= unsorted_offsets
[order
[0]];
13399 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13402 if (unsorted_offsets
[order
[0]] == 0)
13403 stm_case
= 1; /* stmia */
13404 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13405 stm_case
= 2; /* stmib */
13406 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13407 stm_case
= 3; /* stmda */
13408 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13409 stm_case
= 4; /* stmdb */
13413 if (!multiple_operation_profitable_p (false, nops
, 0))
13419 /* Routines for use in generating RTL. */
13421 /* Generate a load-multiple instruction. COUNT is the number of loads in
13422 the instruction; REGS and MEMS are arrays containing the operands.
13423 BASEREG is the base register to be used in addressing the memory operands.
13424 WBACK_OFFSET is nonzero if the instruction should update the base
13428 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13429 HOST_WIDE_INT wback_offset
)
13434 if (!multiple_operation_profitable_p (false, count
, 0))
13440 for (i
= 0; i
< count
; i
++)
13441 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13443 if (wback_offset
!= 0)
13444 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13446 seq
= get_insns ();
13452 result
= gen_rtx_PARALLEL (VOIDmode
,
13453 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13454 if (wback_offset
!= 0)
13456 XVECEXP (result
, 0, 0)
13457 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13462 for (j
= 0; i
< count
; i
++, j
++)
13463 XVECEXP (result
, 0, i
)
13464 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13469 /* Generate a store-multiple instruction. COUNT is the number of stores in
13470 the instruction; REGS and MEMS are arrays containing the operands.
13471 BASEREG is the base register to be used in addressing the memory operands.
13472 WBACK_OFFSET is nonzero if the instruction should update the base
13476 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13477 HOST_WIDE_INT wback_offset
)
13482 if (GET_CODE (basereg
) == PLUS
)
13483 basereg
= XEXP (basereg
, 0);
13485 if (!multiple_operation_profitable_p (false, count
, 0))
13491 for (i
= 0; i
< count
; i
++)
13492 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
13494 if (wback_offset
!= 0)
13495 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13497 seq
= get_insns ();
13503 result
= gen_rtx_PARALLEL (VOIDmode
,
13504 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13505 if (wback_offset
!= 0)
13507 XVECEXP (result
, 0, 0)
13508 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13513 for (j
= 0; i
< count
; i
++, j
++)
13514 XVECEXP (result
, 0, i
)
13515 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
13520 /* Generate either a load-multiple or a store-multiple instruction. This
13521 function can be used in situations where we can start with a single MEM
13522 rtx and adjust its address upwards.
13523 COUNT is the number of operations in the instruction, not counting a
13524 possible update of the base register. REGS is an array containing the
13526 BASEREG is the base register to be used in addressing the memory operands,
13527 which are constructed from BASEMEM.
13528 WRITE_BACK specifies whether the generated instruction should include an
13529 update of the base register.
13530 OFFSETP is used to pass an offset to and from this function; this offset
13531 is not used when constructing the address (instead BASEMEM should have an
13532 appropriate offset in its address), it is used only for setting
13533 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13536 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
13537 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
13539 rtx mems
[MAX_LDM_STM_OPS
];
13540 HOST_WIDE_INT offset
= *offsetp
;
13543 gcc_assert (count
<= MAX_LDM_STM_OPS
);
13545 if (GET_CODE (basereg
) == PLUS
)
13546 basereg
= XEXP (basereg
, 0);
13548 for (i
= 0; i
< count
; i
++)
13550 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
13551 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
13559 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
13560 write_back
? 4 * count
: 0);
13562 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
13563 write_back
? 4 * count
: 0);
13567 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13568 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13570 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
13575 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13576 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13578 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
13582 /* Called from a peephole2 expander to turn a sequence of loads into an
13583 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13584 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13585 is true if we can reorder the registers because they are used commutatively
13587 Returns true iff we could generate a new instruction. */
13590 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
13592 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13593 rtx mems
[MAX_LDM_STM_OPS
];
13594 int i
, j
, base_reg
;
13596 HOST_WIDE_INT offset
;
13597 int write_back
= FALSE
;
13601 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
13602 &base_reg
, &offset
, !sort_regs
);
13608 for (i
= 0; i
< nops
- 1; i
++)
13609 for (j
= i
+ 1; j
< nops
; j
++)
13610 if (regs
[i
] > regs
[j
])
13616 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13620 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
13621 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
13627 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
13628 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
13630 if (!TARGET_THUMB1
)
13631 base_reg_rtx
= newbase
;
13634 for (i
= 0; i
< nops
; i
++)
13636 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13637 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13640 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13641 write_back
? offset
+ i
* 4 : 0));
13645 /* Called from a peephole2 expander to turn a sequence of stores into an
13646 STM instruction. OPERANDS are the operands found by the peephole matcher;
13647 NOPS indicates how many separate stores we are trying to combine.
13648 Returns true iff we could generate a new instruction. */
13651 gen_stm_seq (rtx
*operands
, int nops
)
13654 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13655 rtx mems
[MAX_LDM_STM_OPS
];
13658 HOST_WIDE_INT offset
;
13659 int write_back
= FALSE
;
13662 bool base_reg_dies
;
13664 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
13665 mem_order
, &base_reg
, &offset
, true);
13670 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13672 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
13675 gcc_assert (base_reg_dies
);
13681 gcc_assert (base_reg_dies
);
13682 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13686 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13688 for (i
= 0; i
< nops
; i
++)
13690 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13691 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13694 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13695 write_back
? offset
+ i
* 4 : 0));
13699 /* Called from a peephole2 expander to turn a sequence of stores that are
13700 preceded by constant loads into an STM instruction. OPERANDS are the
13701 operands found by the peephole matcher; NOPS indicates how many
13702 separate stores we are trying to combine; there are 2 * NOPS
13703 instructions in the peephole.
13704 Returns true iff we could generate a new instruction. */
13707 gen_const_stm_seq (rtx
*operands
, int nops
)
13709 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
13710 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13711 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
13712 rtx mems
[MAX_LDM_STM_OPS
];
13715 HOST_WIDE_INT offset
;
13716 int write_back
= FALSE
;
13719 bool base_reg_dies
;
13721 HARD_REG_SET allocated
;
13723 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
13724 mem_order
, &base_reg
, &offset
, false);
13729 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
13731 /* If the same register is used more than once, try to find a free
13733 CLEAR_HARD_REG_SET (allocated
);
13734 for (i
= 0; i
< nops
; i
++)
13736 for (j
= i
+ 1; j
< nops
; j
++)
13737 if (regs
[i
] == regs
[j
])
13739 rtx t
= peep2_find_free_register (0, nops
* 2,
13740 TARGET_THUMB1
? "l" : "r",
13741 SImode
, &allocated
);
13745 regs
[i
] = REGNO (t
);
13749 /* Compute an ordering that maps the register numbers to an ascending
13752 for (i
= 0; i
< nops
; i
++)
13753 if (regs
[i
] < regs
[reg_order
[0]])
13756 for (i
= 1; i
< nops
; i
++)
13758 int this_order
= reg_order
[i
- 1];
13759 for (j
= 0; j
< nops
; j
++)
13760 if (regs
[j
] > regs
[reg_order
[i
- 1]]
13761 && (this_order
== reg_order
[i
- 1]
13762 || regs
[j
] < regs
[this_order
]))
13764 reg_order
[i
] = this_order
;
13767 /* Ensure that registers that must be live after the instruction end
13768 up with the correct value. */
13769 for (i
= 0; i
< nops
; i
++)
13771 int this_order
= reg_order
[i
];
13772 if ((this_order
!= mem_order
[i
]
13773 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
13774 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
13778 /* Load the constants. */
13779 for (i
= 0; i
< nops
; i
++)
13781 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
13782 sorted_regs
[i
] = regs
[reg_order
[i
]];
13783 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
13786 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13788 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
13791 gcc_assert (base_reg_dies
);
13797 gcc_assert (base_reg_dies
);
13798 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13802 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13804 for (i
= 0; i
< nops
; i
++)
13806 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13807 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13810 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
13811 write_back
? offset
+ i
* 4 : 0));
13815 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13816 unaligned copies on processors which support unaligned semantics for those
13817 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13818 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13819 An interleave factor of 1 (the minimum) will perform no interleaving.
13820 Load/store multiple are used for aligned addresses where possible. */
13823 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
13824 HOST_WIDE_INT length
,
13825 unsigned int interleave_factor
)
13827 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
13828 int *regnos
= XALLOCAVEC (int, interleave_factor
);
13829 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
13830 HOST_WIDE_INT i
, j
;
13831 HOST_WIDE_INT remaining
= length
, words
;
13832 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
13834 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
13835 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
13836 HOST_WIDE_INT srcoffset
, dstoffset
;
13837 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
13840 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
13842 /* Use hard registers if we have aligned source or destination so we can use
13843 load/store multiple with contiguous registers. */
13844 if (dst_aligned
|| src_aligned
)
13845 for (i
= 0; i
< interleave_factor
; i
++)
13846 regs
[i
] = gen_rtx_REG (SImode
, i
);
13848 for (i
= 0; i
< interleave_factor
; i
++)
13849 regs
[i
] = gen_reg_rtx (SImode
);
13851 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
13852 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
13854 srcoffset
= dstoffset
= 0;
13856 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13857 For copying the last bytes we want to subtract this offset again. */
13858 src_autoinc
= dst_autoinc
= 0;
13860 for (i
= 0; i
< interleave_factor
; i
++)
13863 /* Copy BLOCK_SIZE_BYTES chunks. */
13865 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
13868 if (src_aligned
&& interleave_factor
> 1)
13870 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
13871 TRUE
, srcbase
, &srcoffset
));
13872 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13876 for (j
= 0; j
< interleave_factor
; j
++)
13878 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
13880 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13881 srcoffset
+ j
* UNITS_PER_WORD
);
13882 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13884 srcoffset
+= block_size_bytes
;
13888 if (dst_aligned
&& interleave_factor
> 1)
13890 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
13891 TRUE
, dstbase
, &dstoffset
));
13892 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13896 for (j
= 0; j
< interleave_factor
; j
++)
13898 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
13900 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13901 dstoffset
+ j
* UNITS_PER_WORD
);
13902 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13904 dstoffset
+= block_size_bytes
;
13907 remaining
-= block_size_bytes
;
13910 /* Copy any whole words left (note these aren't interleaved with any
13911 subsequent halfword/byte load/stores in the interests of simplicity). */
13913 words
= remaining
/ UNITS_PER_WORD
;
13915 gcc_assert (words
< interleave_factor
);
13917 if (src_aligned
&& words
> 1)
13919 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
13921 src_autoinc
+= UNITS_PER_WORD
* words
;
13925 for (j
= 0; j
< words
; j
++)
13927 addr
= plus_constant (Pmode
, src
,
13928 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
13929 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13930 srcoffset
+ j
* UNITS_PER_WORD
);
13932 emit_move_insn (regs
[j
], mem
);
13934 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13936 srcoffset
+= words
* UNITS_PER_WORD
;
13939 if (dst_aligned
&& words
> 1)
13941 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
13943 dst_autoinc
+= words
* UNITS_PER_WORD
;
13947 for (j
= 0; j
< words
; j
++)
13949 addr
= plus_constant (Pmode
, dst
,
13950 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
13951 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13952 dstoffset
+ j
* UNITS_PER_WORD
);
13954 emit_move_insn (mem
, regs
[j
]);
13956 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13958 dstoffset
+= words
* UNITS_PER_WORD
;
13961 remaining
-= words
* UNITS_PER_WORD
;
13963 gcc_assert (remaining
< 4);
13965 /* Copy a halfword if necessary. */
13967 if (remaining
>= 2)
13969 halfword_tmp
= gen_reg_rtx (SImode
);
13971 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
13972 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
13973 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
13975 /* Either write out immediately, or delay until we've loaded the last
13976 byte, depending on interleave factor. */
13977 if (interleave_factor
== 1)
13979 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13980 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
13981 emit_insn (gen_unaligned_storehi (mem
,
13982 gen_lowpart (HImode
, halfword_tmp
)));
13983 halfword_tmp
= NULL
;
13991 gcc_assert (remaining
< 2);
13993 /* Copy last byte. */
13995 if ((remaining
& 1) != 0)
13997 byte_tmp
= gen_reg_rtx (SImode
);
13999 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14000 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14001 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14003 if (interleave_factor
== 1)
14005 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14006 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14007 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14016 /* Store last halfword if we haven't done so already. */
14020 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14021 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14022 emit_insn (gen_unaligned_storehi (mem
,
14023 gen_lowpart (HImode
, halfword_tmp
)));
14027 /* Likewise for last byte. */
14031 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14032 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14033 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14037 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
14040 /* From mips_adjust_block_mem:
14042 Helper function for doing a loop-based block operation on memory
14043 reference MEM. Each iteration of the loop will operate on LENGTH
14046 Create a new base register for use within the loop and point it to
14047 the start of MEM. Create a new memory reference that uses this
14048 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14051 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
14054 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14056 /* Although the new mem does not refer to a known location,
14057 it does keep up to LENGTH bytes of alignment. */
14058 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14059 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
14062 /* From mips_block_move_loop:
14064 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14065 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14066 the memory regions do not overlap. */
14069 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14070 unsigned int interleave_factor
,
14071 HOST_WIDE_INT bytes_per_iter
)
14073 rtx src_reg
, dest_reg
, final_src
, test
;
14074 HOST_WIDE_INT leftover
;
14076 leftover
= length
% bytes_per_iter
;
14077 length
-= leftover
;
14079 /* Create registers and memory references for use within the loop. */
14080 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14081 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14083 /* Calculate the value that SRC_REG should have after the last iteration of
14085 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14086 0, 0, OPTAB_WIDEN
);
14088 /* Emit the start of the loop. */
14089 rtx_code_label
*label
= gen_label_rtx ();
14090 emit_label (label
);
14092 /* Emit the loop body. */
14093 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14094 interleave_factor
);
14096 /* Move on to the next block. */
14097 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14098 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14100 /* Emit the loop condition. */
14101 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14102 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14104 /* Mop up any left-over bytes. */
14106 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14109 /* Emit a block move when either the source or destination is unaligned (not
14110 aligned to a four-byte boundary). This may need further tuning depending on
14111 core type, optimize_size setting, etc. */
14114 arm_movmemqi_unaligned (rtx
*operands
)
14116 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14120 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14121 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14122 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14123 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14124 or dst_aligned though: allow more interleaving in those cases since the
14125 resulting code can be smaller. */
14126 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14127 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14130 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14131 interleave_factor
, bytes_per_iter
);
14133 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14134 interleave_factor
);
14138 /* Note that the loop created by arm_block_move_unaligned_loop may be
14139 subject to loop unrolling, which makes tuning this condition a little
14142 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14144 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14151 arm_gen_movmemqi (rtx
*operands
)
14153 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14154 HOST_WIDE_INT srcoffset
, dstoffset
;
14155 rtx src
, dst
, srcbase
, dstbase
;
14156 rtx part_bytes_reg
= NULL
;
14159 if (!CONST_INT_P (operands
[2])
14160 || !CONST_INT_P (operands
[3])
14161 || INTVAL (operands
[2]) > 64)
14164 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14165 return arm_movmemqi_unaligned (operands
);
14167 if (INTVAL (operands
[3]) & 3)
14170 dstbase
= operands
[0];
14171 srcbase
= operands
[1];
14173 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14174 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14176 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14177 out_words_to_go
= INTVAL (operands
[2]) / 4;
14178 last_bytes
= INTVAL (operands
[2]) & 3;
14179 dstoffset
= srcoffset
= 0;
14181 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14182 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14184 while (in_words_to_go
>= 2)
14186 if (in_words_to_go
> 4)
14187 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14188 TRUE
, srcbase
, &srcoffset
));
14190 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14191 src
, FALSE
, srcbase
,
14194 if (out_words_to_go
)
14196 if (out_words_to_go
> 4)
14197 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14198 TRUE
, dstbase
, &dstoffset
));
14199 else if (out_words_to_go
!= 1)
14200 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14201 out_words_to_go
, dst
,
14204 dstbase
, &dstoffset
));
14207 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14208 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
14209 if (last_bytes
!= 0)
14211 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14217 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14218 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14221 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14222 if (out_words_to_go
)
14226 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14227 sreg
= copy_to_reg (mem
);
14229 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14230 emit_move_insn (mem
, sreg
);
14233 gcc_assert (!in_words_to_go
); /* Sanity check */
14236 if (in_words_to_go
)
14238 gcc_assert (in_words_to_go
> 0);
14240 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14241 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14244 gcc_assert (!last_bytes
|| part_bytes_reg
);
14246 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14248 rtx tmp
= gen_reg_rtx (SImode
);
14250 /* The bytes we want are in the top end of the word. */
14251 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14252 GEN_INT (8 * (4 - last_bytes
))));
14253 part_bytes_reg
= tmp
;
14257 mem
= adjust_automodify_address (dstbase
, QImode
,
14258 plus_constant (Pmode
, dst
,
14260 dstoffset
+ last_bytes
- 1);
14261 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14265 tmp
= gen_reg_rtx (SImode
);
14266 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14267 part_bytes_reg
= tmp
;
14274 if (last_bytes
> 1)
14276 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14277 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14281 rtx tmp
= gen_reg_rtx (SImode
);
14282 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14283 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14284 part_bytes_reg
= tmp
;
14291 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14292 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14299 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14302 next_consecutive_mem (rtx mem
)
14304 machine_mode mode
= GET_MODE (mem
);
14305 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14306 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14308 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14311 /* Copy using LDRD/STRD instructions whenever possible.
14312 Returns true upon success. */
14314 gen_movmem_ldrd_strd (rtx
*operands
)
14316 unsigned HOST_WIDE_INT len
;
14317 HOST_WIDE_INT align
;
14318 rtx src
, dst
, base
;
14320 bool src_aligned
, dst_aligned
;
14321 bool src_volatile
, dst_volatile
;
14323 gcc_assert (CONST_INT_P (operands
[2]));
14324 gcc_assert (CONST_INT_P (operands
[3]));
14326 len
= UINTVAL (operands
[2]);
14330 /* Maximum alignment we can assume for both src and dst buffers. */
14331 align
= INTVAL (operands
[3]);
14333 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14336 /* Place src and dst addresses in registers
14337 and update the corresponding mem rtx. */
14339 dst_volatile
= MEM_VOLATILE_P (dst
);
14340 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14341 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14342 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14345 src_volatile
= MEM_VOLATILE_P (src
);
14346 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14347 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14348 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14350 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14353 if (src_volatile
|| dst_volatile
)
14356 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14357 if (!(dst_aligned
|| src_aligned
))
14358 return arm_gen_movmemqi (operands
);
14360 /* If the either src or dst is unaligned we'll be accessing it as pairs
14361 of unaligned SImode accesses. Otherwise we can generate DImode
14362 ldrd/strd instructions. */
14363 src
= adjust_address (src
, src_aligned
? DImode
: SImode
, 0);
14364 dst
= adjust_address (dst
, dst_aligned
? DImode
: SImode
, 0);
14369 reg0
= gen_reg_rtx (DImode
);
14370 rtx low_reg
= NULL_RTX
;
14371 rtx hi_reg
= NULL_RTX
;
14373 if (!src_aligned
|| !dst_aligned
)
14375 low_reg
= gen_lowpart (SImode
, reg0
);
14376 hi_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
14379 emit_move_insn (reg0
, src
);
14382 emit_insn (gen_unaligned_loadsi (low_reg
, src
));
14383 src
= next_consecutive_mem (src
);
14384 emit_insn (gen_unaligned_loadsi (hi_reg
, src
));
14388 emit_move_insn (dst
, reg0
);
14391 emit_insn (gen_unaligned_storesi (dst
, low_reg
));
14392 dst
= next_consecutive_mem (dst
);
14393 emit_insn (gen_unaligned_storesi (dst
, hi_reg
));
14396 src
= next_consecutive_mem (src
);
14397 dst
= next_consecutive_mem (dst
);
14400 gcc_assert (len
< 8);
14403 /* More than a word but less than a double-word to copy. Copy a word. */
14404 reg0
= gen_reg_rtx (SImode
);
14405 src
= adjust_address (src
, SImode
, 0);
14406 dst
= adjust_address (dst
, SImode
, 0);
14408 emit_move_insn (reg0
, src
);
14410 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14413 emit_move_insn (dst
, reg0
);
14415 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14417 src
= next_consecutive_mem (src
);
14418 dst
= next_consecutive_mem (dst
);
14425 /* Copy the remaining bytes. */
14428 dst
= adjust_address (dst
, HImode
, 0);
14429 src
= adjust_address (src
, HImode
, 0);
14430 reg0
= gen_reg_rtx (SImode
);
14432 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14434 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14437 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14439 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14441 src
= next_consecutive_mem (src
);
14442 dst
= next_consecutive_mem (dst
);
14447 dst
= adjust_address (dst
, QImode
, 0);
14448 src
= adjust_address (src
, QImode
, 0);
14449 reg0
= gen_reg_rtx (QImode
);
14450 emit_move_insn (reg0
, src
);
14451 emit_move_insn (dst
, reg0
);
14455 /* Select a dominance comparison mode if possible for a test of the general
14456 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14457 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14458 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14459 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14460 In all cases OP will be either EQ or NE, but we don't need to know which
14461 here. If we are unable to support a dominance comparison we return
14462 CC mode. This will then fail to match for the RTL expressions that
14463 generate this call. */
14465 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14467 enum rtx_code cond1
, cond2
;
14470 /* Currently we will probably get the wrong result if the individual
14471 comparisons are not simple. This also ensures that it is safe to
14472 reverse a comparison if necessary. */
14473 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14475 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14479 /* The if_then_else variant of this tests the second condition if the
14480 first passes, but is true if the first fails. Reverse the first
14481 condition to get a true "inclusive-or" expression. */
14482 if (cond_or
== DOM_CC_NX_OR_Y
)
14483 cond1
= reverse_condition (cond1
);
14485 /* If the comparisons are not equal, and one doesn't dominate the other,
14486 then we can't do this. */
14488 && !comparison_dominates_p (cond1
, cond2
)
14489 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14493 std::swap (cond1
, cond2
);
14498 if (cond_or
== DOM_CC_X_AND_Y
)
14503 case EQ
: return CC_DEQmode
;
14504 case LE
: return CC_DLEmode
;
14505 case LEU
: return CC_DLEUmode
;
14506 case GE
: return CC_DGEmode
;
14507 case GEU
: return CC_DGEUmode
;
14508 default: gcc_unreachable ();
14512 if (cond_or
== DOM_CC_X_AND_Y
)
14524 gcc_unreachable ();
14528 if (cond_or
== DOM_CC_X_AND_Y
)
14540 gcc_unreachable ();
14544 if (cond_or
== DOM_CC_X_AND_Y
)
14545 return CC_DLTUmode
;
14550 return CC_DLTUmode
;
14552 return CC_DLEUmode
;
14556 gcc_unreachable ();
14560 if (cond_or
== DOM_CC_X_AND_Y
)
14561 return CC_DGTUmode
;
14566 return CC_DGTUmode
;
14568 return CC_DGEUmode
;
14572 gcc_unreachable ();
14575 /* The remaining cases only occur when both comparisons are the
14578 gcc_assert (cond1
== cond2
);
14582 gcc_assert (cond1
== cond2
);
14586 gcc_assert (cond1
== cond2
);
14590 gcc_assert (cond1
== cond2
);
14591 return CC_DLEUmode
;
14594 gcc_assert (cond1
== cond2
);
14595 return CC_DGEUmode
;
14598 gcc_unreachable ();
14603 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
14605 /* All floating point compares return CCFP if it is an equality
14606 comparison, and CCFPE otherwise. */
14607 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14630 gcc_unreachable ();
14634 /* A compare with a shifted operand. Because of canonicalization, the
14635 comparison will have to be swapped when we emit the assembler. */
14636 if (GET_MODE (y
) == SImode
14637 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14638 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14639 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
14640 || GET_CODE (x
) == ROTATERT
))
14643 /* This operation is performed swapped, but since we only rely on the Z
14644 flag we don't need an additional mode. */
14645 if (GET_MODE (y
) == SImode
14646 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14647 && GET_CODE (x
) == NEG
14648 && (op
== EQ
|| op
== NE
))
14651 /* This is a special case that is used by combine to allow a
14652 comparison of a shifted byte load to be split into a zero-extend
14653 followed by a comparison of the shifted integer (only valid for
14654 equalities and unsigned inequalities). */
14655 if (GET_MODE (x
) == SImode
14656 && GET_CODE (x
) == ASHIFT
14657 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
14658 && GET_CODE (XEXP (x
, 0)) == SUBREG
14659 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
14660 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
14661 && (op
== EQ
|| op
== NE
14662 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
14663 && CONST_INT_P (y
))
14666 /* A construct for a conditional compare, if the false arm contains
14667 0, then both conditions must be true, otherwise either condition
14668 must be true. Not all conditions are possible, so CCmode is
14669 returned if it can't be done. */
14670 if (GET_CODE (x
) == IF_THEN_ELSE
14671 && (XEXP (x
, 2) == const0_rtx
14672 || XEXP (x
, 2) == const1_rtx
)
14673 && COMPARISON_P (XEXP (x
, 0))
14674 && COMPARISON_P (XEXP (x
, 1)))
14675 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14676 INTVAL (XEXP (x
, 2)));
14678 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14679 if (GET_CODE (x
) == AND
14680 && (op
== EQ
|| op
== NE
)
14681 && COMPARISON_P (XEXP (x
, 0))
14682 && COMPARISON_P (XEXP (x
, 1)))
14683 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14686 if (GET_CODE (x
) == IOR
14687 && (op
== EQ
|| op
== NE
)
14688 && COMPARISON_P (XEXP (x
, 0))
14689 && COMPARISON_P (XEXP (x
, 1)))
14690 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14693 /* An operation (on Thumb) where we want to test for a single bit.
14694 This is done by shifting that bit up into the top bit of a
14695 scratch register; we can then branch on the sign bit. */
14697 && GET_MODE (x
) == SImode
14698 && (op
== EQ
|| op
== NE
)
14699 && GET_CODE (x
) == ZERO_EXTRACT
14700 && XEXP (x
, 1) == const1_rtx
)
14703 /* An operation that sets the condition codes as a side-effect, the
14704 V flag is not set correctly, so we can only use comparisons where
14705 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14707 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14708 if (GET_MODE (x
) == SImode
14710 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
14711 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
14712 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
14713 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
14714 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
14715 || GET_CODE (x
) == LSHIFTRT
14716 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14717 || GET_CODE (x
) == ROTATERT
14718 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
14719 return CC_NOOVmode
;
14721 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
14724 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
14725 && GET_CODE (x
) == PLUS
14726 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
14729 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
14735 /* A DImode comparison against zero can be implemented by
14736 or'ing the two halves together. */
14737 if (y
== const0_rtx
)
14740 /* We can do an equality test in three Thumb instructions. */
14750 /* DImode unsigned comparisons can be implemented by cmp +
14751 cmpeq without a scratch register. Not worth doing in
14762 /* DImode signed and unsigned comparisons can be implemented
14763 by cmp + sbcs with a scratch register, but that does not
14764 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14765 gcc_assert (op
!= EQ
&& op
!= NE
);
14769 gcc_unreachable ();
14773 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
14774 return GET_MODE (x
);
14779 /* X and Y are two things to compare using CODE. Emit the compare insn and
14780 return the rtx for register 0 in the proper mode. FP means this is a
14781 floating point compare: I don't think that it is needed on the arm. */
14783 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
14787 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
14789 /* We might have X as a constant, Y as a register because of the predicates
14790 used for cmpdi. If so, force X to a register here. */
14791 if (dimode_comparison
&& !REG_P (x
))
14792 x
= force_reg (DImode
, x
);
14794 mode
= SELECT_CC_MODE (code
, x
, y
);
14795 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
14797 if (dimode_comparison
14798 && mode
!= CC_CZmode
)
14802 /* To compare two non-zero values for equality, XOR them and
14803 then compare against zero. Not used for ARM mode; there
14804 CC_CZmode is cheaper. */
14805 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
14807 gcc_assert (!reload_completed
);
14808 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
14812 /* A scratch register is required. */
14813 if (reload_completed
)
14814 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
14816 scratch
= gen_rtx_SCRATCH (SImode
);
14818 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
14819 set
= gen_rtx_SET (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14820 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
14823 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14828 /* Generate a sequence of insns that will generate the correct return
14829 address mask depending on the physical architecture that the program
14832 arm_gen_return_addr_mask (void)
14834 rtx reg
= gen_reg_rtx (Pmode
);
14836 emit_insn (gen_return_addr_mask (reg
));
14841 arm_reload_in_hi (rtx
*operands
)
14843 rtx ref
= operands
[1];
14845 HOST_WIDE_INT offset
= 0;
14847 if (GET_CODE (ref
) == SUBREG
)
14849 offset
= SUBREG_BYTE (ref
);
14850 ref
= SUBREG_REG (ref
);
14855 /* We have a pseudo which has been spilt onto the stack; there
14856 are two cases here: the first where there is a simple
14857 stack-slot replacement and a second where the stack-slot is
14858 out of range, or is used as a subreg. */
14859 if (reg_equiv_mem (REGNO (ref
)))
14861 ref
= reg_equiv_mem (REGNO (ref
));
14862 base
= find_replacement (&XEXP (ref
, 0));
14865 /* The slot is out of range, or was dressed up in a SUBREG. */
14866 base
= reg_equiv_address (REGNO (ref
));
14868 /* PR 62554: If there is no equivalent memory location then just move
14869 the value as an SImode register move. This happens when the target
14870 architecture variant does not have an HImode register move. */
14873 gcc_assert (REG_P (operands
[0]));
14874 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14875 gen_rtx_SUBREG (SImode
, ref
, 0)));
14880 base
= find_replacement (&XEXP (ref
, 0));
14882 /* Handle the case where the address is too complex to be offset by 1. */
14883 if (GET_CODE (base
) == MINUS
14884 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14886 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14888 emit_set_insn (base_plus
, base
);
14891 else if (GET_CODE (base
) == PLUS
)
14893 /* The addend must be CONST_INT, or we would have dealt with it above. */
14894 HOST_WIDE_INT hi
, lo
;
14896 offset
+= INTVAL (XEXP (base
, 1));
14897 base
= XEXP (base
, 0);
14899 /* Rework the address into a legal sequence of insns. */
14900 /* Valid range for lo is -4095 -> 4095 */
14903 : -((-offset
) & 0xfff));
14905 /* Corner case, if lo is the max offset then we would be out of range
14906 once we have added the additional 1 below, so bump the msb into the
14907 pre-loading insn(s). */
14911 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
14912 ^ (HOST_WIDE_INT
) 0x80000000)
14913 - (HOST_WIDE_INT
) 0x80000000);
14915 gcc_assert (hi
+ lo
== offset
);
14919 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14921 /* Get the base address; addsi3 knows how to handle constants
14922 that require more than one insn. */
14923 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
14929 /* Operands[2] may overlap operands[0] (though it won't overlap
14930 operands[1]), that's why we asked for a DImode reg -- so we can
14931 use the bit that does not overlap. */
14932 if (REGNO (operands
[2]) == REGNO (operands
[0]))
14933 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14935 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
14937 emit_insn (gen_zero_extendqisi2 (scratch
,
14938 gen_rtx_MEM (QImode
,
14939 plus_constant (Pmode
, base
,
14941 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14942 gen_rtx_MEM (QImode
,
14943 plus_constant (Pmode
, base
,
14945 if (!BYTES_BIG_ENDIAN
)
14946 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14947 gen_rtx_IOR (SImode
,
14950 gen_rtx_SUBREG (SImode
, operands
[0], 0),
14954 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14955 gen_rtx_IOR (SImode
,
14956 gen_rtx_ASHIFT (SImode
, scratch
,
14958 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
14961 /* Handle storing a half-word to memory during reload by synthesizing as two
14962 byte stores. Take care not to clobber the input values until after we
14963 have moved them somewhere safe. This code assumes that if the DImode
14964 scratch in operands[2] overlaps either the input value or output address
14965 in some way, then that value must die in this insn (we absolutely need
14966 two scratch registers for some corner cases). */
14968 arm_reload_out_hi (rtx
*operands
)
14970 rtx ref
= operands
[0];
14971 rtx outval
= operands
[1];
14973 HOST_WIDE_INT offset
= 0;
14975 if (GET_CODE (ref
) == SUBREG
)
14977 offset
= SUBREG_BYTE (ref
);
14978 ref
= SUBREG_REG (ref
);
14983 /* We have a pseudo which has been spilt onto the stack; there
14984 are two cases here: the first where there is a simple
14985 stack-slot replacement and a second where the stack-slot is
14986 out of range, or is used as a subreg. */
14987 if (reg_equiv_mem (REGNO (ref
)))
14989 ref
= reg_equiv_mem (REGNO (ref
));
14990 base
= find_replacement (&XEXP (ref
, 0));
14993 /* The slot is out of range, or was dressed up in a SUBREG. */
14994 base
= reg_equiv_address (REGNO (ref
));
14996 /* PR 62254: If there is no equivalent memory location then just move
14997 the value as an SImode register move. This happens when the target
14998 architecture variant does not have an HImode register move. */
15001 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
15003 if (REG_P (outval
))
15005 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
15006 gen_rtx_SUBREG (SImode
, outval
, 0)));
15008 else /* SUBREG_P (outval) */
15010 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
15011 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
15012 SUBREG_REG (outval
)));
15014 /* FIXME: Handle other cases ? */
15015 gcc_unreachable ();
15021 base
= find_replacement (&XEXP (ref
, 0));
15023 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15025 /* Handle the case where the address is too complex to be offset by 1. */
15026 if (GET_CODE (base
) == MINUS
15027 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15029 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15031 /* Be careful not to destroy OUTVAL. */
15032 if (reg_overlap_mentioned_p (base_plus
, outval
))
15034 /* Updating base_plus might destroy outval, see if we can
15035 swap the scratch and base_plus. */
15036 if (!reg_overlap_mentioned_p (scratch
, outval
))
15037 std::swap (scratch
, base_plus
);
15040 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15042 /* Be conservative and copy OUTVAL into the scratch now,
15043 this should only be necessary if outval is a subreg
15044 of something larger than a word. */
15045 /* XXX Might this clobber base? I can't see how it can,
15046 since scratch is known to overlap with OUTVAL, and
15047 must be wider than a word. */
15048 emit_insn (gen_movhi (scratch_hi
, outval
));
15049 outval
= scratch_hi
;
15053 emit_set_insn (base_plus
, base
);
15056 else if (GET_CODE (base
) == PLUS
)
15058 /* The addend must be CONST_INT, or we would have dealt with it above. */
15059 HOST_WIDE_INT hi
, lo
;
15061 offset
+= INTVAL (XEXP (base
, 1));
15062 base
= XEXP (base
, 0);
15064 /* Rework the address into a legal sequence of insns. */
15065 /* Valid range for lo is -4095 -> 4095 */
15068 : -((-offset
) & 0xfff));
15070 /* Corner case, if lo is the max offset then we would be out of range
15071 once we have added the additional 1 below, so bump the msb into the
15072 pre-loading insn(s). */
15076 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15077 ^ (HOST_WIDE_INT
) 0x80000000)
15078 - (HOST_WIDE_INT
) 0x80000000);
15080 gcc_assert (hi
+ lo
== offset
);
15084 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15086 /* Be careful not to destroy OUTVAL. */
15087 if (reg_overlap_mentioned_p (base_plus
, outval
))
15089 /* Updating base_plus might destroy outval, see if we
15090 can swap the scratch and base_plus. */
15091 if (!reg_overlap_mentioned_p (scratch
, outval
))
15092 std::swap (scratch
, base_plus
);
15095 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15097 /* Be conservative and copy outval into scratch now,
15098 this should only be necessary if outval is a
15099 subreg of something larger than a word. */
15100 /* XXX Might this clobber base? I can't see how it
15101 can, since scratch is known to overlap with
15103 emit_insn (gen_movhi (scratch_hi
, outval
));
15104 outval
= scratch_hi
;
15108 /* Get the base address; addsi3 knows how to handle constants
15109 that require more than one insn. */
15110 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15116 if (BYTES_BIG_ENDIAN
)
15118 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15119 plus_constant (Pmode
, base
,
15121 gen_lowpart (QImode
, outval
)));
15122 emit_insn (gen_lshrsi3 (scratch
,
15123 gen_rtx_SUBREG (SImode
, outval
, 0),
15125 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15127 gen_lowpart (QImode
, scratch
)));
15131 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15133 gen_lowpart (QImode
, outval
)));
15134 emit_insn (gen_lshrsi3 (scratch
,
15135 gen_rtx_SUBREG (SImode
, outval
, 0),
15137 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15138 plus_constant (Pmode
, base
,
15140 gen_lowpart (QImode
, scratch
)));
15144 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15145 (padded to the size of a word) should be passed in a register. */
15148 arm_must_pass_in_stack (machine_mode mode
, const_tree type
)
15150 if (TARGET_AAPCS_BASED
)
15151 return must_pass_in_stack_var_size (mode
, type
);
15153 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15157 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15158 byte of a stack argument has useful data. For legacy APCS ABIs we use
15159 the default. For AAPCS based ABIs small aggregate types are placed
15160 in the lowest memory address. */
15162 static pad_direction
15163 arm_function_arg_padding (machine_mode mode
, const_tree type
)
15165 if (!TARGET_AAPCS_BASED
)
15166 return default_function_arg_padding (mode
, type
);
15168 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15169 return PAD_DOWNWARD
;
15175 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15176 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15177 register has useful data, and return the opposite if the most
15178 significant byte does. */
15181 arm_pad_reg_upward (machine_mode mode
,
15182 tree type
, int first ATTRIBUTE_UNUSED
)
15184 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15186 /* For AAPCS, small aggregates, small fixed-point types,
15187 and small complex types are always padded upwards. */
15190 if ((AGGREGATE_TYPE_P (type
)
15191 || TREE_CODE (type
) == COMPLEX_TYPE
15192 || FIXED_POINT_TYPE_P (type
))
15193 && int_size_in_bytes (type
) <= 4)
15198 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15199 && GET_MODE_SIZE (mode
) <= 4)
15204 /* Otherwise, use default padding. */
15205 return !BYTES_BIG_ENDIAN
;
15208 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15209 assuming that the address in the base register is word aligned. */
15211 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15213 HOST_WIDE_INT max_offset
;
15215 /* Offset must be a multiple of 4 in Thumb mode. */
15216 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
15221 else if (TARGET_ARM
)
15226 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15229 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15230 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15231 Assumes that the address in the base register RN is word aligned. Pattern
15232 guarantees that both memory accesses use the same base register,
15233 the offsets are constants within the range, and the gap between the offsets is 4.
15234 If preload complete then check that registers are legal. WBACK indicates whether
15235 address is updated. LOAD indicates whether memory access is load or store. */
15237 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15238 bool wback
, bool load
)
15240 unsigned int t
, t2
, n
;
15242 if (!reload_completed
)
15245 if (!offset_ok_for_ldrd_strd (offset
))
15252 if ((TARGET_THUMB2
)
15253 && ((wback
&& (n
== t
|| n
== t2
))
15254 || (t
== SP_REGNUM
)
15255 || (t
== PC_REGNUM
)
15256 || (t2
== SP_REGNUM
)
15257 || (t2
== PC_REGNUM
)
15258 || (!load
&& (n
== PC_REGNUM
))
15259 || (load
&& (t
== t2
))
15260 /* Triggers Cortex-M3 LDRD errata. */
15261 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
15265 && ((wback
&& (n
== t
|| n
== t2
))
15266 || (t2
== PC_REGNUM
)
15267 || (t
% 2 != 0) /* First destination register is not even. */
15269 /* PC can be used as base register (for offset addressing only),
15270 but it is depricated. */
15271 || (n
== PC_REGNUM
)))
15277 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15278 operand MEM's address contains an immediate offset from the base
15279 register and has no side effects, in which case it sets BASE and
15280 OFFSET accordingly. */
15282 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
15286 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15288 /* TODO: Handle more general memory operand patterns, such as
15289 PRE_DEC and PRE_INC. */
15291 if (side_effects_p (mem
))
15294 /* Can't deal with subregs. */
15295 if (GET_CODE (mem
) == SUBREG
)
15298 gcc_assert (MEM_P (mem
));
15300 *offset
= const0_rtx
;
15302 addr
= XEXP (mem
, 0);
15304 /* If addr isn't valid for DImode, then we can't handle it. */
15305 if (!arm_legitimate_address_p (DImode
, addr
,
15306 reload_in_progress
|| reload_completed
))
15314 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15316 *base
= XEXP (addr
, 0);
15317 *offset
= XEXP (addr
, 1);
15318 return (REG_P (*base
) && CONST_INT_P (*offset
));
15324 /* Called from a peephole2 to replace two word-size accesses with a
15325 single LDRD/STRD instruction. Returns true iff we can generate a
15326 new instruction sequence. That is, both accesses use the same base
15327 register and the gap between constant offsets is 4. This function
15328 may reorder its operands to match ldrd/strd RTL templates.
15329 OPERANDS are the operands found by the peephole matcher;
15330 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15331 corresponding memory operands. LOAD indicaates whether the access
15332 is load or store. CONST_STORE indicates a store of constant
15333 integer values held in OPERANDS[4,5] and assumes that the pattern
15334 is of length 4 insn, for the purpose of checking dead registers.
15335 COMMUTE indicates that register operands may be reordered. */
15337 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15338 bool const_store
, bool commute
)
15341 HOST_WIDE_INT offsets
[2], offset
;
15342 rtx base
= NULL_RTX
;
15343 rtx cur_base
, cur_offset
, tmp
;
15345 HARD_REG_SET regset
;
15347 gcc_assert (!const_store
|| !load
);
15348 /* Check that the memory references are immediate offsets from the
15349 same base register. Extract the base register, the destination
15350 registers, and the corresponding memory offsets. */
15351 for (i
= 0; i
< nops
; i
++)
15353 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
15358 else if (REGNO (base
) != REGNO (cur_base
))
15361 offsets
[i
] = INTVAL (cur_offset
);
15362 if (GET_CODE (operands
[i
]) == SUBREG
)
15364 tmp
= SUBREG_REG (operands
[i
]);
15365 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15370 /* Make sure there is no dependency between the individual loads. */
15371 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15372 return false; /* RAW */
15374 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15375 return false; /* WAW */
15377 /* If the same input register is used in both stores
15378 when storing different constants, try to find a free register.
15379 For example, the code
15384 can be transformed into
15388 in Thumb mode assuming that r1 is free.
15389 For ARM mode do the same but only if the starting register
15390 can be made to be even. */
15392 && REGNO (operands
[0]) == REGNO (operands
[1])
15393 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15397 CLEAR_HARD_REG_SET (regset
);
15398 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15399 if (tmp
== NULL_RTX
)
15402 /* Use the new register in the first load to ensure that
15403 if the original input register is not dead after peephole,
15404 then it will have the correct constant value. */
15407 else if (TARGET_ARM
)
15409 int regno
= REGNO (operands
[0]);
15410 if (!peep2_reg_dead_p (4, operands
[0]))
15412 /* When the input register is even and is not dead after the
15413 pattern, it has to hold the second constant but we cannot
15414 form a legal STRD in ARM mode with this register as the second
15416 if (regno
% 2 == 0)
15419 /* Is regno-1 free? */
15420 SET_HARD_REG_SET (regset
);
15421 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15422 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15423 if (tmp
== NULL_RTX
)
15430 /* Find a DImode register. */
15431 CLEAR_HARD_REG_SET (regset
);
15432 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15433 if (tmp
!= NULL_RTX
)
15435 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15436 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15440 /* Can we use the input register to form a DI register? */
15441 SET_HARD_REG_SET (regset
);
15442 CLEAR_HARD_REG_BIT(regset
,
15443 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15444 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15445 if (tmp
== NULL_RTX
)
15447 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
15451 gcc_assert (operands
[0] != NULL_RTX
);
15452 gcc_assert (operands
[1] != NULL_RTX
);
15453 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15454 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15458 /* Make sure the instructions are ordered with lower memory access first. */
15459 if (offsets
[0] > offsets
[1])
15461 gap
= offsets
[0] - offsets
[1];
15462 offset
= offsets
[1];
15464 /* Swap the instructions such that lower memory is accessed first. */
15465 std::swap (operands
[0], operands
[1]);
15466 std::swap (operands
[2], operands
[3]);
15468 std::swap (operands
[4], operands
[5]);
15472 gap
= offsets
[1] - offsets
[0];
15473 offset
= offsets
[0];
15476 /* Make sure accesses are to consecutive memory locations. */
15480 /* Make sure we generate legal instructions. */
15481 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15485 /* In Thumb state, where registers are almost unconstrained, there
15486 is little hope to fix it. */
15490 if (load
&& commute
)
15492 /* Try reordering registers. */
15493 std::swap (operands
[0], operands
[1]);
15494 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15501 /* If input registers are dead after this pattern, they can be
15502 reordered or replaced by other registers that are free in the
15503 current pattern. */
15504 if (!peep2_reg_dead_p (4, operands
[0])
15505 || !peep2_reg_dead_p (4, operands
[1]))
15508 /* Try to reorder the input registers. */
15509 /* For example, the code
15514 can be transformed into
15519 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15522 std::swap (operands
[0], operands
[1]);
15526 /* Try to find a free DI register. */
15527 CLEAR_HARD_REG_SET (regset
);
15528 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15529 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15532 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15533 if (tmp
== NULL_RTX
)
15536 /* DREG must be an even-numbered register in DImode.
15537 Split it into SI registers. */
15538 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15539 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15540 gcc_assert (operands
[0] != NULL_RTX
);
15541 gcc_assert (operands
[1] != NULL_RTX
);
15542 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15543 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15545 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15557 /* Print a symbolic form of X to the debug file, F. */
15559 arm_print_value (FILE *f
, rtx x
)
15561 switch (GET_CODE (x
))
15564 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15568 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
15576 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
15578 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
15579 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
15587 fprintf (f
, "\"%s\"", XSTR (x
, 0));
15591 fprintf (f
, "`%s'", XSTR (x
, 0));
15595 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
15599 arm_print_value (f
, XEXP (x
, 0));
15603 arm_print_value (f
, XEXP (x
, 0));
15605 arm_print_value (f
, XEXP (x
, 1));
15613 fprintf (f
, "????");
15618 /* Routines for manipulation of the constant pool. */
15620 /* Arm instructions cannot load a large constant directly into a
15621 register; they have to come from a pc relative load. The constant
15622 must therefore be placed in the addressable range of the pc
15623 relative load. Depending on the precise pc relative load
15624 instruction the range is somewhere between 256 bytes and 4k. This
15625 means that we often have to dump a constant inside a function, and
15626 generate code to branch around it.
15628 It is important to minimize this, since the branches will slow
15629 things down and make the code larger.
15631 Normally we can hide the table after an existing unconditional
15632 branch so that there is no interruption of the flow, but in the
15633 worst case the code looks like this:
15651 We fix this by performing a scan after scheduling, which notices
15652 which instructions need to have their operands fetched from the
15653 constant table and builds the table.
15655 The algorithm starts by building a table of all the constants that
15656 need fixing up and all the natural barriers in the function (places
15657 where a constant table can be dropped without breaking the flow).
15658 For each fixup we note how far the pc-relative replacement will be
15659 able to reach and the offset of the instruction into the function.
15661 Having built the table we then group the fixes together to form
15662 tables that are as large as possible (subject to addressing
15663 constraints) and emit each table of constants after the last
15664 barrier that is within range of all the instructions in the group.
15665 If a group does not contain a barrier, then we forcibly create one
15666 by inserting a jump instruction into the flow. Once the table has
15667 been inserted, the insns are then modified to reference the
15668 relevant entry in the pool.
15670 Possible enhancements to the algorithm (not implemented) are:
15672 1) For some processors and object formats, there may be benefit in
15673 aligning the pools to the start of cache lines; this alignment
15674 would need to be taken into account when calculating addressability
15677 /* These typedefs are located at the start of this file, so that
15678 they can be used in the prototypes there. This comment is to
15679 remind readers of that fact so that the following structures
15680 can be understood more easily.
15682 typedef struct minipool_node Mnode;
15683 typedef struct minipool_fixup Mfix; */
15685 struct minipool_node
15687 /* Doubly linked chain of entries. */
15690 /* The maximum offset into the code that this entry can be placed. While
15691 pushing fixes for forward references, all entries are sorted in order
15692 of increasing max_address. */
15693 HOST_WIDE_INT max_address
;
15694 /* Similarly for an entry inserted for a backwards ref. */
15695 HOST_WIDE_INT min_address
;
15696 /* The number of fixes referencing this entry. This can become zero
15697 if we "unpush" an entry. In this case we ignore the entry when we
15698 come to emit the code. */
15700 /* The offset from the start of the minipool. */
15701 HOST_WIDE_INT offset
;
15702 /* The value in table. */
15704 /* The mode of value. */
15706 /* The size of the value. With iWMMXt enabled
15707 sizes > 4 also imply an alignment of 8-bytes. */
15711 struct minipool_fixup
15715 HOST_WIDE_INT address
;
15721 HOST_WIDE_INT forwards
;
15722 HOST_WIDE_INT backwards
;
15725 /* Fixes less than a word need padding out to a word boundary. */
15726 #define MINIPOOL_FIX_SIZE(mode) \
15727 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15729 static Mnode
* minipool_vector_head
;
15730 static Mnode
* minipool_vector_tail
;
15731 static rtx_code_label
*minipool_vector_label
;
15732 static int minipool_pad
;
15734 /* The linked list of all minipool fixes required for this function. */
15735 Mfix
* minipool_fix_head
;
15736 Mfix
* minipool_fix_tail
;
15737 /* The fix entry for the current minipool, once it has been placed. */
15738 Mfix
* minipool_barrier
;
15740 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15741 #define JUMP_TABLES_IN_TEXT_SECTION 0
15744 static HOST_WIDE_INT
15745 get_jump_table_size (rtx_jump_table_data
*insn
)
15747 /* ADDR_VECs only take room if read-only data does into the text
15749 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
15751 rtx body
= PATTERN (insn
);
15752 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
15753 HOST_WIDE_INT size
;
15754 HOST_WIDE_INT modesize
;
15756 modesize
= GET_MODE_SIZE (GET_MODE (body
));
15757 size
= modesize
* XVECLEN (body
, elt
);
15761 /* Round up size of TBB table to a halfword boundary. */
15762 size
= (size
+ 1) & ~HOST_WIDE_INT_1
;
15765 /* No padding necessary for TBH. */
15768 /* Add two bytes for alignment on Thumb. */
15773 gcc_unreachable ();
15781 /* Return the maximum amount of padding that will be inserted before
15784 static HOST_WIDE_INT
15785 get_label_padding (rtx label
)
15787 HOST_WIDE_INT align
, min_insn_size
;
15789 align
= 1 << label_to_alignment (label
);
15790 min_insn_size
= TARGET_THUMB
? 2 : 4;
15791 return align
> min_insn_size
? align
- min_insn_size
: 0;
15794 /* Move a minipool fix MP from its current location to before MAX_MP.
15795 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15796 constraints may need updating. */
15798 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
15799 HOST_WIDE_INT max_address
)
15801 /* The code below assumes these are different. */
15802 gcc_assert (mp
!= max_mp
);
15804 if (max_mp
== NULL
)
15806 if (max_address
< mp
->max_address
)
15807 mp
->max_address
= max_address
;
15811 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15812 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15814 mp
->max_address
= max_address
;
15816 /* Unlink MP from its current position. Since max_mp is non-null,
15817 mp->prev must be non-null. */
15818 mp
->prev
->next
= mp
->next
;
15819 if (mp
->next
!= NULL
)
15820 mp
->next
->prev
= mp
->prev
;
15822 minipool_vector_tail
= mp
->prev
;
15824 /* Re-insert it before MAX_MP. */
15826 mp
->prev
= max_mp
->prev
;
15829 if (mp
->prev
!= NULL
)
15830 mp
->prev
->next
= mp
;
15832 minipool_vector_head
= mp
;
15835 /* Save the new entry. */
15838 /* Scan over the preceding entries and adjust their addresses as
15840 while (mp
->prev
!= NULL
15841 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15843 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15850 /* Add a constant to the minipool for a forward reference. Returns the
15851 node added or NULL if the constant will not fit in this pool. */
15853 add_minipool_forward_ref (Mfix
*fix
)
15855 /* If set, max_mp is the first pool_entry that has a lower
15856 constraint than the one we are trying to add. */
15857 Mnode
* max_mp
= NULL
;
15858 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
15861 /* If the minipool starts before the end of FIX->INSN then this FIX
15862 can not be placed into the current pool. Furthermore, adding the
15863 new constant pool entry may cause the pool to start FIX_SIZE bytes
15865 if (minipool_vector_head
&&
15866 (fix
->address
+ get_attr_length (fix
->insn
)
15867 >= minipool_vector_head
->max_address
- fix
->fix_size
))
15870 /* Scan the pool to see if a constant with the same value has
15871 already been added. While we are doing this, also note the
15872 location where we must insert the constant if it doesn't already
15874 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15876 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
15877 && fix
->mode
== mp
->mode
15878 && (!LABEL_P (fix
->value
)
15879 || (CODE_LABEL_NUMBER (fix
->value
)
15880 == CODE_LABEL_NUMBER (mp
->value
)))
15881 && rtx_equal_p (fix
->value
, mp
->value
))
15883 /* More than one fix references this entry. */
15885 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
15888 /* Note the insertion point if necessary. */
15890 && mp
->max_address
> max_address
)
15893 /* If we are inserting an 8-bytes aligned quantity and
15894 we have not already found an insertion point, then
15895 make sure that all such 8-byte aligned quantities are
15896 placed at the start of the pool. */
15897 if (ARM_DOUBLEWORD_ALIGN
15899 && fix
->fix_size
>= 8
15900 && mp
->fix_size
< 8)
15903 max_address
= mp
->max_address
;
15907 /* The value is not currently in the minipool, so we need to create
15908 a new entry for it. If MAX_MP is NULL, the entry will be put on
15909 the end of the list since the placement is less constrained than
15910 any existing entry. Otherwise, we insert the new fix before
15911 MAX_MP and, if necessary, adjust the constraints on the other
15914 mp
->fix_size
= fix
->fix_size
;
15915 mp
->mode
= fix
->mode
;
15916 mp
->value
= fix
->value
;
15918 /* Not yet required for a backwards ref. */
15919 mp
->min_address
= -65536;
15921 if (max_mp
== NULL
)
15923 mp
->max_address
= max_address
;
15925 mp
->prev
= minipool_vector_tail
;
15927 if (mp
->prev
== NULL
)
15929 minipool_vector_head
= mp
;
15930 minipool_vector_label
= gen_label_rtx ();
15933 mp
->prev
->next
= mp
;
15935 minipool_vector_tail
= mp
;
15939 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15940 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15942 mp
->max_address
= max_address
;
15945 mp
->prev
= max_mp
->prev
;
15947 if (mp
->prev
!= NULL
)
15948 mp
->prev
->next
= mp
;
15950 minipool_vector_head
= mp
;
15953 /* Save the new entry. */
15956 /* Scan over the preceding entries and adjust their addresses as
15958 while (mp
->prev
!= NULL
15959 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15961 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15969 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
15970 HOST_WIDE_INT min_address
)
15972 HOST_WIDE_INT offset
;
15974 /* The code below assumes these are different. */
15975 gcc_assert (mp
!= min_mp
);
15977 if (min_mp
== NULL
)
15979 if (min_address
> mp
->min_address
)
15980 mp
->min_address
= min_address
;
15984 /* We will adjust this below if it is too loose. */
15985 mp
->min_address
= min_address
;
15987 /* Unlink MP from its current position. Since min_mp is non-null,
15988 mp->next must be non-null. */
15989 mp
->next
->prev
= mp
->prev
;
15990 if (mp
->prev
!= NULL
)
15991 mp
->prev
->next
= mp
->next
;
15993 minipool_vector_head
= mp
->next
;
15995 /* Reinsert it after MIN_MP. */
15997 mp
->next
= min_mp
->next
;
15999 if (mp
->next
!= NULL
)
16000 mp
->next
->prev
= mp
;
16002 minipool_vector_tail
= mp
;
16008 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16010 mp
->offset
= offset
;
16011 if (mp
->refcount
> 0)
16012 offset
+= mp
->fix_size
;
16014 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16015 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16021 /* Add a constant to the minipool for a backward reference. Returns the
16022 node added or NULL if the constant will not fit in this pool.
16024 Note that the code for insertion for a backwards reference can be
16025 somewhat confusing because the calculated offsets for each fix do
16026 not take into account the size of the pool (which is still under
16029 add_minipool_backward_ref (Mfix
*fix
)
16031 /* If set, min_mp is the last pool_entry that has a lower constraint
16032 than the one we are trying to add. */
16033 Mnode
*min_mp
= NULL
;
16034 /* This can be negative, since it is only a constraint. */
16035 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16038 /* If we can't reach the current pool from this insn, or if we can't
16039 insert this entry at the end of the pool without pushing other
16040 fixes out of range, then we don't try. This ensures that we
16041 can't fail later on. */
16042 if (min_address
>= minipool_barrier
->address
16043 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16044 >= minipool_barrier
->address
))
16047 /* Scan the pool to see if a constant with the same value has
16048 already been added. While we are doing this, also note the
16049 location where we must insert the constant if it doesn't already
16051 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16053 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16054 && fix
->mode
== mp
->mode
16055 && (!LABEL_P (fix
->value
)
16056 || (CODE_LABEL_NUMBER (fix
->value
)
16057 == CODE_LABEL_NUMBER (mp
->value
)))
16058 && rtx_equal_p (fix
->value
, mp
->value
)
16059 /* Check that there is enough slack to move this entry to the
16060 end of the table (this is conservative). */
16061 && (mp
->max_address
16062 > (minipool_barrier
->address
16063 + minipool_vector_tail
->offset
16064 + minipool_vector_tail
->fix_size
)))
16067 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16070 if (min_mp
!= NULL
)
16071 mp
->min_address
+= fix
->fix_size
;
16074 /* Note the insertion point if necessary. */
16075 if (mp
->min_address
< min_address
)
16077 /* For now, we do not allow the insertion of 8-byte alignment
16078 requiring nodes anywhere but at the start of the pool. */
16079 if (ARM_DOUBLEWORD_ALIGN
16080 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16085 else if (mp
->max_address
16086 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16088 /* Inserting before this entry would push the fix beyond
16089 its maximum address (which can happen if we have
16090 re-located a forwards fix); force the new fix to come
16092 if (ARM_DOUBLEWORD_ALIGN
16093 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16098 min_address
= mp
->min_address
+ fix
->fix_size
;
16101 /* Do not insert a non-8-byte aligned quantity before 8-byte
16102 aligned quantities. */
16103 else if (ARM_DOUBLEWORD_ALIGN
16104 && fix
->fix_size
< 8
16105 && mp
->fix_size
>= 8)
16108 min_address
= mp
->min_address
+ fix
->fix_size
;
16113 /* We need to create a new entry. */
16115 mp
->fix_size
= fix
->fix_size
;
16116 mp
->mode
= fix
->mode
;
16117 mp
->value
= fix
->value
;
16119 mp
->max_address
= minipool_barrier
->address
+ 65536;
16121 mp
->min_address
= min_address
;
16123 if (min_mp
== NULL
)
16126 mp
->next
= minipool_vector_head
;
16128 if (mp
->next
== NULL
)
16130 minipool_vector_tail
= mp
;
16131 minipool_vector_label
= gen_label_rtx ();
16134 mp
->next
->prev
= mp
;
16136 minipool_vector_head
= mp
;
16140 mp
->next
= min_mp
->next
;
16144 if (mp
->next
!= NULL
)
16145 mp
->next
->prev
= mp
;
16147 minipool_vector_tail
= mp
;
16150 /* Save the new entry. */
16158 /* Scan over the following entries and adjust their offsets. */
16159 while (mp
->next
!= NULL
)
16161 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16162 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16165 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16167 mp
->next
->offset
= mp
->offset
;
16176 assign_minipool_offsets (Mfix
*barrier
)
16178 HOST_WIDE_INT offset
= 0;
16181 minipool_barrier
= barrier
;
16183 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16185 mp
->offset
= offset
;
16187 if (mp
->refcount
> 0)
16188 offset
+= mp
->fix_size
;
16192 /* Output the literal table */
16194 dump_minipool (rtx_insn
*scan
)
16200 if (ARM_DOUBLEWORD_ALIGN
)
16201 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16202 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16209 fprintf (dump_file
,
16210 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16211 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16213 scan
= emit_label_after (gen_label_rtx (), scan
);
16214 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16215 scan
= emit_label_after (minipool_vector_label
, scan
);
16217 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16219 if (mp
->refcount
> 0)
16223 fprintf (dump_file
,
16224 ";; Offset %u, min %ld, max %ld ",
16225 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16226 (unsigned long) mp
->max_address
);
16227 arm_print_value (dump_file
, mp
->value
);
16228 fputc ('\n', dump_file
);
16231 rtx val
= copy_rtx (mp
->value
);
16233 switch (GET_MODE_SIZE (mp
->mode
))
16235 #ifdef HAVE_consttable_1
16237 scan
= emit_insn_after (gen_consttable_1 (val
), scan
);
16241 #ifdef HAVE_consttable_2
16243 scan
= emit_insn_after (gen_consttable_2 (val
), scan
);
16247 #ifdef HAVE_consttable_4
16249 scan
= emit_insn_after (gen_consttable_4 (val
), scan
);
16253 #ifdef HAVE_consttable_8
16255 scan
= emit_insn_after (gen_consttable_8 (val
), scan
);
16259 #ifdef HAVE_consttable_16
16261 scan
= emit_insn_after (gen_consttable_16 (val
), scan
);
16266 gcc_unreachable ();
16274 minipool_vector_head
= minipool_vector_tail
= NULL
;
16275 scan
= emit_insn_after (gen_consttable_end (), scan
);
16276 scan
= emit_barrier_after (scan
);
16279 /* Return the cost of forcibly inserting a barrier after INSN. */
16281 arm_barrier_cost (rtx_insn
*insn
)
16283 /* Basing the location of the pool on the loop depth is preferable,
16284 but at the moment, the basic block information seems to be
16285 corrupt by this stage of the compilation. */
16286 int base_cost
= 50;
16287 rtx_insn
*next
= next_nonnote_insn (insn
);
16289 if (next
!= NULL
&& LABEL_P (next
))
16292 switch (GET_CODE (insn
))
16295 /* It will always be better to place the table before the label, rather
16304 return base_cost
- 10;
16307 return base_cost
+ 10;
16311 /* Find the best place in the insn stream in the range
16312 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16313 Create the barrier by inserting a jump and add a new fix entry for
16316 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16318 HOST_WIDE_INT count
= 0;
16319 rtx_barrier
*barrier
;
16320 rtx_insn
*from
= fix
->insn
;
16321 /* The instruction after which we will insert the jump. */
16322 rtx_insn
*selected
= NULL
;
16324 /* The address at which the jump instruction will be placed. */
16325 HOST_WIDE_INT selected_address
;
16327 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16328 rtx_code_label
*label
= gen_label_rtx ();
16330 selected_cost
= arm_barrier_cost (from
);
16331 selected_address
= fix
->address
;
16333 while (from
&& count
< max_count
)
16335 rtx_jump_table_data
*tmp
;
16338 /* This code shouldn't have been called if there was a natural barrier
16340 gcc_assert (!BARRIER_P (from
));
16342 /* Count the length of this insn. This must stay in sync with the
16343 code that pushes minipool fixes. */
16344 if (LABEL_P (from
))
16345 count
+= get_label_padding (from
);
16347 count
+= get_attr_length (from
);
16349 /* If there is a jump table, add its length. */
16350 if (tablejump_p (from
, NULL
, &tmp
))
16352 count
+= get_jump_table_size (tmp
);
16354 /* Jump tables aren't in a basic block, so base the cost on
16355 the dispatch insn. If we select this location, we will
16356 still put the pool after the table. */
16357 new_cost
= arm_barrier_cost (from
);
16359 if (count
< max_count
16360 && (!selected
|| new_cost
<= selected_cost
))
16363 selected_cost
= new_cost
;
16364 selected_address
= fix
->address
+ count
;
16367 /* Continue after the dispatch table. */
16368 from
= NEXT_INSN (tmp
);
16372 new_cost
= arm_barrier_cost (from
);
16374 if (count
< max_count
16375 && (!selected
|| new_cost
<= selected_cost
))
16378 selected_cost
= new_cost
;
16379 selected_address
= fix
->address
+ count
;
16382 from
= NEXT_INSN (from
);
16385 /* Make sure that we found a place to insert the jump. */
16386 gcc_assert (selected
);
16388 /* Make sure we do not split a call and its corresponding
16389 CALL_ARG_LOCATION note. */
16390 if (CALL_P (selected
))
16392 rtx_insn
*next
= NEXT_INSN (selected
);
16393 if (next
&& NOTE_P (next
)
16394 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
16398 /* Create a new JUMP_INSN that branches around a barrier. */
16399 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16400 JUMP_LABEL (from
) = label
;
16401 barrier
= emit_barrier_after (from
);
16402 emit_label_after (label
, barrier
);
16404 /* Create a minipool barrier entry for the new barrier. */
16405 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16406 new_fix
->insn
= barrier
;
16407 new_fix
->address
= selected_address
;
16408 new_fix
->next
= fix
->next
;
16409 fix
->next
= new_fix
;
16414 /* Record that there is a natural barrier in the insn stream at
16417 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
16419 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16422 fix
->address
= address
;
16425 if (minipool_fix_head
!= NULL
)
16426 minipool_fix_tail
->next
= fix
;
16428 minipool_fix_head
= fix
;
16430 minipool_fix_tail
= fix
;
16433 /* Record INSN, which will need fixing up to load a value from the
16434 minipool. ADDRESS is the offset of the insn since the start of the
16435 function; LOC is a pointer to the part of the insn which requires
16436 fixing; VALUE is the constant that must be loaded, which is of type
16439 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
16440 machine_mode mode
, rtx value
)
16442 gcc_assert (!arm_disable_literal_pool
);
16443 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16446 fix
->address
= address
;
16449 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16450 fix
->value
= value
;
16451 fix
->forwards
= get_attr_pool_range (insn
);
16452 fix
->backwards
= get_attr_neg_pool_range (insn
);
16453 fix
->minipool
= NULL
;
16455 /* If an insn doesn't have a range defined for it, then it isn't
16456 expecting to be reworked by this code. Better to stop now than
16457 to generate duff assembly code. */
16458 gcc_assert (fix
->forwards
|| fix
->backwards
);
16460 /* If an entry requires 8-byte alignment then assume all constant pools
16461 require 4 bytes of padding. Trying to do this later on a per-pool
16462 basis is awkward because existing pool entries have to be modified. */
16463 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
16468 fprintf (dump_file
,
16469 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16470 GET_MODE_NAME (mode
),
16471 INSN_UID (insn
), (unsigned long) address
,
16472 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16473 arm_print_value (dump_file
, fix
->value
);
16474 fprintf (dump_file
, "\n");
16477 /* Add it to the chain of fixes. */
16480 if (minipool_fix_head
!= NULL
)
16481 minipool_fix_tail
->next
= fix
;
16483 minipool_fix_head
= fix
;
16485 minipool_fix_tail
= fix
;
16488 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16489 Returns the number of insns needed, or 99 if we always want to synthesize
16492 arm_max_const_double_inline_cost ()
16494 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
16497 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16498 Returns the number of insns needed, or 99 if we don't know how to
16501 arm_const_double_inline_cost (rtx val
)
16503 rtx lowpart
, highpart
;
16506 mode
= GET_MODE (val
);
16508 if (mode
== VOIDmode
)
16511 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16513 lowpart
= gen_lowpart (SImode
, val
);
16514 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16516 gcc_assert (CONST_INT_P (lowpart
));
16517 gcc_assert (CONST_INT_P (highpart
));
16519 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16520 NULL_RTX
, NULL_RTX
, 0, 0)
16521 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16522 NULL_RTX
, NULL_RTX
, 0, 0));
16525 /* Cost of loading a SImode constant. */
16527 arm_const_inline_cost (enum rtx_code code
, rtx val
)
16529 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
16530 NULL_RTX
, NULL_RTX
, 1, 0);
16533 /* Return true if it is worthwhile to split a 64-bit constant into two
16534 32-bit operations. This is the case if optimizing for size, or
16535 if we have load delay slots, or if one 32-bit part can be done with
16536 a single data operation. */
16538 arm_const_double_by_parts (rtx val
)
16540 machine_mode mode
= GET_MODE (val
);
16543 if (optimize_size
|| arm_ld_sched
)
16546 if (mode
== VOIDmode
)
16549 part
= gen_highpart_mode (SImode
, mode
, val
);
16551 gcc_assert (CONST_INT_P (part
));
16553 if (const_ok_for_arm (INTVAL (part
))
16554 || const_ok_for_arm (~INTVAL (part
)))
16557 part
= gen_lowpart (SImode
, val
);
16559 gcc_assert (CONST_INT_P (part
));
16561 if (const_ok_for_arm (INTVAL (part
))
16562 || const_ok_for_arm (~INTVAL (part
)))
16568 /* Return true if it is possible to inline both the high and low parts
16569 of a 64-bit constant into 32-bit data processing instructions. */
16571 arm_const_double_by_immediates (rtx val
)
16573 machine_mode mode
= GET_MODE (val
);
16576 if (mode
== VOIDmode
)
16579 part
= gen_highpart_mode (SImode
, mode
, val
);
16581 gcc_assert (CONST_INT_P (part
));
16583 if (!const_ok_for_arm (INTVAL (part
)))
16586 part
= gen_lowpart (SImode
, val
);
16588 gcc_assert (CONST_INT_P (part
));
16590 if (!const_ok_for_arm (INTVAL (part
)))
16596 /* Scan INSN and note any of its operands that need fixing.
16597 If DO_PUSHES is false we do not actually push any of the fixups
16600 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
16604 extract_constrain_insn (insn
);
16606 if (recog_data
.n_alternatives
== 0)
16609 /* Fill in recog_op_alt with information about the constraints of
16611 preprocess_constraints (insn
);
16613 const operand_alternative
*op_alt
= which_op_alt ();
16614 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16616 /* Things we need to fix can only occur in inputs. */
16617 if (recog_data
.operand_type
[opno
] != OP_IN
)
16620 /* If this alternative is a memory reference, then any mention
16621 of constants in this alternative is really to fool reload
16622 into allowing us to accept one there. We need to fix them up
16623 now so that we output the right code. */
16624 if (op_alt
[opno
].memory_ok
)
16626 rtx op
= recog_data
.operand
[opno
];
16628 if (CONSTANT_P (op
))
16631 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
16632 recog_data
.operand_mode
[opno
], op
);
16634 else if (MEM_P (op
)
16635 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
16636 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
16640 rtx cop
= avoid_constant_pool_reference (op
);
16642 /* Casting the address of something to a mode narrower
16643 than a word can cause avoid_constant_pool_reference()
16644 to return the pool reference itself. That's no good to
16645 us here. Lets just hope that we can use the
16646 constant pool value directly. */
16648 cop
= get_pool_constant (XEXP (op
, 0));
16650 push_minipool_fix (insn
, address
,
16651 recog_data
.operand_loc
[opno
],
16652 recog_data
.operand_mode
[opno
], cop
);
16662 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16663 and unions in the context of ARMv8-M Security Extensions. It is used as a
16664 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16665 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16666 or four masks, depending on whether it is being computed for a
16667 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16668 respectively. The tree for the type of the argument or a field within an
16669 argument is passed in ARG_TYPE, the current register this argument or field
16670 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16671 argument or field starts at is passed in STARTING_BIT and the last used bit
16672 is kept in LAST_USED_BIT which is also updated accordingly. */
16674 static unsigned HOST_WIDE_INT
16675 comp_not_to_clear_mask_str_un (tree arg_type
, int * regno
,
16676 uint32_t * padding_bits_to_clear
,
16677 unsigned starting_bit
, int * last_used_bit
)
16680 unsigned HOST_WIDE_INT not_to_clear_reg_mask
= 0;
16682 if (TREE_CODE (arg_type
) == RECORD_TYPE
)
16684 unsigned current_bit
= starting_bit
;
16686 long int offset
, size
;
16689 field
= TYPE_FIELDS (arg_type
);
16692 /* The offset within a structure is always an offset from
16693 the start of that structure. Make sure we take that into the
16694 calculation of the register based offset that we use here. */
16695 offset
= starting_bit
;
16696 offset
+= TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field
), 0);
16699 /* This is the actual size of the field, for bitfields this is the
16700 bitfield width and not the container size. */
16701 size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
16703 if (*last_used_bit
!= offset
)
16705 if (offset
< *last_used_bit
)
16707 /* This field's offset is before the 'last_used_bit', that
16708 means this field goes on the next register. So we need to
16709 pad the rest of the current register and increase the
16710 register number. */
16712 mask
= ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit
);
16715 padding_bits_to_clear
[*regno
] |= mask
;
16716 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16721 /* Otherwise we pad the bits between the last field's end and
16722 the start of the new field. */
16725 mask
= ((uint32_t)-1) >> (32 - offset
);
16726 mask
-= ((uint32_t) 1 << *last_used_bit
) - 1;
16727 padding_bits_to_clear
[*regno
] |= mask
;
16729 current_bit
= offset
;
16732 /* Calculate further padding bits for inner structs/unions too. */
16733 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field
)))
16735 *last_used_bit
= current_bit
;
16736 not_to_clear_reg_mask
16737 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field
), regno
,
16738 padding_bits_to_clear
, offset
,
16743 /* Update 'current_bit' with this field's size. If the
16744 'current_bit' lies in a subsequent register, update 'regno' and
16745 reset 'current_bit' to point to the current bit in that new
16747 current_bit
+= size
;
16748 while (current_bit
>= 32)
16751 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16754 *last_used_bit
= current_bit
;
16757 field
= TREE_CHAIN (field
);
16759 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16761 else if (TREE_CODE (arg_type
) == UNION_TYPE
)
16763 tree field
, field_t
;
16764 int i
, regno_t
, field_size
;
16768 uint32_t padding_bits_to_clear_res
[NUM_ARG_REGS
]
16769 = {-1, -1, -1, -1};
16771 /* To compute the padding bits in a union we only consider bits as
16772 padding bits if they are always either a padding bit or fall outside a
16773 fields size for all fields in the union. */
16774 field
= TYPE_FIELDS (arg_type
);
16777 uint32_t padding_bits_to_clear_t
[NUM_ARG_REGS
]
16778 = {0U, 0U, 0U, 0U};
16779 int last_used_bit_t
= *last_used_bit
;
16781 field_t
= TREE_TYPE (field
);
16783 /* If the field's type is either a record or a union make sure to
16784 compute their padding bits too. */
16785 if (RECORD_OR_UNION_TYPE_P (field_t
))
16786 not_to_clear_reg_mask
16787 |= comp_not_to_clear_mask_str_un (field_t
, ®no_t
,
16788 &padding_bits_to_clear_t
[0],
16789 starting_bit
, &last_used_bit_t
);
16792 field_size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
16793 regno_t
= (field_size
/ 32) + *regno
;
16794 last_used_bit_t
= (starting_bit
+ field_size
) % 32;
16797 for (i
= *regno
; i
< regno_t
; i
++)
16799 /* For all but the last register used by this field only keep the
16800 padding bits that were padding bits in this field. */
16801 padding_bits_to_clear_res
[i
] &= padding_bits_to_clear_t
[i
];
16804 /* For the last register, keep all padding bits that were padding
16805 bits in this field and any padding bits that are still valid
16806 as padding bits but fall outside of this field's size. */
16807 mask
= (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t
)) + 1;
16808 padding_bits_to_clear_res
[regno_t
]
16809 &= padding_bits_to_clear_t
[regno_t
] | mask
;
16811 /* Update the maximum size of the fields in terms of registers used
16812 ('max_reg') and the 'last_used_bit' in said register. */
16813 if (max_reg
< regno_t
)
16816 max_bit
= last_used_bit_t
;
16818 else if (max_reg
== regno_t
&& max_bit
< last_used_bit_t
)
16819 max_bit
= last_used_bit_t
;
16821 field
= TREE_CHAIN (field
);
16824 /* Update the current padding_bits_to_clear using the intersection of the
16825 padding bits of all the fields. */
16826 for (i
=*regno
; i
< max_reg
; i
++)
16827 padding_bits_to_clear
[i
] |= padding_bits_to_clear_res
[i
];
16829 /* Do not keep trailing padding bits, we do not know yet whether this
16830 is the end of the argument. */
16831 mask
= ((uint32_t) 1 << max_bit
) - 1;
16832 padding_bits_to_clear
[max_reg
]
16833 |= padding_bits_to_clear_res
[max_reg
] & mask
;
16836 *last_used_bit
= max_bit
;
16839 /* This function should only be used for structs and unions. */
16840 gcc_unreachable ();
16842 return not_to_clear_reg_mask
;
16845 /* In the context of ARMv8-M Security Extensions, this function is used for both
16846 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16847 registers are used when returning or passing arguments, which is then
16848 returned as a mask. It will also compute a mask to indicate padding/unused
16849 bits for each of these registers, and passes this through the
16850 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16851 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16852 the starting register used to pass this argument or return value is passed
16853 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16854 for struct and union types. */
16856 static unsigned HOST_WIDE_INT
16857 compute_not_to_clear_mask (tree arg_type
, rtx arg_rtx
, int regno
,
16858 uint32_t * padding_bits_to_clear
)
16861 int last_used_bit
= 0;
16862 unsigned HOST_WIDE_INT not_to_clear_mask
;
16864 if (RECORD_OR_UNION_TYPE_P (arg_type
))
16867 = comp_not_to_clear_mask_str_un (arg_type
, ®no
,
16868 padding_bits_to_clear
, 0,
16872 /* If the 'last_used_bit' is not zero, that means we are still using a
16873 part of the last 'regno'. In such cases we must clear the trailing
16874 bits. Otherwise we are not using regno and we should mark it as to
16876 if (last_used_bit
!= 0)
16877 padding_bits_to_clear
[regno
]
16878 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit
) + 1;
16880 not_to_clear_mask
&= ~(HOST_WIDE_INT_1U
<< regno
);
16884 not_to_clear_mask
= 0;
16885 /* We are not dealing with structs nor unions. So these arguments may be
16886 passed in floating point registers too. In some cases a BLKmode is
16887 used when returning or passing arguments in multiple VFP registers. */
16888 if (GET_MODE (arg_rtx
) == BLKmode
)
16893 /* This should really only occur when dealing with the hard-float
16895 gcc_assert (TARGET_HARD_FLOAT_ABI
);
16897 for (i
= 0; i
< XVECLEN (arg_rtx
, 0); i
++)
16899 reg
= XEXP (XVECEXP (arg_rtx
, 0, i
), 0);
16900 gcc_assert (REG_P (reg
));
16902 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (reg
);
16904 /* If we are dealing with DF mode, make sure we don't
16905 clear either of the registers it addresses. */
16906 arg_regs
= ARM_NUM_REGS (GET_MODE (reg
));
16909 unsigned HOST_WIDE_INT mask
;
16910 mask
= HOST_WIDE_INT_1U
<< (REGNO (reg
) + arg_regs
);
16911 mask
-= HOST_WIDE_INT_1U
<< REGNO (reg
);
16912 not_to_clear_mask
|= mask
;
16918 /* Otherwise we can rely on the MODE to determine how many registers
16919 are being used by this argument. */
16920 int arg_regs
= ARM_NUM_REGS (GET_MODE (arg_rtx
));
16921 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
16924 unsigned HOST_WIDE_INT
16925 mask
= HOST_WIDE_INT_1U
<< (REGNO (arg_rtx
) + arg_regs
);
16926 mask
-= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
16927 not_to_clear_mask
|= mask
;
16932 return not_to_clear_mask
;
16935 /* Clears caller saved registers not used to pass arguments before a
16936 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
16937 registers is done in __gnu_cmse_nonsecure_call libcall.
16938 See libgcc/config/arm/cmse_nonsecure_call.S. */
16941 cmse_nonsecure_call_clear_caller_saved (void)
16945 FOR_EACH_BB_FN (bb
, cfun
)
16949 FOR_BB_INSNS (bb
, insn
)
16951 uint64_t to_clear_mask
, float_mask
;
16953 rtx pat
, call
, unspec
, reg
, cleared_reg
, tmp
;
16954 unsigned int regno
, maxregno
;
16956 CUMULATIVE_ARGS args_so_far_v
;
16957 cumulative_args_t args_so_far
;
16958 tree arg_type
, fntype
;
16959 bool using_r4
, first_param
= true;
16960 function_args_iterator args_iter
;
16961 uint32_t padding_bits_to_clear
[4] = {0U, 0U, 0U, 0U};
16962 uint32_t * padding_bits_to_clear_ptr
= &padding_bits_to_clear
[0];
16964 if (!NONDEBUG_INSN_P (insn
))
16967 if (!CALL_P (insn
))
16970 pat
= PATTERN (insn
);
16971 gcc_assert (GET_CODE (pat
) == PARALLEL
&& XVECLEN (pat
, 0) > 0);
16972 call
= XVECEXP (pat
, 0, 0);
16974 /* Get the real call RTX if the insn sets a value, ie. returns. */
16975 if (GET_CODE (call
) == SET
)
16976 call
= SET_SRC (call
);
16978 /* Check if it is a cmse_nonsecure_call. */
16979 unspec
= XEXP (call
, 0);
16980 if (GET_CODE (unspec
) != UNSPEC
16981 || XINT (unspec
, 1) != UNSPEC_NONSECURE_MEM
)
16984 /* Determine the caller-saved registers we need to clear. */
16985 to_clear_mask
= (1LL << (NUM_ARG_REGS
)) - 1;
16986 maxregno
= NUM_ARG_REGS
- 1;
16987 /* Only look at the caller-saved floating point registers in case of
16988 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16989 lazy store and loads which clear both caller- and callee-saved
16991 if (TARGET_HARD_FLOAT_ABI
)
16993 float_mask
= (1LL << (D7_VFP_REGNUM
+ 1)) - 1;
16994 float_mask
&= ~((1LL << FIRST_VFP_REGNUM
) - 1);
16995 to_clear_mask
|= float_mask
;
16996 maxregno
= D7_VFP_REGNUM
;
16999 /* Make sure the register used to hold the function address is not
17001 address
= RTVEC_ELT (XVEC (unspec
, 0), 0);
17002 gcc_assert (MEM_P (address
));
17003 gcc_assert (REG_P (XEXP (address
, 0)));
17004 to_clear_mask
&= ~(1LL << REGNO (XEXP (address
, 0)));
17006 /* Set basic block of call insn so that df rescan is performed on
17007 insns inserted here. */
17008 set_block_for_insn (insn
, bb
);
17009 df_set_flags (DF_DEFER_INSN_RESCAN
);
17012 /* Make sure the scheduler doesn't schedule other insns beyond
17014 emit_insn (gen_blockage ());
17016 /* Walk through all arguments and clear registers appropriately.
17018 fntype
= TREE_TYPE (MEM_EXPR (address
));
17019 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
,
17021 args_so_far
= pack_cumulative_args (&args_so_far_v
);
17022 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
17025 machine_mode arg_mode
= TYPE_MODE (arg_type
);
17027 if (VOID_TYPE_P (arg_type
))
17031 arm_function_arg_advance (args_so_far
, arg_mode
, arg_type
,
17034 arg_rtx
= arm_function_arg (args_so_far
, arg_mode
, arg_type
,
17036 gcc_assert (REG_P (arg_rtx
));
17038 &= ~compute_not_to_clear_mask (arg_type
, arg_rtx
,
17040 padding_bits_to_clear_ptr
);
17042 first_param
= false;
17045 /* Clear padding bits where needed. */
17046 cleared_reg
= XEXP (address
, 0);
17047 reg
= gen_rtx_REG (SImode
, IP_REGNUM
);
17049 for (regno
= R0_REGNUM
; regno
< NUM_ARG_REGS
; regno
++)
17051 if (padding_bits_to_clear
[regno
] == 0)
17054 /* If this is a Thumb-1 target copy the address of the function
17055 we are calling from 'r4' into 'ip' such that we can use r4 to
17056 clear the unused bits in the arguments. */
17057 if (TARGET_THUMB1
&& !using_r4
)
17061 emit_move_insn (gen_rtx_REG (SImode
, IP_REGNUM
),
17065 tmp
= GEN_INT ((((~padding_bits_to_clear
[regno
]) << 16u) >> 16u));
17066 emit_move_insn (reg
, tmp
);
17067 /* Also fill the top half of the negated
17068 padding_bits_to_clear. */
17069 if (((~padding_bits_to_clear
[regno
]) >> 16) > 0)
17071 tmp
= GEN_INT ((~padding_bits_to_clear
[regno
]) >> 16);
17072 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode
, reg
,
17078 emit_insn (gen_andsi3 (gen_rtx_REG (SImode
, regno
),
17079 gen_rtx_REG (SImode
, regno
),
17084 emit_move_insn (cleared_reg
,
17085 gen_rtx_REG (SImode
, IP_REGNUM
));
17087 /* We use right shift and left shift to clear the LSB of the address
17088 we jump to instead of using bic, to avoid having to use an extra
17089 register on Thumb-1. */
17090 tmp
= gen_rtx_LSHIFTRT (SImode
, cleared_reg
, const1_rtx
);
17091 emit_insn (gen_rtx_SET (cleared_reg
, tmp
));
17092 tmp
= gen_rtx_ASHIFT (SImode
, cleared_reg
, const1_rtx
);
17093 emit_insn (gen_rtx_SET (cleared_reg
, tmp
));
17095 /* Clearing all registers that leak before doing a non-secure
17097 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
17099 if (!(to_clear_mask
& (1LL << regno
)))
17102 /* If regno is an even vfp register and its successor is also to
17103 be cleared, use vmov. */
17104 if (IS_VFP_REGNUM (regno
))
17106 if (TARGET_VFP_DOUBLE
17107 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
17108 && to_clear_mask
& (1LL << (regno
+ 1)))
17109 emit_move_insn (gen_rtx_REG (DFmode
, regno
++),
17110 CONST0_RTX (DFmode
));
17112 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
17113 CONST0_RTX (SFmode
));
17116 emit_move_insn (gen_rtx_REG (SImode
, regno
), cleared_reg
);
17119 seq
= get_insns ();
17121 emit_insn_before (seq
, insn
);
17127 /* Rewrite move insn into subtract of 0 if the condition codes will
17128 be useful in next conditional jump insn. */
17131 thumb1_reorg (void)
17135 FOR_EACH_BB_FN (bb
, cfun
)
17138 rtx cmp
, op0
, op1
, set
= NULL
;
17139 rtx_insn
*prev
, *insn
= BB_END (bb
);
17140 bool insn_clobbered
= false;
17142 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
17143 insn
= PREV_INSN (insn
);
17145 /* Find the last cbranchsi4_insn in basic block BB. */
17146 if (insn
== BB_HEAD (bb
)
17147 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
17150 /* Get the register with which we are comparing. */
17151 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
17152 op0
= XEXP (cmp
, 0);
17153 op1
= XEXP (cmp
, 1);
17155 /* Check that comparison is against ZERO. */
17156 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
17159 /* Find the first flag setting insn before INSN in basic block BB. */
17160 gcc_assert (insn
!= BB_HEAD (bb
));
17161 for (prev
= PREV_INSN (insn
);
17163 && prev
!= BB_HEAD (bb
)
17165 || DEBUG_INSN_P (prev
)
17166 || ((set
= single_set (prev
)) != NULL
17167 && get_attr_conds (prev
) == CONDS_NOCOND
)));
17168 prev
= PREV_INSN (prev
))
17170 if (reg_set_p (op0
, prev
))
17171 insn_clobbered
= true;
17174 /* Skip if op0 is clobbered by insn other than prev. */
17175 if (insn_clobbered
)
17181 dest
= SET_DEST (set
);
17182 src
= SET_SRC (set
);
17183 if (!low_register_operand (dest
, SImode
)
17184 || !low_register_operand (src
, SImode
))
17187 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17188 in INSN. Both src and dest of the move insn are checked. */
17189 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
17191 dest
= copy_rtx (dest
);
17192 src
= copy_rtx (src
);
17193 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
17194 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
17195 INSN_CODE (prev
) = -1;
17196 /* Set test register in INSN to dest. */
17197 XEXP (cmp
, 0) = copy_rtx (dest
);
17198 INSN_CODE (insn
) = -1;
17203 /* Convert instructions to their cc-clobbering variant if possible, since
17204 that allows us to use smaller encodings. */
17207 thumb2_reorg (void)
17212 INIT_REG_SET (&live
);
17214 /* We are freeing block_for_insn in the toplev to keep compatibility
17215 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17216 compute_bb_for_insn ();
17219 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17221 FOR_EACH_BB_FN (bb
, cfun
)
17223 if ((current_tune
->disparage_flag_setting_t16_encodings
17224 == tune_params::DISPARAGE_FLAGS_ALL
)
17225 && optimize_bb_for_speed_p (bb
))
17229 Convert_Action action
= SKIP
;
17230 Convert_Action action_for_partial_flag_setting
17231 = ((current_tune
->disparage_flag_setting_t16_encodings
17232 != tune_params::DISPARAGE_FLAGS_NEITHER
)
17233 && optimize_bb_for_speed_p (bb
))
17236 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17237 df_simulate_initialize_backwards (bb
, &live
);
17238 FOR_BB_INSNS_REVERSE (bb
, insn
)
17240 if (NONJUMP_INSN_P (insn
)
17241 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17242 && GET_CODE (PATTERN (insn
)) == SET
)
17245 rtx pat
= PATTERN (insn
);
17246 rtx dst
= XEXP (pat
, 0);
17247 rtx src
= XEXP (pat
, 1);
17248 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17250 if (UNARY_P (src
) || BINARY_P (src
))
17251 op0
= XEXP (src
, 0);
17253 if (BINARY_P (src
))
17254 op1
= XEXP (src
, 1);
17256 if (low_register_operand (dst
, SImode
))
17258 switch (GET_CODE (src
))
17261 /* Adding two registers and storing the result
17262 in the first source is already a 16-bit
17264 if (rtx_equal_p (dst
, op0
)
17265 && register_operand (op1
, SImode
))
17268 if (low_register_operand (op0
, SImode
))
17270 /* ADDS <Rd>,<Rn>,<Rm> */
17271 if (low_register_operand (op1
, SImode
))
17273 /* ADDS <Rdn>,#<imm8> */
17274 /* SUBS <Rdn>,#<imm8> */
17275 else if (rtx_equal_p (dst
, op0
)
17276 && CONST_INT_P (op1
)
17277 && IN_RANGE (INTVAL (op1
), -255, 255))
17279 /* ADDS <Rd>,<Rn>,#<imm3> */
17280 /* SUBS <Rd>,<Rn>,#<imm3> */
17281 else if (CONST_INT_P (op1
)
17282 && IN_RANGE (INTVAL (op1
), -7, 7))
17285 /* ADCS <Rd>, <Rn> */
17286 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17287 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17288 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17290 && COMPARISON_P (op1
)
17291 && cc_register (XEXP (op1
, 0), VOIDmode
)
17292 && maybe_get_arm_condition_code (op1
) == ARM_CS
17293 && XEXP (op1
, 1) == const0_rtx
)
17298 /* RSBS <Rd>,<Rn>,#0
17299 Not handled here: see NEG below. */
17300 /* SUBS <Rd>,<Rn>,#<imm3>
17302 Not handled here: see PLUS above. */
17303 /* SUBS <Rd>,<Rn>,<Rm> */
17304 if (low_register_operand (op0
, SImode
)
17305 && low_register_operand (op1
, SImode
))
17310 /* MULS <Rdm>,<Rn>,<Rdm>
17311 As an exception to the rule, this is only used
17312 when optimizing for size since MULS is slow on all
17313 known implementations. We do not even want to use
17314 MULS in cold code, if optimizing for speed, so we
17315 test the global flag here. */
17316 if (!optimize_size
)
17318 /* Fall through. */
17322 /* ANDS <Rdn>,<Rm> */
17323 if (rtx_equal_p (dst
, op0
)
17324 && low_register_operand (op1
, SImode
))
17325 action
= action_for_partial_flag_setting
;
17326 else if (rtx_equal_p (dst
, op1
)
17327 && low_register_operand (op0
, SImode
))
17328 action
= action_for_partial_flag_setting
== SKIP
17329 ? SKIP
: SWAP_CONV
;
17335 /* ASRS <Rdn>,<Rm> */
17336 /* LSRS <Rdn>,<Rm> */
17337 /* LSLS <Rdn>,<Rm> */
17338 if (rtx_equal_p (dst
, op0
)
17339 && low_register_operand (op1
, SImode
))
17340 action
= action_for_partial_flag_setting
;
17341 /* ASRS <Rd>,<Rm>,#<imm5> */
17342 /* LSRS <Rd>,<Rm>,#<imm5> */
17343 /* LSLS <Rd>,<Rm>,#<imm5> */
17344 else if (low_register_operand (op0
, SImode
)
17345 && CONST_INT_P (op1
)
17346 && IN_RANGE (INTVAL (op1
), 0, 31))
17347 action
= action_for_partial_flag_setting
;
17351 /* RORS <Rdn>,<Rm> */
17352 if (rtx_equal_p (dst
, op0
)
17353 && low_register_operand (op1
, SImode
))
17354 action
= action_for_partial_flag_setting
;
17358 /* MVNS <Rd>,<Rm> */
17359 if (low_register_operand (op0
, SImode
))
17360 action
= action_for_partial_flag_setting
;
17364 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17365 if (low_register_operand (op0
, SImode
))
17370 /* MOVS <Rd>,#<imm8> */
17371 if (CONST_INT_P (src
)
17372 && IN_RANGE (INTVAL (src
), 0, 255))
17373 action
= action_for_partial_flag_setting
;
17377 /* MOVS and MOV<c> with registers have different
17378 encodings, so are not relevant here. */
17386 if (action
!= SKIP
)
17388 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17389 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17392 if (action
== SWAP_CONV
)
17394 src
= copy_rtx (src
);
17395 XEXP (src
, 0) = op1
;
17396 XEXP (src
, 1) = op0
;
17397 pat
= gen_rtx_SET (dst
, src
);
17398 vec
= gen_rtvec (2, pat
, clobber
);
17400 else /* action == CONV */
17401 vec
= gen_rtvec (2, pat
, clobber
);
17403 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17404 INSN_CODE (insn
) = -1;
17408 if (NONDEBUG_INSN_P (insn
))
17409 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17413 CLEAR_REG_SET (&live
);
17416 /* Gcc puts the pool in the wrong place for ARM, since we can only
17417 load addresses a limited distance around the pc. We do some
17418 special munging to move the constant pool values to the correct
17419 point in the code. */
17424 HOST_WIDE_INT address
= 0;
17428 cmse_nonsecure_call_clear_caller_saved ();
17431 else if (TARGET_THUMB2
)
17434 /* Ensure all insns that must be split have been split at this point.
17435 Otherwise, the pool placement code below may compute incorrect
17436 insn lengths. Note that when optimizing, all insns have already
17437 been split at this point. */
17439 split_all_insns_noflow ();
17441 /* Make sure we do not attempt to create a literal pool even though it should
17442 no longer be necessary to create any. */
17443 if (arm_disable_literal_pool
)
17446 minipool_fix_head
= minipool_fix_tail
= NULL
;
17448 /* The first insn must always be a note, or the code below won't
17449 scan it properly. */
17450 insn
= get_insns ();
17451 gcc_assert (NOTE_P (insn
));
17454 /* Scan all the insns and record the operands that will need fixing. */
17455 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17457 if (BARRIER_P (insn
))
17458 push_minipool_barrier (insn
, address
);
17459 else if (INSN_P (insn
))
17461 rtx_jump_table_data
*table
;
17463 note_invalid_constants (insn
, address
, true);
17464 address
+= get_attr_length (insn
);
17466 /* If the insn is a vector jump, add the size of the table
17467 and skip the table. */
17468 if (tablejump_p (insn
, NULL
, &table
))
17470 address
+= get_jump_table_size (table
);
17474 else if (LABEL_P (insn
))
17475 /* Add the worst-case padding due to alignment. We don't add
17476 the _current_ padding because the minipool insertions
17477 themselves might change it. */
17478 address
+= get_label_padding (insn
);
17481 fix
= minipool_fix_head
;
17483 /* Now scan the fixups and perform the required changes. */
17488 Mfix
* last_added_fix
;
17489 Mfix
* last_barrier
= NULL
;
17492 /* Skip any further barriers before the next fix. */
17493 while (fix
&& BARRIER_P (fix
->insn
))
17496 /* No more fixes. */
17500 last_added_fix
= NULL
;
17502 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17504 if (BARRIER_P (ftmp
->insn
))
17506 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17509 last_barrier
= ftmp
;
17511 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17514 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17517 /* If we found a barrier, drop back to that; any fixes that we
17518 could have reached but come after the barrier will now go in
17519 the next mini-pool. */
17520 if (last_barrier
!= NULL
)
17522 /* Reduce the refcount for those fixes that won't go into this
17524 for (fdel
= last_barrier
->next
;
17525 fdel
&& fdel
!= ftmp
;
17528 fdel
->minipool
->refcount
--;
17529 fdel
->minipool
= NULL
;
17532 ftmp
= last_barrier
;
17536 /* ftmp is first fix that we can't fit into this pool and
17537 there no natural barriers that we could use. Insert a
17538 new barrier in the code somewhere between the previous
17539 fix and this one, and arrange to jump around it. */
17540 HOST_WIDE_INT max_address
;
17542 /* The last item on the list of fixes must be a barrier, so
17543 we can never run off the end of the list of fixes without
17544 last_barrier being set. */
17547 max_address
= minipool_vector_head
->max_address
;
17548 /* Check that there isn't another fix that is in range that
17549 we couldn't fit into this pool because the pool was
17550 already too large: we need to put the pool before such an
17551 instruction. The pool itself may come just after the
17552 fix because create_fix_barrier also allows space for a
17553 jump instruction. */
17554 if (ftmp
->address
< max_address
)
17555 max_address
= ftmp
->address
+ 1;
17557 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17560 assign_minipool_offsets (last_barrier
);
17564 if (!BARRIER_P (ftmp
->insn
)
17565 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17572 /* Scan over the fixes we have identified for this pool, fixing them
17573 up and adding the constants to the pool itself. */
17574 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17575 this_fix
= this_fix
->next
)
17576 if (!BARRIER_P (this_fix
->insn
))
17579 = plus_constant (Pmode
,
17580 gen_rtx_LABEL_REF (VOIDmode
,
17581 minipool_vector_label
),
17582 this_fix
->minipool
->offset
);
17583 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17586 dump_minipool (last_barrier
->insn
);
17590 /* From now on we must synthesize any constants that we can't handle
17591 directly. This can happen if the RTL gets split during final
17592 instruction generation. */
17593 cfun
->machine
->after_arm_reorg
= 1;
17595 /* Free the minipool memory. */
17596 obstack_free (&minipool_obstack
, minipool_startobj
);
17599 /* Routines to output assembly language. */
17601 /* Return string representation of passed in real value. */
17602 static const char *
17603 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17605 if (!fp_consts_inited
)
17608 gcc_assert (real_equal (r
, &value_fp0
));
17612 /* OPERANDS[0] is the entire list of insns that constitute pop,
17613 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17614 is in the list, UPDATE is true iff the list contains explicit
17615 update of base register. */
17617 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17623 const char *conditional
;
17624 int num_saves
= XVECLEN (operands
[0], 0);
17625 unsigned int regno
;
17626 unsigned int regno_base
= REGNO (operands
[1]);
17627 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
17630 offset
+= update
? 1 : 0;
17631 offset
+= return_pc
? 1 : 0;
17633 /* Is the base register in the list? */
17634 for (i
= offset
; i
< num_saves
; i
++)
17636 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17637 /* If SP is in the list, then the base register must be SP. */
17638 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17639 /* If base register is in the list, there must be no explicit update. */
17640 if (regno
== regno_base
)
17641 gcc_assert (!update
);
17644 conditional
= reverse
? "%?%D0" : "%?%d0";
17645 /* Can't use POP if returning from an interrupt. */
17646 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
17647 sprintf (pattern
, "pop%s\t{", conditional
);
17650 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17651 It's just a convention, their semantics are identical. */
17652 if (regno_base
== SP_REGNUM
)
17653 sprintf (pattern
, "ldmfd%s\t", conditional
);
17655 sprintf (pattern
, "ldmia%s\t", conditional
);
17657 sprintf (pattern
, "ldm%s\t", conditional
);
17659 strcat (pattern
, reg_names
[regno_base
]);
17661 strcat (pattern
, "!, {");
17663 strcat (pattern
, ", {");
17666 /* Output the first destination register. */
17668 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17670 /* Output the rest of the destination registers. */
17671 for (i
= offset
+ 1; i
< num_saves
; i
++)
17673 strcat (pattern
, ", ");
17675 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17678 strcat (pattern
, "}");
17680 if (interrupt_p
&& return_pc
)
17681 strcat (pattern
, "^");
17683 output_asm_insn (pattern
, &cond
);
17687 /* Output the assembly for a store multiple. */
17690 vfp_output_vstmd (rtx
* operands
)
17696 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
17697 ? XEXP (operands
[0], 0)
17698 : XEXP (XEXP (operands
[0], 0), 0);
17699 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
17702 strcpy (pattern
, "vpush%?.64\t{%P1");
17704 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
17706 p
= strlen (pattern
);
17708 gcc_assert (REG_P (operands
[1]));
17710 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17711 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17713 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17715 strcpy (&pattern
[p
], "}");
17717 output_asm_insn (pattern
, operands
);
17722 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17723 number of bytes pushed. */
17726 vfp_emit_fstmd (int base_reg
, int count
)
17733 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17734 register pairs are stored by a store multiple insn. We avoid this
17735 by pushing an extra pair. */
17736 if (count
== 2 && !arm_arch6
)
17738 if (base_reg
== LAST_VFP_REGNUM
- 3)
17743 /* FSTMD may not store more than 16 doubleword registers at once. Split
17744 larger stores into multiple parts (up to a maximum of two, in
17749 /* NOTE: base_reg is an internal register number, so each D register
17751 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17752 saved
+= vfp_emit_fstmd (base_reg
, 16);
17756 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17757 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17759 reg
= gen_rtx_REG (DFmode
, base_reg
);
17762 XVECEXP (par
, 0, 0)
17763 = gen_rtx_SET (gen_frame_mem
17765 gen_rtx_PRE_MODIFY (Pmode
,
17768 (Pmode
, stack_pointer_rtx
,
17771 gen_rtx_UNSPEC (BLKmode
,
17772 gen_rtvec (1, reg
),
17773 UNSPEC_PUSH_MULT
));
17775 tmp
= gen_rtx_SET (stack_pointer_rtx
,
17776 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17777 RTX_FRAME_RELATED_P (tmp
) = 1;
17778 XVECEXP (dwarf
, 0, 0) = tmp
;
17780 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
17781 RTX_FRAME_RELATED_P (tmp
) = 1;
17782 XVECEXP (dwarf
, 0, 1) = tmp
;
17784 for (i
= 1; i
< count
; i
++)
17786 reg
= gen_rtx_REG (DFmode
, base_reg
);
17788 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17790 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
17791 plus_constant (Pmode
,
17795 RTX_FRAME_RELATED_P (tmp
) = 1;
17796 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17799 par
= emit_insn (par
);
17800 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17801 RTX_FRAME_RELATED_P (par
) = 1;
17806 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
17807 has the cmse_nonsecure_call attribute and returns false otherwise. */
17810 detect_cmse_nonsecure_call (tree addr
)
17815 tree fntype
= TREE_TYPE (addr
);
17816 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_call",
17817 TYPE_ATTRIBUTES (fntype
)))
17823 /* Emit a call instruction with pattern PAT. ADDR is the address of
17824 the call target. */
17827 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
17831 insn
= emit_call_insn (pat
);
17833 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17834 If the call might use such an entry, add a use of the PIC register
17835 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17836 if (TARGET_VXWORKS_RTP
17839 && GET_CODE (addr
) == SYMBOL_REF
17840 && (SYMBOL_REF_DECL (addr
)
17841 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
17842 : !SYMBOL_REF_LOCAL_P (addr
)))
17844 require_pic_register ();
17845 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
17848 if (TARGET_AAPCS_BASED
)
17850 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17851 linker. We need to add an IP clobber to allow setting
17852 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17853 is not needed since it's a fixed register. */
17854 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
17855 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
17859 /* Output a 'call' insn. */
17861 output_call (rtx
*operands
)
17863 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17865 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17866 if (REGNO (operands
[0]) == LR_REGNUM
)
17868 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17869 output_asm_insn ("mov%?\t%0, %|lr", operands
);
17872 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17874 if (TARGET_INTERWORK
|| arm_arch4t
)
17875 output_asm_insn ("bx%?\t%0", operands
);
17877 output_asm_insn ("mov%?\t%|pc, %0", operands
);
17882 /* Output a move from arm registers to arm registers of a long double
17883 OPERANDS[0] is the destination.
17884 OPERANDS[1] is the source. */
17886 output_mov_long_double_arm_from_arm (rtx
*operands
)
17888 /* We have to be careful here because the two might overlap. */
17889 int dest_start
= REGNO (operands
[0]);
17890 int src_start
= REGNO (operands
[1]);
17894 if (dest_start
< src_start
)
17896 for (i
= 0; i
< 3; i
++)
17898 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17899 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17900 output_asm_insn ("mov%?\t%0, %1", ops
);
17905 for (i
= 2; i
>= 0; i
--)
17907 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17908 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17909 output_asm_insn ("mov%?\t%0, %1", ops
);
17917 arm_emit_movpair (rtx dest
, rtx src
)
17919 /* If the src is an immediate, simplify it. */
17920 if (CONST_INT_P (src
))
17922 HOST_WIDE_INT val
= INTVAL (src
);
17923 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
17924 if ((val
>> 16) & 0x0000ffff)
17926 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
17928 GEN_INT ((val
>> 16) & 0x0000ffff));
17929 rtx_insn
*insn
= get_last_insn ();
17930 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
17934 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
17935 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
17936 rtx_insn
*insn
= get_last_insn ();
17937 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
17940 /* Output a move between double words. It must be REG<-MEM
17943 output_move_double (rtx
*operands
, bool emit
, int *count
)
17945 enum rtx_code code0
= GET_CODE (operands
[0]);
17946 enum rtx_code code1
= GET_CODE (operands
[1]);
17951 /* The only case when this might happen is when
17952 you are looking at the length of a DImode instruction
17953 that has an invalid constant in it. */
17954 if (code0
== REG
&& code1
!= MEM
)
17956 gcc_assert (!emit
);
17963 unsigned int reg0
= REGNO (operands
[0]);
17965 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17967 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17969 switch (GET_CODE (XEXP (operands
[1], 0)))
17976 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17977 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
17979 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
17984 gcc_assert (TARGET_LDRD
);
17986 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
17993 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
17995 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
18003 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
18005 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
18010 gcc_assert (TARGET_LDRD
);
18012 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
18017 /* Autoicrement addressing modes should never have overlapping
18018 base and destination registers, and overlapping index registers
18019 are already prohibited, so this doesn't need to worry about
18021 otherops
[0] = operands
[0];
18022 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
18023 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
18025 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
18027 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
18029 /* Registers overlap so split out the increment. */
18032 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
18033 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
18040 /* Use a single insn if we can.
18041 FIXME: IWMMXT allows offsets larger than ldrd can
18042 handle, fix these up with a pair of ldr. */
18044 || !CONST_INT_P (otherops
[2])
18045 || (INTVAL (otherops
[2]) > -256
18046 && INTVAL (otherops
[2]) < 256))
18049 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
18055 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
18056 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18066 /* Use a single insn if we can.
18067 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18068 fix these up with a pair of ldr. */
18070 || !CONST_INT_P (otherops
[2])
18071 || (INTVAL (otherops
[2]) > -256
18072 && INTVAL (otherops
[2]) < 256))
18075 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
18081 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18082 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
18092 /* We might be able to use ldrd %0, %1 here. However the range is
18093 different to ldr/adr, and it is broken on some ARMv7-M
18094 implementations. */
18095 /* Use the second register of the pair to avoid problematic
18097 otherops
[1] = operands
[1];
18099 output_asm_insn ("adr%?\t%0, %1", otherops
);
18100 operands
[1] = otherops
[0];
18104 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18106 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
18113 /* ??? This needs checking for thumb2. */
18115 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18116 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18118 otherops
[0] = operands
[0];
18119 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18120 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18122 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18124 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18126 switch ((int) INTVAL (otherops
[2]))
18130 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
18136 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
18142 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
18146 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18147 operands
[1] = otherops
[0];
18149 && (REG_P (otherops
[2])
18151 || (CONST_INT_P (otherops
[2])
18152 && INTVAL (otherops
[2]) > -256
18153 && INTVAL (otherops
[2]) < 256)))
18155 if (reg_overlap_mentioned_p (operands
[0],
18158 /* Swap base and index registers over to
18159 avoid a conflict. */
18160 std::swap (otherops
[1], otherops
[2]);
18162 /* If both registers conflict, it will usually
18163 have been fixed by a splitter. */
18164 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18165 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18169 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18170 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18177 otherops
[0] = operands
[0];
18179 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
18184 if (CONST_INT_P (otherops
[2]))
18188 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18189 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18191 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18197 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18203 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18210 return "ldrd%?\t%0, [%1]";
18212 return "ldmia%?\t%1, %M0";
18216 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18217 /* Take care of overlapping base/data reg. */
18218 if (reg_mentioned_p (operands
[0], operands
[1]))
18222 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18223 output_asm_insn ("ldr%?\t%0, %1", operands
);
18233 output_asm_insn ("ldr%?\t%0, %1", operands
);
18234 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18244 /* Constraints should ensure this. */
18245 gcc_assert (code0
== MEM
&& code1
== REG
);
18246 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18247 || (TARGET_ARM
&& TARGET_LDRD
));
18249 switch (GET_CODE (XEXP (operands
[0], 0)))
18255 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
18257 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18262 gcc_assert (TARGET_LDRD
);
18264 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
18271 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
18273 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
18281 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
18283 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
18288 gcc_assert (TARGET_LDRD
);
18290 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
18295 otherops
[0] = operands
[1];
18296 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18297 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18299 /* IWMMXT allows offsets larger than ldrd can handle,
18300 fix these up with a pair of ldr. */
18302 && CONST_INT_P (otherops
[2])
18303 && (INTVAL(otherops
[2]) <= -256
18304 || INTVAL(otherops
[2]) >= 256))
18306 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18310 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18311 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18320 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18321 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18327 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18330 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
18335 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
18340 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18341 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18343 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18347 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
18354 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
18361 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
18366 && (REG_P (otherops
[2])
18368 || (CONST_INT_P (otherops
[2])
18369 && INTVAL (otherops
[2]) > -256
18370 && INTVAL (otherops
[2]) < 256)))
18372 otherops
[0] = operands
[1];
18373 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18375 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
18381 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18382 otherops
[1] = operands
[1];
18385 output_asm_insn ("str%?\t%1, %0", operands
);
18386 output_asm_insn ("str%?\t%H1, %0", otherops
);
18396 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18397 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18400 output_move_quad (rtx
*operands
)
18402 if (REG_P (operands
[0]))
18404 /* Load, or reg->reg move. */
18406 if (MEM_P (operands
[1]))
18408 switch (GET_CODE (XEXP (operands
[1], 0)))
18411 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18416 output_asm_insn ("adr%?\t%0, %1", operands
);
18417 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
18421 gcc_unreachable ();
18429 gcc_assert (REG_P (operands
[1]));
18431 dest
= REGNO (operands
[0]);
18432 src
= REGNO (operands
[1]);
18434 /* This seems pretty dumb, but hopefully GCC won't try to do it
18437 for (i
= 0; i
< 4; i
++)
18439 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18440 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18441 output_asm_insn ("mov%?\t%0, %1", ops
);
18444 for (i
= 3; i
>= 0; i
--)
18446 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18447 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18448 output_asm_insn ("mov%?\t%0, %1", ops
);
18454 gcc_assert (MEM_P (operands
[0]));
18455 gcc_assert (REG_P (operands
[1]));
18456 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18458 switch (GET_CODE (XEXP (operands
[0], 0)))
18461 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18465 gcc_unreachable ();
18472 /* Output a VFP load or store instruction. */
18475 output_move_vfp (rtx
*operands
)
18477 rtx reg
, mem
, addr
, ops
[2];
18478 int load
= REG_P (operands
[0]);
18479 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18480 int sp
= (!TARGET_VFP_FP16INST
18481 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
18482 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18487 reg
= operands
[!load
];
18488 mem
= operands
[load
];
18490 mode
= GET_MODE (reg
);
18492 gcc_assert (REG_P (reg
));
18493 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18494 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
18500 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18501 gcc_assert (MEM_P (mem
));
18503 addr
= XEXP (mem
, 0);
18505 switch (GET_CODE (addr
))
18508 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18509 ops
[0] = XEXP (addr
, 0);
18514 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18515 ops
[0] = XEXP (addr
, 0);
18520 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18526 sprintf (buff
, templ
,
18527 load
? "ld" : "st",
18528 dp
? "64" : sp
? "32" : "16",
18530 integer_p
? "\t%@ int" : "");
18531 output_asm_insn (buff
, ops
);
18536 /* Output a Neon double-word or quad-word load or store, or a load
18537 or store for larger structure modes.
18539 WARNING: The ordering of elements is weird in big-endian mode,
18540 because the EABI requires that vectors stored in memory appear
18541 as though they were stored by a VSTM, as required by the EABI.
18542 GCC RTL defines element ordering based on in-memory order.
18543 This can be different from the architectural ordering of elements
18544 within a NEON register. The intrinsics defined in arm_neon.h use the
18545 NEON register element ordering, not the GCC RTL element ordering.
18547 For example, the in-memory ordering of a big-endian a quadword
18548 vector with 16-bit elements when stored from register pair {d0,d1}
18549 will be (lowest address first, d0[N] is NEON register element N):
18551 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18553 When necessary, quadword registers (dN, dN+1) are moved to ARM
18554 registers from rN in the order:
18556 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18558 So that STM/LDM can be used on vectors in ARM registers, and the
18559 same memory layout will result as if VSTM/VLDM were used.
18561 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18562 possible, which allows use of appropriate alignment tags.
18563 Note that the choice of "64" is independent of the actual vector
18564 element size; this size simply ensures that the behavior is
18565 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18567 Due to limitations of those instructions, use of VST1.64/VLD1.64
18568 is not possible if:
18569 - the address contains PRE_DEC, or
18570 - the mode refers to more than 4 double-word registers
18572 In those cases, it would be possible to replace VSTM/VLDM by a
18573 sequence of instructions; this is not currently implemented since
18574 this is not certain to actually improve performance. */
18577 output_move_neon (rtx
*operands
)
18579 rtx reg
, mem
, addr
, ops
[2];
18580 int regno
, nregs
, load
= REG_P (operands
[0]);
18585 reg
= operands
[!load
];
18586 mem
= operands
[load
];
18588 mode
= GET_MODE (reg
);
18590 gcc_assert (REG_P (reg
));
18591 regno
= REGNO (reg
);
18592 nregs
= REG_NREGS (reg
) / 2;
18593 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18594 || NEON_REGNO_OK_FOR_QUAD (regno
));
18595 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18596 || VALID_NEON_QREG_MODE (mode
)
18597 || VALID_NEON_STRUCT_MODE (mode
));
18598 gcc_assert (MEM_P (mem
));
18600 addr
= XEXP (mem
, 0);
18602 /* Strip off const from addresses like (const (plus (...))). */
18603 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18604 addr
= XEXP (addr
, 0);
18606 switch (GET_CODE (addr
))
18609 /* We have to use vldm / vstm for too-large modes. */
18612 templ
= "v%smia%%?\t%%0!, %%h1";
18613 ops
[0] = XEXP (addr
, 0);
18617 templ
= "v%s1.64\t%%h1, %%A0";
18624 /* We have to use vldm / vstm in this case, since there is no
18625 pre-decrement form of the vld1 / vst1 instructions. */
18626 templ
= "v%smdb%%?\t%%0!, %%h1";
18627 ops
[0] = XEXP (addr
, 0);
18632 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18633 gcc_unreachable ();
18636 /* We have to use vldm / vstm for too-large modes. */
18640 templ
= "v%smia%%?\t%%m0, %%h1";
18642 templ
= "v%s1.64\t%%h1, %%A0";
18648 /* Fall through. */
18654 for (i
= 0; i
< nregs
; i
++)
18656 /* We're only using DImode here because it's a convenient size. */
18657 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18658 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18659 if (reg_overlap_mentioned_p (ops
[0], mem
))
18661 gcc_assert (overlap
== -1);
18666 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18667 output_asm_insn (buff
, ops
);
18672 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18673 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18674 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18675 output_asm_insn (buff
, ops
);
18682 gcc_unreachable ();
18685 sprintf (buff
, templ
, load
? "ld" : "st");
18686 output_asm_insn (buff
, ops
);
18691 /* Compute and return the length of neon_mov<mode>, where <mode> is
18692 one of VSTRUCT modes: EI, OI, CI or XI. */
18694 arm_attr_length_move_neon (rtx_insn
*insn
)
18696 rtx reg
, mem
, addr
;
18700 extract_insn_cached (insn
);
18702 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18704 mode
= GET_MODE (recog_data
.operand
[0]);
18715 gcc_unreachable ();
18719 load
= REG_P (recog_data
.operand
[0]);
18720 reg
= recog_data
.operand
[!load
];
18721 mem
= recog_data
.operand
[load
];
18723 gcc_assert (MEM_P (mem
));
18725 addr
= XEXP (mem
, 0);
18727 /* Strip off const from addresses like (const (plus (...))). */
18728 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18729 addr
= XEXP (addr
, 0);
18731 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18733 int insns
= REG_NREGS (reg
) / 2;
18740 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18744 arm_address_offset_is_imm (rtx_insn
*insn
)
18748 extract_insn_cached (insn
);
18750 if (REG_P (recog_data
.operand
[0]))
18753 mem
= recog_data
.operand
[0];
18755 gcc_assert (MEM_P (mem
));
18757 addr
= XEXP (mem
, 0);
18760 || (GET_CODE (addr
) == PLUS
18761 && REG_P (XEXP (addr
, 0))
18762 && CONST_INT_P (XEXP (addr
, 1))))
18768 /* Output an ADD r, s, #n where n may be too big for one instruction.
18769 If adding zero to one register, output nothing. */
18771 output_add_immediate (rtx
*operands
)
18773 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18775 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18778 output_multi_immediate (operands
,
18779 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18782 output_multi_immediate (operands
,
18783 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18790 /* Output a multiple immediate operation.
18791 OPERANDS is the vector of operands referred to in the output patterns.
18792 INSTR1 is the output pattern to use for the first constant.
18793 INSTR2 is the output pattern to use for subsequent constants.
18794 IMMED_OP is the index of the constant slot in OPERANDS.
18795 N is the constant value. */
18796 static const char *
18797 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18798 int immed_op
, HOST_WIDE_INT n
)
18800 #if HOST_BITS_PER_WIDE_INT > 32
18806 /* Quick and easy output. */
18807 operands
[immed_op
] = const0_rtx
;
18808 output_asm_insn (instr1
, operands
);
18813 const char * instr
= instr1
;
18815 /* Note that n is never zero here (which would give no output). */
18816 for (i
= 0; i
< 32; i
+= 2)
18820 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18821 output_asm_insn (instr
, operands
);
18831 /* Return the name of a shifter operation. */
18832 static const char *
18833 arm_shift_nmem(enum rtx_code code
)
18838 return ARM_LSL_NAME
;
18854 /* Return the appropriate ARM instruction for the operation code.
18855 The returned result should not be overwritten. OP is the rtx of the
18856 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18859 arithmetic_instr (rtx op
, int shift_first_arg
)
18861 switch (GET_CODE (op
))
18867 return shift_first_arg
? "rsb" : "sub";
18882 return arm_shift_nmem(GET_CODE(op
));
18885 gcc_unreachable ();
18889 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18890 for the operation code. The returned result should not be overwritten.
18891 OP is the rtx code of the shift.
18892 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18894 static const char *
18895 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18898 enum rtx_code code
= GET_CODE (op
);
18903 if (!CONST_INT_P (XEXP (op
, 1)))
18905 output_operand_lossage ("invalid shift operand");
18910 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18918 mnem
= arm_shift_nmem(code
);
18919 if (CONST_INT_P (XEXP (op
, 1)))
18921 *amountp
= INTVAL (XEXP (op
, 1));
18923 else if (REG_P (XEXP (op
, 1)))
18930 output_operand_lossage ("invalid shift operand");
18936 /* We never have to worry about the amount being other than a
18937 power of 2, since this case can never be reloaded from a reg. */
18938 if (!CONST_INT_P (XEXP (op
, 1)))
18940 output_operand_lossage ("invalid shift operand");
18944 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18946 /* Amount must be a power of two. */
18947 if (*amountp
& (*amountp
- 1))
18949 output_operand_lossage ("invalid shift operand");
18953 *amountp
= exact_log2 (*amountp
);
18954 gcc_assert (IN_RANGE (*amountp
, 0, 31));
18955 return ARM_LSL_NAME
;
18958 output_operand_lossage ("invalid shift operand");
18962 /* This is not 100% correct, but follows from the desire to merge
18963 multiplication by a power of 2 with the recognizer for a
18964 shift. >=32 is not a valid shift for "lsl", so we must try and
18965 output a shift that produces the correct arithmetical result.
18966 Using lsr #32 is identical except for the fact that the carry bit
18967 is not set correctly if we set the flags; but we never use the
18968 carry bit from such an operation, so we can ignore that. */
18969 if (code
== ROTATERT
)
18970 /* Rotate is just modulo 32. */
18972 else if (*amountp
!= (*amountp
& 31))
18974 if (code
== ASHIFT
)
18979 /* Shifts of 0 are no-ops. */
18986 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18987 because /bin/as is horribly restrictive. The judgement about
18988 whether or not each character is 'printable' (and can be output as
18989 is) or not (and must be printed with an octal escape) must be made
18990 with reference to the *host* character set -- the situation is
18991 similar to that discussed in the comments above pp_c_char in
18992 c-pretty-print.c. */
18994 #define MAX_ASCII_LEN 51
18997 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
19000 int len_so_far
= 0;
19002 fputs ("\t.ascii\t\"", stream
);
19004 for (i
= 0; i
< len
; i
++)
19008 if (len_so_far
>= MAX_ASCII_LEN
)
19010 fputs ("\"\n\t.ascii\t\"", stream
);
19016 if (c
== '\\' || c
== '\"')
19018 putc ('\\', stream
);
19026 fprintf (stream
, "\\%03o", c
);
19031 fputs ("\"\n", stream
);
19034 /* Whether a register is callee saved or not. This is necessary because high
19035 registers are marked as caller saved when optimizing for size on Thumb-1
19036 targets despite being callee saved in order to avoid using them. */
19037 #define callee_saved_reg_p(reg) \
19038 (!call_used_regs[reg] \
19039 || (TARGET_THUMB1 && optimize_size \
19040 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19042 /* Compute the register save mask for registers 0 through 12
19043 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19045 static unsigned long
19046 arm_compute_save_reg0_reg12_mask (void)
19048 unsigned long func_type
= arm_current_func_type ();
19049 unsigned long save_reg_mask
= 0;
19052 if (IS_INTERRUPT (func_type
))
19054 unsigned int max_reg
;
19055 /* Interrupt functions must not corrupt any registers,
19056 even call clobbered ones. If this is a leaf function
19057 we can just examine the registers used by the RTL, but
19058 otherwise we have to assume that whatever function is
19059 called might clobber anything, and so we have to save
19060 all the call-clobbered registers as well. */
19061 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
19062 /* FIQ handlers have registers r8 - r12 banked, so
19063 we only need to check r0 - r7, Normal ISRs only
19064 bank r14 and r15, so we must check up to r12.
19065 r13 is the stack pointer which is always preserved,
19066 so we do not need to consider it here. */
19071 for (reg
= 0; reg
<= max_reg
; reg
++)
19072 if (df_regs_ever_live_p (reg
)
19073 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
19074 save_reg_mask
|= (1 << reg
);
19076 /* Also save the pic base register if necessary. */
19078 && !TARGET_SINGLE_PIC_BASE
19079 && arm_pic_register
!= INVALID_REGNUM
19080 && crtl
->uses_pic_offset_table
)
19081 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19083 else if (IS_VOLATILE(func_type
))
19085 /* For noreturn functions we historically omitted register saves
19086 altogether. However this really messes up debugging. As a
19087 compromise save just the frame pointers. Combined with the link
19088 register saved elsewhere this should be sufficient to get
19090 if (frame_pointer_needed
)
19091 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19092 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
19093 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19094 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
19095 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
19099 /* In the normal case we only need to save those registers
19100 which are call saved and which are used by this function. */
19101 for (reg
= 0; reg
<= 11; reg
++)
19102 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19103 save_reg_mask
|= (1 << reg
);
19105 /* Handle the frame pointer as a special case. */
19106 if (frame_pointer_needed
)
19107 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19109 /* If we aren't loading the PIC register,
19110 don't stack it even though it may be live. */
19112 && !TARGET_SINGLE_PIC_BASE
19113 && arm_pic_register
!= INVALID_REGNUM
19114 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19115 || crtl
->uses_pic_offset_table
))
19116 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19118 /* The prologue will copy SP into R0, so save it. */
19119 if (IS_STACKALIGN (func_type
))
19120 save_reg_mask
|= 1;
19123 /* Save registers so the exception handler can modify them. */
19124 if (crtl
->calls_eh_return
)
19130 reg
= EH_RETURN_DATA_REGNO (i
);
19131 if (reg
== INVALID_REGNUM
)
19133 save_reg_mask
|= 1 << reg
;
19137 return save_reg_mask
;
19140 /* Return true if r3 is live at the start of the function. */
19143 arm_r3_live_at_start_p (void)
19145 /* Just look at cfg info, which is still close enough to correct at this
19146 point. This gives false positives for broken functions that might use
19147 uninitialized data that happens to be allocated in r3, but who cares? */
19148 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
19151 /* Compute the number of bytes used to store the static chain register on the
19152 stack, above the stack frame. We need to know this accurately to get the
19153 alignment of the rest of the stack frame correct. */
19156 arm_compute_static_chain_stack_bytes (void)
19158 /* See the defining assertion in arm_expand_prologue. */
19159 if (IS_NESTED (arm_current_func_type ())
19160 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19161 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
19162 && !df_regs_ever_live_p (LR_REGNUM
)))
19163 && arm_r3_live_at_start_p ()
19164 && crtl
->args
.pretend_args_size
== 0)
19170 /* Compute a bit mask of which core registers need to be
19171 saved on the stack for the current function.
19172 This is used by arm_compute_frame_layout, which may add extra registers. */
19174 static unsigned long
19175 arm_compute_save_core_reg_mask (void)
19177 unsigned int save_reg_mask
= 0;
19178 unsigned long func_type
= arm_current_func_type ();
19181 if (IS_NAKED (func_type
))
19182 /* This should never really happen. */
19185 /* If we are creating a stack frame, then we must save the frame pointer,
19186 IP (which will hold the old stack pointer), LR and the PC. */
19187 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19189 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
19192 | (1 << PC_REGNUM
);
19194 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
19196 /* Decide if we need to save the link register.
19197 Interrupt routines have their own banked link register,
19198 so they never need to save it.
19199 Otherwise if we do not use the link register we do not need to save
19200 it. If we are pushing other registers onto the stack however, we
19201 can save an instruction in the epilogue by pushing the link register
19202 now and then popping it back into the PC. This incurs extra memory
19203 accesses though, so we only do it when optimizing for size, and only
19204 if we know that we will not need a fancy return sequence. */
19205 if (df_regs_ever_live_p (LR_REGNUM
)
19208 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19209 && !crtl
->tail_call_emit
19210 && !crtl
->calls_eh_return
))
19211 save_reg_mask
|= 1 << LR_REGNUM
;
19213 if (cfun
->machine
->lr_save_eliminated
)
19214 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19216 if (TARGET_REALLY_IWMMXT
19217 && ((bit_count (save_reg_mask
)
19218 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19219 arm_compute_static_chain_stack_bytes())
19222 /* The total number of registers that are going to be pushed
19223 onto the stack is odd. We need to ensure that the stack
19224 is 64-bit aligned before we start to save iWMMXt registers,
19225 and also before we start to create locals. (A local variable
19226 might be a double or long long which we will load/store using
19227 an iWMMXt instruction). Therefore we need to push another
19228 ARM register, so that the stack will be 64-bit aligned. We
19229 try to avoid using the arg registers (r0 -r3) as they might be
19230 used to pass values in a tail call. */
19231 for (reg
= 4; reg
<= 12; reg
++)
19232 if ((save_reg_mask
& (1 << reg
)) == 0)
19236 save_reg_mask
|= (1 << reg
);
19239 cfun
->machine
->sibcall_blocked
= 1;
19240 save_reg_mask
|= (1 << 3);
19244 /* We may need to push an additional register for use initializing the
19245 PIC base register. */
19246 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19247 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19249 reg
= thumb_find_work_register (1 << 4);
19250 if (!call_used_regs
[reg
])
19251 save_reg_mask
|= (1 << reg
);
19254 return save_reg_mask
;
19257 /* Compute a bit mask of which core registers need to be
19258 saved on the stack for the current function. */
19259 static unsigned long
19260 thumb1_compute_save_core_reg_mask (void)
19262 unsigned long mask
;
19266 for (reg
= 0; reg
< 12; reg
++)
19267 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19270 /* Handle the frame pointer as a special case. */
19271 if (frame_pointer_needed
)
19272 mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19275 && !TARGET_SINGLE_PIC_BASE
19276 && arm_pic_register
!= INVALID_REGNUM
19277 && crtl
->uses_pic_offset_table
)
19278 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19280 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19281 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19282 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19284 /* LR will also be pushed if any lo regs are pushed. */
19285 if (mask
& 0xff || thumb_force_lr_save ())
19286 mask
|= (1 << LR_REGNUM
);
19288 /* Make sure we have a low work register if we need one.
19289 We will need one if we are going to push a high register,
19290 but we are not currently intending to push a low register. */
19291 if ((mask
& 0xff) == 0
19292 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19294 /* Use thumb_find_work_register to choose which register
19295 we will use. If the register is live then we will
19296 have to push it. Use LAST_LO_REGNUM as our fallback
19297 choice for the register to select. */
19298 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19299 /* Make sure the register returned by thumb_find_work_register is
19300 not part of the return value. */
19301 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19302 reg
= LAST_LO_REGNUM
;
19304 if (callee_saved_reg_p (reg
))
19308 /* The 504 below is 8 bytes less than 512 because there are two possible
19309 alignment words. We can't tell here if they will be present or not so we
19310 have to play it safe and assume that they are. */
19311 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19312 ROUND_UP_WORD (get_frame_size ()) +
19313 crtl
->outgoing_args_size
) >= 504)
19315 /* This is the same as the code in thumb1_expand_prologue() which
19316 determines which register to use for stack decrement. */
19317 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19318 if (mask
& (1 << reg
))
19321 if (reg
> LAST_LO_REGNUM
)
19323 /* Make sure we have a register available for stack decrement. */
19324 mask
|= 1 << LAST_LO_REGNUM
;
19332 /* Return the number of bytes required to save VFP registers. */
19334 arm_get_vfp_saved_size (void)
19336 unsigned int regno
;
19341 /* Space for saved VFP registers. */
19342 if (TARGET_HARD_FLOAT
)
19345 for (regno
= FIRST_VFP_REGNUM
;
19346 regno
< LAST_VFP_REGNUM
;
19349 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19350 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19354 /* Workaround ARM10 VFPr1 bug. */
19355 if (count
== 2 && !arm_arch6
)
19357 saved
+= count
* 8;
19366 if (count
== 2 && !arm_arch6
)
19368 saved
+= count
* 8;
19375 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19376 everything bar the final return instruction. If simple_return is true,
19377 then do not output epilogue, because it has already been emitted in RTL. */
19379 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19380 bool simple_return
)
19382 char conditional
[10];
19385 unsigned long live_regs_mask
;
19386 unsigned long func_type
;
19387 arm_stack_offsets
*offsets
;
19389 func_type
= arm_current_func_type ();
19391 if (IS_NAKED (func_type
))
19394 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19396 /* If this function was declared non-returning, and we have
19397 found a tail call, then we have to trust that the called
19398 function won't return. */
19403 /* Otherwise, trap an attempted return by aborting. */
19405 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19407 assemble_external_libcall (ops
[1]);
19408 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19414 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19416 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19418 cfun
->machine
->return_used_this_function
= 1;
19420 offsets
= arm_get_frame_offsets ();
19421 live_regs_mask
= offsets
->saved_regs_mask
;
19423 if (!simple_return
&& live_regs_mask
)
19425 const char * return_reg
;
19427 /* If we do not have any special requirements for function exit
19428 (e.g. interworking) then we can load the return address
19429 directly into the PC. Otherwise we must load it into LR. */
19431 && !IS_CMSE_ENTRY (func_type
)
19432 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19433 return_reg
= reg_names
[PC_REGNUM
];
19435 return_reg
= reg_names
[LR_REGNUM
];
19437 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19439 /* There are three possible reasons for the IP register
19440 being saved. 1) a stack frame was created, in which case
19441 IP contains the old stack pointer, or 2) an ISR routine
19442 corrupted it, or 3) it was saved to align the stack on
19443 iWMMXt. In case 1, restore IP into SP, otherwise just
19445 if (frame_pointer_needed
)
19447 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19448 live_regs_mask
|= (1 << SP_REGNUM
);
19451 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19454 /* On some ARM architectures it is faster to use LDR rather than
19455 LDM to load a single register. On other architectures, the
19456 cost is the same. In 26 bit mode, or for exception handlers,
19457 we have to use LDM to load the PC so that the CPSR is also
19459 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19460 if (live_regs_mask
== (1U << reg
))
19463 if (reg
<= LAST_ARM_REGNUM
19464 && (reg
!= LR_REGNUM
19466 || ! IS_INTERRUPT (func_type
)))
19468 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19469 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19476 /* Generate the load multiple instruction to restore the
19477 registers. Note we can get here, even if
19478 frame_pointer_needed is true, but only if sp already
19479 points to the base of the saved core registers. */
19480 if (live_regs_mask
& (1 << SP_REGNUM
))
19482 unsigned HOST_WIDE_INT stack_adjust
;
19484 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19485 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19487 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19488 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19491 /* If we can't use ldmib (SA110 bug),
19492 then try to pop r3 instead. */
19494 live_regs_mask
|= 1 << 3;
19496 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19499 /* For interrupt returns we have to use an LDM rather than
19500 a POP so that we can use the exception return variant. */
19501 else if (IS_INTERRUPT (func_type
))
19502 sprintf (instr
, "ldmfd%s\t%%|sp!, {", conditional
);
19504 sprintf (instr
, "pop%s\t{", conditional
);
19506 p
= instr
+ strlen (instr
);
19508 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19509 if (live_regs_mask
& (1 << reg
))
19511 int l
= strlen (reg_names
[reg
]);
19517 memcpy (p
, ", ", 2);
19521 memcpy (p
, "%|", 2);
19522 memcpy (p
+ 2, reg_names
[reg
], l
);
19526 if (live_regs_mask
& (1 << LR_REGNUM
))
19528 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19529 /* If returning from an interrupt, restore the CPSR. */
19530 if (IS_INTERRUPT (func_type
))
19537 output_asm_insn (instr
, & operand
);
19539 /* See if we need to generate an extra instruction to
19540 perform the actual function return. */
19542 && func_type
!= ARM_FT_INTERWORKED
19543 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19545 /* The return has already been handled
19546 by loading the LR into the PC. */
19553 switch ((int) ARM_FUNC_TYPE (func_type
))
19557 /* ??? This is wrong for unified assembly syntax. */
19558 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19561 case ARM_FT_INTERWORKED
:
19562 gcc_assert (arm_arch5
|| arm_arch4t
);
19563 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19566 case ARM_FT_EXCEPTION
:
19567 /* ??? This is wrong for unified assembly syntax. */
19568 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19572 if (IS_CMSE_ENTRY (func_type
))
19574 /* Check if we have to clear the 'GE bits' which is only used if
19575 parallel add and subtraction instructions are available. */
19576 if (TARGET_INT_SIMD
)
19577 snprintf (instr
, sizeof (instr
),
19578 "msr%s\tAPSR_nzcvqg, %%|lr", conditional
);
19580 snprintf (instr
, sizeof (instr
),
19581 "msr%s\tAPSR_nzcvq, %%|lr", conditional
);
19583 output_asm_insn (instr
, & operand
);
19584 if (TARGET_HARD_FLOAT
&& !TARGET_THUMB1
)
19586 /* Clear the cumulative exception-status bits (0-4,7) and the
19587 condition code bits (28-31) of the FPSCR. We need to
19588 remember to clear the first scratch register used (IP) and
19589 save and restore the second (r4). */
19590 snprintf (instr
, sizeof (instr
), "push\t{%%|r4}");
19591 output_asm_insn (instr
, & operand
);
19592 snprintf (instr
, sizeof (instr
), "vmrs\t%%|ip, fpscr");
19593 output_asm_insn (instr
, & operand
);
19594 snprintf (instr
, sizeof (instr
), "movw\t%%|r4, #65376");
19595 output_asm_insn (instr
, & operand
);
19596 snprintf (instr
, sizeof (instr
), "movt\t%%|r4, #4095");
19597 output_asm_insn (instr
, & operand
);
19598 snprintf (instr
, sizeof (instr
), "and\t%%|ip, %%|r4");
19599 output_asm_insn (instr
, & operand
);
19600 snprintf (instr
, sizeof (instr
), "vmsr\tfpscr, %%|ip");
19601 output_asm_insn (instr
, & operand
);
19602 snprintf (instr
, sizeof (instr
), "pop\t{%%|r4}");
19603 output_asm_insn (instr
, & operand
);
19604 snprintf (instr
, sizeof (instr
), "mov\t%%|ip, %%|lr");
19605 output_asm_insn (instr
, & operand
);
19607 snprintf (instr
, sizeof (instr
), "bxns\t%%|lr");
19609 /* Use bx if it's available. */
19610 else if (arm_arch5
|| arm_arch4t
)
19611 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19613 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19617 output_asm_insn (instr
, & operand
);
19623 /* Output in FILE asm statements needed to declare the NAME of the function
19624 defined by its DECL node. */
19627 arm_asm_declare_function_name (FILE *file
, const char *name
, tree decl
)
19629 size_t cmse_name_len
;
19630 char *cmse_name
= 0;
19631 char cmse_prefix
[] = "__acle_se_";
19633 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19634 extra function label for each function with the 'cmse_nonsecure_entry'
19635 attribute. This extra function label should be prepended with
19636 '__acle_se_', telling the linker that it needs to create secure gateway
19637 veneers for this function. */
19638 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_entry",
19639 DECL_ATTRIBUTES (decl
)))
19641 cmse_name_len
= sizeof (cmse_prefix
) + strlen (name
);
19642 cmse_name
= XALLOCAVEC (char, cmse_name_len
);
19643 snprintf (cmse_name
, cmse_name_len
, "%s%s", cmse_prefix
, name
);
19644 targetm
.asm_out
.globalize_label (file
, cmse_name
);
19646 ARM_DECLARE_FUNCTION_NAME (file
, cmse_name
, decl
);
19647 ASM_OUTPUT_TYPE_DIRECTIVE (file
, cmse_name
, "function");
19650 ARM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
19651 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
19652 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
19653 ASM_OUTPUT_LABEL (file
, name
);
19656 ASM_OUTPUT_LABEL (file
, cmse_name
);
19658 ARM_OUTPUT_FN_UNWIND (file
, TRUE
);
19661 /* Write the function name into the code section, directly preceding
19662 the function prologue.
19664 Code will be output similar to this:
19666 .ascii "arm_poke_function_name", 0
19669 .word 0xff000000 + (t1 - t0)
19670 arm_poke_function_name
19672 stmfd sp!, {fp, ip, lr, pc}
19675 When performing a stack backtrace, code can inspect the value
19676 of 'pc' stored at 'fp' + 0. If the trace function then looks
19677 at location pc - 12 and the top 8 bits are set, then we know
19678 that there is a function name embedded immediately preceding this
19679 location and has length ((pc[-3]) & 0xff000000).
19681 We assume that pc is declared as a pointer to an unsigned long.
19683 It is of no benefit to output the function name if we are assembling
19684 a leaf function. These function types will not contain a stack
19685 backtrace structure, therefore it is not possible to determine the
19688 arm_poke_function_name (FILE *stream
, const char *name
)
19690 unsigned long alignlength
;
19691 unsigned long length
;
19694 length
= strlen (name
) + 1;
19695 alignlength
= ROUND_UP_WORD (length
);
19697 ASM_OUTPUT_ASCII (stream
, name
, length
);
19698 ASM_OUTPUT_ALIGN (stream
, 2);
19699 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19700 assemble_aligned_integer (UNITS_PER_WORD
, x
);
19703 /* Place some comments into the assembler stream
19704 describing the current function. */
19706 arm_output_function_prologue (FILE *f
)
19708 unsigned long func_type
;
19710 /* Sanity check. */
19711 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19713 func_type
= arm_current_func_type ();
19715 switch ((int) ARM_FUNC_TYPE (func_type
))
19718 case ARM_FT_NORMAL
:
19720 case ARM_FT_INTERWORKED
:
19721 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19724 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19727 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19729 case ARM_FT_EXCEPTION
:
19730 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19734 if (IS_NAKED (func_type
))
19735 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19737 if (IS_VOLATILE (func_type
))
19738 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19740 if (IS_NESTED (func_type
))
19741 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19742 if (IS_STACKALIGN (func_type
))
19743 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19744 if (IS_CMSE_ENTRY (func_type
))
19745 asm_fprintf (f
, "\t%@ Non-secure entry function: called from non-secure code.\n");
19747 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19749 crtl
->args
.pretend_args_size
,
19750 (HOST_WIDE_INT
) get_frame_size ());
19752 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19753 frame_pointer_needed
,
19754 cfun
->machine
->uses_anonymous_args
);
19756 if (cfun
->machine
->lr_save_eliminated
)
19757 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19759 if (crtl
->calls_eh_return
)
19760 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19765 arm_output_function_epilogue (FILE *)
19767 arm_stack_offsets
*offsets
;
19773 /* Emit any call-via-reg trampolines that are needed for v4t support
19774 of call_reg and call_value_reg type insns. */
19775 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19777 rtx label
= cfun
->machine
->call_via
[regno
];
19781 switch_to_section (function_section (current_function_decl
));
19782 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19783 CODE_LABEL_NUMBER (label
));
19784 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19788 /* ??? Probably not safe to set this here, since it assumes that a
19789 function will be emitted as assembly immediately after we generate
19790 RTL for it. This does not happen for inline functions. */
19791 cfun
->machine
->return_used_this_function
= 0;
19793 else /* TARGET_32BIT */
19795 /* We need to take into account any stack-frame rounding. */
19796 offsets
= arm_get_frame_offsets ();
19798 gcc_assert (!use_return_insn (FALSE
, NULL
)
19799 || (cfun
->machine
->return_used_this_function
!= 0)
19800 || offsets
->saved_regs
== offsets
->outgoing_args
19801 || frame_pointer_needed
);
19805 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19806 STR and STRD. If an even number of registers are being pushed, one
19807 or more STRD patterns are created for each register pair. If an
19808 odd number of registers are pushed, emit an initial STR followed by
19809 as many STRD instructions as are needed. This works best when the
19810 stack is initially 64-bit aligned (the normal case), since it
19811 ensures that each STRD is also 64-bit aligned. */
19813 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19818 rtx par
= NULL_RTX
;
19819 rtx dwarf
= NULL_RTX
;
19823 num_regs
= bit_count (saved_regs_mask
);
19825 /* Must be at least one register to save, and can't save SP or PC. */
19826 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19827 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19828 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19830 /* Create sequence for DWARF info. All the frame-related data for
19831 debugging is held in this wrapper. */
19832 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19834 /* Describe the stack adjustment. */
19835 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19836 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19837 RTX_FRAME_RELATED_P (tmp
) = 1;
19838 XVECEXP (dwarf
, 0, 0) = tmp
;
19840 /* Find the first register. */
19841 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19846 /* If there's an odd number of registers to push. Start off by
19847 pushing a single register. This ensures that subsequent strd
19848 operations are dword aligned (assuming that SP was originally
19849 64-bit aligned). */
19850 if ((num_regs
& 1) != 0)
19852 rtx reg
, mem
, insn
;
19854 reg
= gen_rtx_REG (SImode
, regno
);
19856 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19857 stack_pointer_rtx
));
19859 mem
= gen_frame_mem (Pmode
,
19861 (Pmode
, stack_pointer_rtx
,
19862 plus_constant (Pmode
, stack_pointer_rtx
,
19865 tmp
= gen_rtx_SET (mem
, reg
);
19866 RTX_FRAME_RELATED_P (tmp
) = 1;
19867 insn
= emit_insn (tmp
);
19868 RTX_FRAME_RELATED_P (insn
) = 1;
19869 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19870 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
19871 RTX_FRAME_RELATED_P (tmp
) = 1;
19874 XVECEXP (dwarf
, 0, i
) = tmp
;
19878 while (i
< num_regs
)
19879 if (saved_regs_mask
& (1 << regno
))
19881 rtx reg1
, reg2
, mem1
, mem2
;
19882 rtx tmp0
, tmp1
, tmp2
;
19885 /* Find the register to pair with this one. */
19886 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19890 reg1
= gen_rtx_REG (SImode
, regno
);
19891 reg2
= gen_rtx_REG (SImode
, regno2
);
19898 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19901 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19903 -4 * (num_regs
- 1)));
19904 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
19905 plus_constant (Pmode
, stack_pointer_rtx
,
19907 tmp1
= gen_rtx_SET (mem1
, reg1
);
19908 tmp2
= gen_rtx_SET (mem2
, reg2
);
19909 RTX_FRAME_RELATED_P (tmp0
) = 1;
19910 RTX_FRAME_RELATED_P (tmp1
) = 1;
19911 RTX_FRAME_RELATED_P (tmp2
) = 1;
19912 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19913 XVECEXP (par
, 0, 0) = tmp0
;
19914 XVECEXP (par
, 0, 1) = tmp1
;
19915 XVECEXP (par
, 0, 2) = tmp2
;
19916 insn
= emit_insn (par
);
19917 RTX_FRAME_RELATED_P (insn
) = 1;
19918 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19922 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19925 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19928 tmp1
= gen_rtx_SET (mem1
, reg1
);
19929 tmp2
= gen_rtx_SET (mem2
, reg2
);
19930 RTX_FRAME_RELATED_P (tmp1
) = 1;
19931 RTX_FRAME_RELATED_P (tmp2
) = 1;
19932 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19933 XVECEXP (par
, 0, 0) = tmp1
;
19934 XVECEXP (par
, 0, 1) = tmp2
;
19938 /* Create unwind information. This is an approximation. */
19939 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
19940 plus_constant (Pmode
,
19944 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
19945 plus_constant (Pmode
,
19950 RTX_FRAME_RELATED_P (tmp1
) = 1;
19951 RTX_FRAME_RELATED_P (tmp2
) = 1;
19952 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
19953 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
19955 regno
= regno2
+ 1;
19963 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19964 whenever possible, otherwise it emits single-word stores. The first store
19965 also allocates stack space for all saved registers, using writeback with
19966 post-addressing mode. All other stores use offset addressing. If no STRD
19967 can be emitted, this function emits a sequence of single-word stores,
19968 and not an STM as before, because single-word stores provide more freedom
19969 scheduling and can be turned into an STM by peephole optimizations. */
19971 arm_emit_strd_push (unsigned long saved_regs_mask
)
19974 int i
, j
, dwarf_index
= 0;
19976 rtx dwarf
= NULL_RTX
;
19977 rtx insn
= NULL_RTX
;
19980 /* TODO: A more efficient code can be emitted by changing the
19981 layout, e.g., first push all pairs that can use STRD to keep the
19982 stack aligned, and then push all other registers. */
19983 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19984 if (saved_regs_mask
& (1 << i
))
19987 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19988 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19989 gcc_assert (num_regs
> 0);
19991 /* Create sequence for DWARF info. */
19992 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19994 /* For dwarf info, we generate explicit stack update. */
19995 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19996 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19997 RTX_FRAME_RELATED_P (tmp
) = 1;
19998 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20000 /* Save registers. */
20001 offset
= - 4 * num_regs
;
20003 while (j
<= LAST_ARM_REGNUM
)
20004 if (saved_regs_mask
& (1 << j
))
20007 && (saved_regs_mask
& (1 << (j
+ 1))))
20009 /* Current register and previous register form register pair for
20010 which STRD can be generated. */
20013 /* Allocate stack space for all saved registers. */
20014 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20015 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20016 mem
= gen_frame_mem (DImode
, tmp
);
20019 else if (offset
> 0)
20020 mem
= gen_frame_mem (DImode
,
20021 plus_constant (Pmode
,
20025 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20027 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
20028 RTX_FRAME_RELATED_P (tmp
) = 1;
20029 tmp
= emit_insn (tmp
);
20031 /* Record the first store insn. */
20032 if (dwarf_index
== 1)
20035 /* Generate dwarf info. */
20036 mem
= gen_frame_mem (SImode
,
20037 plus_constant (Pmode
,
20040 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20041 RTX_FRAME_RELATED_P (tmp
) = 1;
20042 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20044 mem
= gen_frame_mem (SImode
,
20045 plus_constant (Pmode
,
20048 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
20049 RTX_FRAME_RELATED_P (tmp
) = 1;
20050 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20057 /* Emit a single word store. */
20060 /* Allocate stack space for all saved registers. */
20061 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20062 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20063 mem
= gen_frame_mem (SImode
, tmp
);
20066 else if (offset
> 0)
20067 mem
= gen_frame_mem (SImode
,
20068 plus_constant (Pmode
,
20072 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20074 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20075 RTX_FRAME_RELATED_P (tmp
) = 1;
20076 tmp
= emit_insn (tmp
);
20078 /* Record the first store insn. */
20079 if (dwarf_index
== 1)
20082 /* Generate dwarf info. */
20083 mem
= gen_frame_mem (SImode
,
20084 plus_constant(Pmode
,
20087 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20088 RTX_FRAME_RELATED_P (tmp
) = 1;
20089 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20098 /* Attach dwarf info to the first insn we generate. */
20099 gcc_assert (insn
!= NULL_RTX
);
20100 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20101 RTX_FRAME_RELATED_P (insn
) = 1;
20104 /* Generate and emit an insn that we will recognize as a push_multi.
20105 Unfortunately, since this insn does not reflect very well the actual
20106 semantics of the operation, we need to annotate the insn for the benefit
20107 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20108 MASK for registers that should be annotated for DWARF2 frame unwind
20111 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
20114 int num_dwarf_regs
= 0;
20118 int dwarf_par_index
;
20121 /* We don't record the PC in the dwarf frame information. */
20122 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
20124 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20126 if (mask
& (1 << i
))
20128 if (dwarf_regs_mask
& (1 << i
))
20132 gcc_assert (num_regs
&& num_regs
<= 16);
20133 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
20135 /* For the body of the insn we are going to generate an UNSPEC in
20136 parallel with several USEs. This allows the insn to be recognized
20137 by the push_multi pattern in the arm.md file.
20139 The body of the insn looks something like this:
20142 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20143 (const_int:SI <num>)))
20144 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20150 For the frame note however, we try to be more explicit and actually
20151 show each register being stored into the stack frame, plus a (single)
20152 decrement of the stack pointer. We do it this way in order to be
20153 friendly to the stack unwinding code, which only wants to see a single
20154 stack decrement per instruction. The RTL we generate for the note looks
20155 something like this:
20158 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20159 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20160 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20161 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20165 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20166 instead we'd have a parallel expression detailing all
20167 the stores to the various memory addresses so that debug
20168 information is more up-to-date. Remember however while writing
20169 this to take care of the constraints with the push instruction.
20171 Note also that this has to be taken care of for the VFP registers.
20173 For more see PR43399. */
20175 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
20176 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
20177 dwarf_par_index
= 1;
20179 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20181 if (mask
& (1 << i
))
20183 reg
= gen_rtx_REG (SImode
, i
);
20185 XVECEXP (par
, 0, 0)
20186 = gen_rtx_SET (gen_frame_mem
20188 gen_rtx_PRE_MODIFY (Pmode
,
20191 (Pmode
, stack_pointer_rtx
,
20194 gen_rtx_UNSPEC (BLKmode
,
20195 gen_rtvec (1, reg
),
20196 UNSPEC_PUSH_MULT
));
20198 if (dwarf_regs_mask
& (1 << i
))
20200 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
20202 RTX_FRAME_RELATED_P (tmp
) = 1;
20203 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20210 for (j
= 1, i
++; j
< num_regs
; i
++)
20212 if (mask
& (1 << i
))
20214 reg
= gen_rtx_REG (SImode
, i
);
20216 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
20218 if (dwarf_regs_mask
& (1 << i
))
20221 = gen_rtx_SET (gen_frame_mem
20223 plus_constant (Pmode
, stack_pointer_rtx
,
20226 RTX_FRAME_RELATED_P (tmp
) = 1;
20227 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20234 par
= emit_insn (par
);
20236 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20237 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20238 RTX_FRAME_RELATED_P (tmp
) = 1;
20239 XVECEXP (dwarf
, 0, 0) = tmp
;
20241 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
20246 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20247 SIZE is the offset to be adjusted.
20248 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20250 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
20254 RTX_FRAME_RELATED_P (insn
) = 1;
20255 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
20256 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
20259 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20260 SAVED_REGS_MASK shows which registers need to be restored.
20262 Unfortunately, since this insn does not reflect very well the actual
20263 semantics of the operation, we need to annotate the insn for the benefit
20264 of DWARF2 frame unwind information. */
20266 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20271 rtx dwarf
= NULL_RTX
;
20273 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20277 offset_adj
= return_in_pc
? 1 : 0;
20278 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20279 if (saved_regs_mask
& (1 << i
))
20282 gcc_assert (num_regs
&& num_regs
<= 16);
20284 /* If SP is in reglist, then we don't emit SP update insn. */
20285 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20287 /* The parallel needs to hold num_regs SETs
20288 and one SET for the stack update. */
20289 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20292 XVECEXP (par
, 0, 0) = ret_rtx
;
20296 /* Increment the stack pointer, based on there being
20297 num_regs 4-byte registers to restore. */
20298 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20299 plus_constant (Pmode
,
20302 RTX_FRAME_RELATED_P (tmp
) = 1;
20303 XVECEXP (par
, 0, offset_adj
) = tmp
;
20306 /* Now restore every reg, which may include PC. */
20307 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20308 if (saved_regs_mask
& (1 << i
))
20310 reg
= gen_rtx_REG (SImode
, i
);
20311 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20313 /* Emit single load with writeback. */
20314 tmp
= gen_frame_mem (SImode
,
20315 gen_rtx_POST_INC (Pmode
,
20316 stack_pointer_rtx
));
20317 tmp
= emit_insn (gen_rtx_SET (reg
, tmp
));
20318 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20322 tmp
= gen_rtx_SET (reg
,
20325 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20326 RTX_FRAME_RELATED_P (tmp
) = 1;
20327 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20329 /* We need to maintain a sequence for DWARF info too. As dwarf info
20330 should not have PC, skip PC. */
20331 if (i
!= PC_REGNUM
)
20332 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20338 par
= emit_jump_insn (par
);
20340 par
= emit_insn (par
);
20342 REG_NOTES (par
) = dwarf
;
20344 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20345 stack_pointer_rtx
, stack_pointer_rtx
);
20348 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20349 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20351 Unfortunately, since this insn does not reflect very well the actual
20352 semantics of the operation, we need to annotate the insn for the benefit
20353 of DWARF2 frame unwind information. */
20355 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20359 rtx dwarf
= NULL_RTX
;
20362 gcc_assert (num_regs
&& num_regs
<= 32);
20364 /* Workaround ARM10 VFPr1 bug. */
20365 if (num_regs
== 2 && !arm_arch6
)
20367 if (first_reg
== 15)
20373 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20374 there could be up to 32 D-registers to restore.
20375 If there are more than 16 D-registers, make two recursive calls,
20376 each of which emits one pop_multi instruction. */
20379 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20380 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20384 /* The parallel needs to hold num_regs SETs
20385 and one SET for the stack update. */
20386 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20388 /* Increment the stack pointer, based on there being
20389 num_regs 8-byte registers to restore. */
20390 tmp
= gen_rtx_SET (base_reg
, plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20391 RTX_FRAME_RELATED_P (tmp
) = 1;
20392 XVECEXP (par
, 0, 0) = tmp
;
20394 /* Now show every reg that will be restored, using a SET for each. */
20395 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20397 reg
= gen_rtx_REG (DFmode
, i
);
20399 tmp
= gen_rtx_SET (reg
,
20402 plus_constant (Pmode
, base_reg
, 8 * j
)));
20403 RTX_FRAME_RELATED_P (tmp
) = 1;
20404 XVECEXP (par
, 0, j
+ 1) = tmp
;
20406 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20411 par
= emit_insn (par
);
20412 REG_NOTES (par
) = dwarf
;
 20414	  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
20415 if (REGNO (base_reg
) == IP_REGNUM
)
20417 RTX_FRAME_RELATED_P (par
) = 1;
20418 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20421 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20422 base_reg
, base_reg
);
20425 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20426 number of registers are being popped, multiple LDRD patterns are created for
20427 all register pairs. If odd number of registers are popped, last register is
20428 loaded by using LDR pattern. */
20430 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20434 rtx par
= NULL_RTX
;
20435 rtx dwarf
= NULL_RTX
;
20436 rtx tmp
, reg
, tmp1
;
20437 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20439 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20440 if (saved_regs_mask
& (1 << i
))
20443 gcc_assert (num_regs
&& num_regs
<= 16);
20445 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20446 to be popped. So, if num_regs is even, now it will become odd,
20447 and we can generate pop with PC. If num_regs is odd, it will be
20448 even now, and ldr with return can be generated for PC. */
20452 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20454 /* Var j iterates over all the registers to gather all the registers in
20455 saved_regs_mask. Var i gives index of saved registers in stack frame.
20456 A PARALLEL RTX of register-pair is created here, so that pattern for
20457 LDRD can be matched. As PC is always last register to be popped, and
20458 we have already decremented num_regs if PC, we don't have to worry
20459 about PC in this loop. */
20460 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20461 if (saved_regs_mask
& (1 << j
))
20463 /* Create RTX for memory load. */
20464 reg
= gen_rtx_REG (SImode
, j
);
20465 tmp
= gen_rtx_SET (reg
,
20466 gen_frame_mem (SImode
,
20467 plus_constant (Pmode
,
20468 stack_pointer_rtx
, 4 * i
)));
20469 RTX_FRAME_RELATED_P (tmp
) = 1;
20473 /* When saved-register index (i) is even, the RTX to be emitted is
20474 yet to be created. Hence create it first. The LDRD pattern we
20475 are generating is :
20476 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20477 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20478 where target registers need not be consecutive. */
20479 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20483 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20484 added as 0th element and if i is odd, reg_i is added as 1st element
20485 of LDRD pattern shown above. */
20486 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20487 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20491 /* When saved-register index (i) is odd, RTXs for both the registers
20492 to be loaded are generated in above given LDRD pattern, and the
20493 pattern can be emitted now. */
20494 par
= emit_insn (par
);
20495 REG_NOTES (par
) = dwarf
;
20496 RTX_FRAME_RELATED_P (par
) = 1;
20502 /* If the number of registers pushed is odd AND return_in_pc is false OR
20503 number of registers are even AND return_in_pc is true, last register is
20504 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20505 then LDR with post increment. */
20507 /* Increment the stack pointer, based on there being
20508 num_regs 4-byte registers to restore. */
20509 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20510 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20511 RTX_FRAME_RELATED_P (tmp
) = 1;
20512 tmp
= emit_insn (tmp
);
20515 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20516 stack_pointer_rtx
, stack_pointer_rtx
);
20521 if (((num_regs
% 2) == 1 && !return_in_pc
)
20522 || ((num_regs
% 2) == 0 && return_in_pc
))
20524 /* Scan for the single register to be popped. Skip until the saved
20525 register is found. */
20526 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20528 /* Gen LDR with post increment here. */
20529 tmp1
= gen_rtx_MEM (SImode
,
20530 gen_rtx_POST_INC (SImode
,
20531 stack_pointer_rtx
));
20532 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20534 reg
= gen_rtx_REG (SImode
, j
);
20535 tmp
= gen_rtx_SET (reg
, tmp1
);
20536 RTX_FRAME_RELATED_P (tmp
) = 1;
20537 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20541 /* If return_in_pc, j must be PC_REGNUM. */
20542 gcc_assert (j
== PC_REGNUM
);
20543 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20544 XVECEXP (par
, 0, 0) = ret_rtx
;
20545 XVECEXP (par
, 0, 1) = tmp
;
20546 par
= emit_jump_insn (par
);
20550 par
= emit_insn (tmp
);
20551 REG_NOTES (par
) = dwarf
;
20552 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20553 stack_pointer_rtx
, stack_pointer_rtx
);
20557 else if ((num_regs
% 2) == 1 && return_in_pc
)
20559 /* There are 2 registers to be popped. So, generate the pattern
20560 pop_multiple_with_stack_update_and_return to pop in PC. */
20561 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20567 /* LDRD in ARM mode needs consecutive registers as operands. This function
20568 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
 20569	   offset addressing and then generates one separate stack update.  This provides
20570 more scheduling freedom, compared to writeback on every load. However,
20571 if the function returns using load into PC directly
20572 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20573 before the last load. TODO: Add a peephole optimization to recognize
20574 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20575 peephole optimization to merge the load at stack-offset zero
20576 with the stack update instruction using load with writeback
20577 in post-index addressing mode. */
20579 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20583 rtx par
= NULL_RTX
;
20584 rtx dwarf
= NULL_RTX
;
20587 /* Restore saved registers. */
20588 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20590 while (j
<= LAST_ARM_REGNUM
)
20591 if (saved_regs_mask
& (1 << j
))
20594 && (saved_regs_mask
& (1 << (j
+ 1)))
20595 && (j
+ 1) != PC_REGNUM
)
20597 /* Current register and next register form register pair for which
20598 LDRD can be generated. PC is always the last register popped, and
20599 we handle it separately. */
20601 mem
= gen_frame_mem (DImode
,
20602 plus_constant (Pmode
,
20606 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20608 tmp
= gen_rtx_SET (gen_rtx_REG (DImode
, j
), mem
);
20609 tmp
= emit_insn (tmp
);
20610 RTX_FRAME_RELATED_P (tmp
) = 1;
20612 /* Generate dwarf info. */
20614 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20615 gen_rtx_REG (SImode
, j
),
20617 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20618 gen_rtx_REG (SImode
, j
+ 1),
20621 REG_NOTES (tmp
) = dwarf
;
20626 else if (j
!= PC_REGNUM
)
20628 /* Emit a single word load. */
20630 mem
= gen_frame_mem (SImode
,
20631 plus_constant (Pmode
,
20635 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20637 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, j
), mem
);
20638 tmp
= emit_insn (tmp
);
20639 RTX_FRAME_RELATED_P (tmp
) = 1;
20641 /* Generate dwarf info. */
20642 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20643 gen_rtx_REG (SImode
, j
),
20649 else /* j == PC_REGNUM */
20655 /* Update the stack. */
20658 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20659 plus_constant (Pmode
,
20662 tmp
= emit_insn (tmp
);
20663 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20664 stack_pointer_rtx
, stack_pointer_rtx
);
20668 if (saved_regs_mask
& (1 << PC_REGNUM
))
20670 /* Only PC is to be popped. */
20671 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20672 XVECEXP (par
, 0, 0) = ret_rtx
;
20673 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, PC_REGNUM
),
20674 gen_frame_mem (SImode
,
20675 gen_rtx_POST_INC (SImode
,
20676 stack_pointer_rtx
)));
20677 RTX_FRAME_RELATED_P (tmp
) = 1;
20678 XVECEXP (par
, 0, 1) = tmp
;
20679 par
= emit_jump_insn (par
);
20681 /* Generate dwarf info. */
20682 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20683 gen_rtx_REG (SImode
, PC_REGNUM
),
20685 REG_NOTES (par
) = dwarf
;
20686 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20687 stack_pointer_rtx
, stack_pointer_rtx
);
20691 /* Calculate the size of the return value that is passed in registers. */
20693 arm_size_return_regs (void)
20697 if (crtl
->return_rtx
!= 0)
20698 mode
= GET_MODE (crtl
->return_rtx
);
20700 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20702 return GET_MODE_SIZE (mode
);
20705 /* Return true if the current function needs to save/restore LR. */
20707 thumb_force_lr_save (void)
20709 return !cfun
->machine
->lr_save_eliminated
20711 || thumb_far_jump_used_p ()
20712 || df_regs_ever_live_p (LR_REGNUM
));
20715 /* We do not know if r3 will be available because
20716 we do have an indirect tailcall happening in this
20717 particular case. */
20719 is_indirect_tailcall_p (rtx call
)
20721 rtx pat
= PATTERN (call
);
20723 /* Indirect tail call. */
20724 pat
= XVECEXP (pat
, 0, 0);
20725 if (GET_CODE (pat
) == SET
)
20726 pat
= SET_SRC (pat
);
20728 pat
= XEXP (XEXP (pat
, 0), 0);
20729 return REG_P (pat
);
20732 /* Return true if r3 is used by any of the tail call insns in the
20733 current function. */
20735 any_sibcall_could_use_r3 (void)
20740 if (!crtl
->tail_call_emit
)
20742 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20743 if (e
->flags
& EDGE_SIBCALL
)
20745 rtx_insn
*call
= BB_END (e
->src
);
20746 if (!CALL_P (call
))
20747 call
= prev_nonnote_nondebug_insn (call
);
20748 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20749 if (find_regno_fusage (call
, USE
, 3)
20750 || is_indirect_tailcall_p (call
))
20757 /* Compute the distance from register FROM to register TO.
20758 These can be the arg pointer (26), the soft frame pointer (25),
20759 the stack pointer (13) or the hard frame pointer (11).
20760 In thumb mode r7 is used as the soft frame pointer, if needed.
20761 Typical stack layout looks like this:
20763 old stack pointer -> | |
20766 | | saved arguments for
20767 | | vararg functions
20770 hard FP & arg pointer -> | | \
20778 soft frame pointer -> | | /
20783 locals base pointer -> | | /
20788 current stack pointer -> | | /
20791 For a given function some or all of these stack components
20792 may not be needed, giving rise to the possibility of
20793 eliminating some of the registers.
20795 The values returned by this function must reflect the behavior
20796 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20798 The sign of the number returned reflects the direction of stack
20799 growth, so the values are positive for all eliminations except
20800 from the soft frame pointer to the hard frame pointer.
20802 SFP may point just inside the local variables block to ensure correct
20806 /* Return cached stack offsets. */
20808 static arm_stack_offsets
*
20809 arm_get_frame_offsets (void)
20811 struct arm_stack_offsets
*offsets
;
20813 offsets
= &cfun
->machine
->stack_offsets
;
20819 /* Calculate stack offsets. These are used to calculate register elimination
20820 offsets and in prologue/epilogue code. Also calculates which registers
20821 should be saved. */
20824 arm_compute_frame_layout (void)
20826 struct arm_stack_offsets
*offsets
;
20827 unsigned long func_type
;
20830 HOST_WIDE_INT frame_size
;
20833 offsets
= &cfun
->machine
->stack_offsets
;
 20835	  /* Initially this is the size of the local variables.  It will be translated
20836 into an offset once we have determined the size of preceding data. */
20837 frame_size
= ROUND_UP_WORD (get_frame_size ());
20839 /* Space for variadic functions. */
20840 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20842 /* In Thumb mode this is incorrect, but never used. */
20844 = (offsets
->saved_args
20845 + arm_compute_static_chain_stack_bytes ()
20846 + (frame_pointer_needed
? 4 : 0));
20850 unsigned int regno
;
20852 offsets
->saved_regs_mask
= arm_compute_save_core_reg_mask ();
20853 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20854 saved
= core_saved
;
20856 /* We know that SP will be doubleword aligned on entry, and we must
20857 preserve that condition at any subroutine call. We also require the
20858 soft frame pointer to be doubleword aligned. */
20860 if (TARGET_REALLY_IWMMXT
)
20862 /* Check for the call-saved iWMMXt registers. */
20863 for (regno
= FIRST_IWMMXT_REGNUM
;
20864 regno
<= LAST_IWMMXT_REGNUM
;
20866 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20870 func_type
= arm_current_func_type ();
20871 /* Space for saved VFP registers. */
20872 if (! IS_VOLATILE (func_type
)
20873 && TARGET_HARD_FLOAT
)
20874 saved
+= arm_get_vfp_saved_size ();
20876 else /* TARGET_THUMB1 */
20878 offsets
->saved_regs_mask
= thumb1_compute_save_core_reg_mask ();
20879 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20880 saved
= core_saved
;
20881 if (TARGET_BACKTRACE
)
20885 /* Saved registers include the stack frame. */
20886 offsets
->saved_regs
20887 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20888 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20890 /* A leaf function does not need any stack alignment if it has nothing
20892 if (crtl
->is_leaf
&& frame_size
== 0
20893 /* However if it calls alloca(), we have a dynamically allocated
20894 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20895 && ! cfun
->calls_alloca
)
20897 offsets
->outgoing_args
= offsets
->soft_frame
;
20898 offsets
->locals_base
= offsets
->soft_frame
;
20902 /* Ensure SFP has the correct alignment. */
20903 if (ARM_DOUBLEWORD_ALIGN
20904 && (offsets
->soft_frame
& 7))
20906 offsets
->soft_frame
+= 4;
20907 /* Try to align stack by pushing an extra reg. Don't bother doing this
20908 when there is a stack frame as the alignment will be rolled into
20909 the normal stack adjustment. */
20910 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20914 /* Register r3 is caller-saved. Normally it does not need to be
20915 saved on entry by the prologue. However if we choose to save
20916 it for padding then we may confuse the compiler into thinking
20917 a prologue sequence is required when in fact it is not. This
20918 will occur when shrink-wrapping if r3 is used as a scratch
20919 register and there are no other callee-saved writes.
20921 This situation can be avoided when other callee-saved registers
20922 are available and r3 is not mandatory if we choose a callee-saved
20923 register for padding. */
20924 bool prefer_callee_reg_p
= false;
20926 /* If it is safe to use r3, then do so. This sometimes
20927 generates better code on Thumb-2 by avoiding the need to
20928 use 32-bit push/pop instructions. */
20929 if (! any_sibcall_could_use_r3 ()
20930 && arm_size_return_regs () <= 12
20931 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20933 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20936 if (!TARGET_THUMB2
)
20937 prefer_callee_reg_p
= true;
20940 || prefer_callee_reg_p
)
20942 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
20944 /* Avoid fixed registers; they may be changed at
20945 arbitrary times so it's unsafe to restore them
20946 during the epilogue. */
20948 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
20958 offsets
->saved_regs
+= 4;
20959 offsets
->saved_regs_mask
|= (1 << reg
);
20964 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
20965 offsets
->outgoing_args
= (offsets
->locals_base
20966 + crtl
->outgoing_args_size
);
20968 if (ARM_DOUBLEWORD_ALIGN
)
20970 /* Ensure SP remains doubleword aligned. */
20971 if (offsets
->outgoing_args
& 7)
20972 offsets
->outgoing_args
+= 4;
20973 gcc_assert (!(offsets
->outgoing_args
& 7));
20978 /* Calculate the relative offsets for the different stack pointers. Positive
20979 offsets are in the direction of stack growth. */
20982 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20984 arm_stack_offsets
*offsets
;
20986 offsets
= arm_get_frame_offsets ();
20988 /* OK, now we have enough information to compute the distances.
20989 There must be an entry in these switch tables for each pair
20990 of registers in ELIMINABLE_REGS, even if some of the entries
20991 seem to be redundant or useless. */
20994 case ARG_POINTER_REGNUM
:
20997 case THUMB_HARD_FRAME_POINTER_REGNUM
:
21000 case FRAME_POINTER_REGNUM
:
21001 /* This is the reverse of the soft frame pointer
21002 to hard frame pointer elimination below. */
21003 return offsets
->soft_frame
- offsets
->saved_args
;
21005 case ARM_HARD_FRAME_POINTER_REGNUM
:
21006 /* This is only non-zero in the case where the static chain register
21007 is stored above the frame. */
21008 return offsets
->frame
- offsets
->saved_args
- 4;
21010 case STACK_POINTER_REGNUM
:
21011 /* If nothing has been pushed on the stack at all
21012 then this will return -4. This *is* correct! */
21013 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
21016 gcc_unreachable ();
21018 gcc_unreachable ();
21020 case FRAME_POINTER_REGNUM
:
21023 case THUMB_HARD_FRAME_POINTER_REGNUM
:
21026 case ARM_HARD_FRAME_POINTER_REGNUM
:
21027 /* The hard frame pointer points to the top entry in the
21028 stack frame. The soft frame pointer to the bottom entry
21029 in the stack frame. If there is no stack frame at all,
21030 then they are identical. */
21032 return offsets
->frame
- offsets
->soft_frame
;
21034 case STACK_POINTER_REGNUM
:
21035 return offsets
->outgoing_args
- offsets
->soft_frame
;
21038 gcc_unreachable ();
21040 gcc_unreachable ();
21043 /* You cannot eliminate from the stack pointer.
21044 In theory you could eliminate from the hard frame
21045 pointer to the stack pointer, but this will never
21046 happen, since if a stack frame is not needed the
21047 hard frame pointer will never be used. */
21048 gcc_unreachable ();
21052 /* Given FROM and TO register numbers, say whether this elimination is
21053 allowed. Frame pointer elimination is automatically handled.
21055 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21056 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21057 pointer, we must eliminate FRAME_POINTER_REGNUM into
21058 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21059 ARG_POINTER_REGNUM. */
21062 arm_can_eliminate (const int from
, const int to
)
21064 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
21065 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
21066 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
21067 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
21071 /* Emit RTL to save coprocessor registers on function entry. Returns the
21072 number of bytes pushed. */
21075 arm_save_coproc_regs(void)
21077 int saved_size
= 0;
21079 unsigned start_reg
;
21082 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
21083 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
21085 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21086 insn
= gen_rtx_MEM (V2SImode
, insn
);
21087 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
21088 RTX_FRAME_RELATED_P (insn
) = 1;
21092 if (TARGET_HARD_FLOAT
)
21094 start_reg
= FIRST_VFP_REGNUM
;
21096 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
21098 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
21099 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
21101 if (start_reg
!= reg
)
21102 saved_size
+= vfp_emit_fstmd (start_reg
,
21103 (reg
- start_reg
) / 2);
21104 start_reg
= reg
+ 2;
21107 if (start_reg
!= reg
)
21108 saved_size
+= vfp_emit_fstmd (start_reg
,
21109 (reg
- start_reg
) / 2);
21115 /* Set the Thumb frame pointer from the stack pointer. */
21118 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
21120 HOST_WIDE_INT amount
;
21123 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
21125 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21126 stack_pointer_rtx
, GEN_INT (amount
)));
21129 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21130 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21131 expects the first two operands to be the same. */
21134 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21136 hard_frame_pointer_rtx
));
21140 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21141 hard_frame_pointer_rtx
,
21142 stack_pointer_rtx
));
21144 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
21145 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21146 RTX_FRAME_RELATED_P (dwarf
) = 1;
21147 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21150 RTX_FRAME_RELATED_P (insn
) = 1;
21153 struct scratch_reg
{
21158 /* Return a short-lived scratch register for use as a 2nd scratch register on
21159 function entry after the registers are saved in the prologue. This register
21160 must be released by means of release_scratch_register_on_entry. IP is not
21161 considered since it is always used as the 1st scratch register if available.
21163 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21164 mask of live registers. */
21167 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
21168 unsigned long live_regs
)
21174 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
21180 for (i
= 4; i
< 11; i
++)
21181 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
21189 /* If IP is used as the 1st scratch register for a nested function,
21190 then either r3 wasn't available or is used to preserve IP. */
21191 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
21193 regno
= (regno1
== 3 ? 2 : 3);
21195 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
21200 sr
->reg
= gen_rtx_REG (SImode
, regno
);
21203 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21204 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
21205 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21206 plus_constant (Pmode
, stack_pointer_rtx
, -4));
21207 RTX_FRAME_RELATED_P (insn
) = 1;
21208 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21212 /* Release a scratch register obtained from the preceding function. */
21215 release_scratch_register_on_entry (struct scratch_reg
*sr
)
21219 rtx addr
= gen_rtx_POST_INC (Pmode
, stack_pointer_rtx
);
21220 rtx insn
= emit_set_insn (sr
->reg
, gen_frame_mem (SImode
, addr
));
21221 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21222 plus_constant (Pmode
, stack_pointer_rtx
, 4));
21223 RTX_FRAME_RELATED_P (insn
) = 1;
21224 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21228 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21230 #if PROBE_INTERVAL > 4096
21231 #error Cannot use indexed addressing mode for stack probing
21234 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21235 inclusive. These are offsets from the current stack pointer. REGNO1
21236 is the index number of the 1st scratch register and LIVE_REGS is the
21237 mask of live registers. */
21240 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
21241 unsigned int regno1
, unsigned long live_regs
)
21243 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
21245 /* See if we have a constant small number of probes to generate. If so,
21246 that's the easy case. */
21247 if (size
<= PROBE_INTERVAL
)
21249 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21250 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21251 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
21254 /* The run-time loop is made up of 10 insns in the generic case while the
21255 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21256 else if (size
<= 5 * PROBE_INTERVAL
)
21258 HOST_WIDE_INT i
, rem
;
21260 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21261 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21262 emit_stack_probe (reg1
);
21264 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21265 it exceeds SIZE. If only two probes are needed, this will not
21266 generate any code. Then probe at FIRST + SIZE. */
21267 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
21269 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21270 emit_stack_probe (reg1
);
21273 rem
= size
- (i
- PROBE_INTERVAL
);
21274 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21276 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21277 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
21280 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
21283 /* Otherwise, do the same as above, but in a loop. Note that we must be
21284 extra careful with variables wrapping around because we might be at
21285 the very top (or the very bottom) of the address space and we have
21286 to be able to handle this case properly; in particular, we use an
21287 equality test for the loop condition. */
21290 HOST_WIDE_INT rounded_size
;
21291 struct scratch_reg sr
;
21293 get_scratch_register_on_entry (&sr
, regno1
, live_regs
);
21295 emit_move_insn (reg1
, GEN_INT (first
));
21298 /* Step 1: round SIZE to the previous multiple of the interval. */
21300 rounded_size
= size
& -PROBE_INTERVAL
;
21301 emit_move_insn (sr
.reg
, GEN_INT (rounded_size
));
21304 /* Step 2: compute initial and final value of the loop counter. */
21306 /* TEST_ADDR = SP + FIRST. */
21307 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21309 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21310 emit_set_insn (sr
.reg
, gen_rtx_MINUS (Pmode
, reg1
, sr
.reg
));
21313 /* Step 3: the loop
21317 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21320 while (TEST_ADDR != LAST_ADDR)
21322 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21323 until it is equal to ROUNDED_SIZE. */
21325 emit_insn (gen_probe_stack_range (reg1
, reg1
, sr
.reg
));
21328 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21329 that SIZE is equal to ROUNDED_SIZE. */
21331 if (size
!= rounded_size
)
21333 HOST_WIDE_INT rem
= size
- rounded_size
;
21335 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21337 emit_set_insn (sr
.reg
,
21338 plus_constant (Pmode
, sr
.reg
, -PROBE_INTERVAL
));
21339 emit_stack_probe (plus_constant (Pmode
, sr
.reg
,
21340 PROBE_INTERVAL
- rem
));
21343 emit_stack_probe (plus_constant (Pmode
, sr
.reg
, -rem
));
21346 release_scratch_register_on_entry (&sr
);
21349 /* Make sure nothing is scheduled before we are done. */
21350 emit_insn (gen_blockage ());
21353 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21354 absolute addresses. */
21357 output_probe_stack_range (rtx reg1
, rtx reg2
)
21359 static int labelno
= 0;
21363 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
21366 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
21368 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21370 xops
[1] = GEN_INT (PROBE_INTERVAL
);
21371 output_asm_insn ("sub\t%0, %0, %1", xops
);
21373 /* Probe at TEST_ADDR. */
21374 output_asm_insn ("str\tr0, [%0, #0]", xops
);
21376 /* Test if TEST_ADDR == LAST_ADDR. */
21378 output_asm_insn ("cmp\t%0, %1", xops
);
21381 fputs ("\tbne\t", asm_out_file
);
21382 assemble_name_raw (asm_out_file
, loop_lab
);
21383 fputc ('\n', asm_out_file
);
21388 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21391 arm_expand_prologue (void)
21396 unsigned long live_regs_mask
;
21397 unsigned long func_type
;
21399 int saved_pretend_args
= 0;
21400 int saved_regs
= 0;
21401 unsigned HOST_WIDE_INT args_to_push
;
21402 HOST_WIDE_INT size
;
21403 arm_stack_offsets
*offsets
;
21406 func_type
= arm_current_func_type ();
21408 /* Naked functions don't have prologues. */
21409 if (IS_NAKED (func_type
))
21411 if (flag_stack_usage_info
)
21412 current_function_static_stack_size
= 0;
21416 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21417 args_to_push
= crtl
->args
.pretend_args_size
;
21419 /* Compute which register we will have to save onto the stack. */
21420 offsets
= arm_get_frame_offsets ();
21421 live_regs_mask
= offsets
->saved_regs_mask
;
21423 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
21425 if (IS_STACKALIGN (func_type
))
21429 /* Handle a word-aligned stack pointer. We generate the following:
21434 <save and restore r0 in normal prologue/epilogue>
21438 The unwinder doesn't need to know about the stack realignment.
21439 Just tell it we saved SP in r0. */
21440 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
21442 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
21443 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
21445 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
21446 RTX_FRAME_RELATED_P (insn
) = 1;
21447 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
21449 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
21451 /* ??? The CFA changes here, which may cause GDB to conclude that it
21452 has entered a different function. That said, the unwind info is
21453 correct, individually, before and after this instruction because
21454 we've described the save of SP, which will override the default
21455 handling of SP as restoring from the CFA. */
21456 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
21459 /* The static chain register is the same as the IP register. If it is
21460 clobbered when creating the frame, we need to save and restore it. */
21461 clobber_ip
= IS_NESTED (func_type
)
21462 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21463 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
21464 && !df_regs_ever_live_p (LR_REGNUM
)
21465 && arm_r3_live_at_start_p ()));
21467 /* Find somewhere to store IP whilst the frame is being created.
21468 We try the following places in order:
21470 1. The last argument register r3 if it is available.
21471 2. A slot on the stack above the frame if there are no
21472 arguments to push onto the stack.
21473 3. Register r3 again, after pushing the argument registers
21474 onto the stack, if this is a varargs function.
21475 4. The last slot on the stack created for the arguments to
21476 push, if this isn't a varargs function.
21478 Note - we only need to tell the dwarf2 backend about the SP
21479 adjustment in the second variant; the static chain register
21480 doesn't need to be unwound, as it doesn't contain a value
21481 inherited from the caller. */
21484 if (!arm_r3_live_at_start_p ())
21485 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21486 else if (args_to_push
== 0)
21490 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21493 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21494 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21497 /* Just tell the dwarf backend that we adjusted SP. */
21498 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21499 plus_constant (Pmode
, stack_pointer_rtx
,
21501 RTX_FRAME_RELATED_P (insn
) = 1;
21502 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21506 /* Store the args on the stack. */
21507 if (cfun
->machine
->uses_anonymous_args
)
21509 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21510 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21511 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21512 saved_pretend_args
= 1;
21518 if (args_to_push
== 4)
21519 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21521 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21522 plus_constant (Pmode
,
21526 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21528 /* Just tell the dwarf backend that we adjusted SP. */
21529 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21530 plus_constant (Pmode
, stack_pointer_rtx
,
21532 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21535 RTX_FRAME_RELATED_P (insn
) = 1;
21536 fp_offset
= args_to_push
;
21541 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21543 if (IS_INTERRUPT (func_type
))
21545 /* Interrupt functions must not corrupt any registers.
21546 Creating a frame pointer however, corrupts the IP
21547 register, so we must push it first. */
21548 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
21550 /* Do not set RTX_FRAME_RELATED_P on this insn.
21551 The dwarf stack unwinding code only wants to see one
21552 stack decrement per function, and this is not it. If
21553 this instruction is labeled as being part of the frame
21554 creation sequence then dwarf2out_frame_debug_expr will
21555 die when it encounters the assignment of IP to FP
21556 later on, since the use of SP here establishes SP as
21557 the CFA register and not IP.
21559 Anyway this instruction is not really part of the stack
21560 frame creation although it is part of the prologue. */
21563 insn
= emit_set_insn (ip_rtx
,
21564 plus_constant (Pmode
, stack_pointer_rtx
,
21566 RTX_FRAME_RELATED_P (insn
) = 1;
21571 /* Push the argument registers, or reserve space for them. */
21572 if (cfun
->machine
->uses_anonymous_args
)
21573 insn
= emit_multi_reg_push
21574 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21575 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21578 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21579 GEN_INT (- args_to_push
)));
21580 RTX_FRAME_RELATED_P (insn
) = 1;
21583 /* If this is an interrupt service routine, and the link register
21584 is going to be pushed, and we're not generating extra
21585 push of IP (needed when frame is needed and frame layout if apcs),
21586 subtracting four from LR now will mean that the function return
21587 can be done with a single instruction. */
21588 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21589 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21590 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21593 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21595 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
21598 if (live_regs_mask
)
21600 unsigned long dwarf_regs_mask
= live_regs_mask
;
21602 saved_regs
+= bit_count (live_regs_mask
) * 4;
21603 if (optimize_size
&& !frame_pointer_needed
21604 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
21606 /* If no coprocessor registers are being pushed and we don't have
21607 to worry about a frame pointer then push extra registers to
21608 create the stack frame. This is done in a way that does not
21609 alter the frame layout, so is independent of the epilogue. */
21613 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21615 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21616 if (frame
&& n
* 4 >= frame
)
21619 live_regs_mask
|= (1 << n
) - 1;
21620 saved_regs
+= frame
;
21625 && current_tune
->prefer_ldrd_strd
21626 && !optimize_function_for_size_p (cfun
))
21628 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21630 thumb2_emit_strd_push (live_regs_mask
);
21631 else if (TARGET_ARM
21632 && !TARGET_APCS_FRAME
21633 && !IS_INTERRUPT (func_type
))
21634 arm_emit_strd_push (live_regs_mask
);
21637 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21638 RTX_FRAME_RELATED_P (insn
) = 1;
21643 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21644 RTX_FRAME_RELATED_P (insn
) = 1;
21648 if (! IS_VOLATILE (func_type
))
21649 saved_regs
+= arm_save_coproc_regs ();
21651 if (frame_pointer_needed
&& TARGET_ARM
)
21653 /* Create the new frame pointer. */
21654 if (TARGET_APCS_FRAME
)
21656 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21657 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21658 RTX_FRAME_RELATED_P (insn
) = 1;
21662 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
21663 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21664 stack_pointer_rtx
, insn
));
21665 RTX_FRAME_RELATED_P (insn
) = 1;
21669 size
= offsets
->outgoing_args
- offsets
->saved_args
;
21670 if (flag_stack_usage_info
)
21671 current_function_static_stack_size
= size
;
21673 /* If this isn't an interrupt service routine and we have a frame, then do
21674 stack checking. We use IP as the first scratch register, except for the
21675 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21676 if (!IS_INTERRUPT (func_type
)
21677 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
21679 unsigned int regno
;
21681 if (!IS_NESTED (func_type
) || clobber_ip
)
21683 else if (df_regs_ever_live_p (LR_REGNUM
))
21688 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
21690 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
21691 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
,
21692 size
- STACK_CHECK_PROTECT
,
21693 regno
, live_regs_mask
);
21696 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
,
21697 regno
, live_regs_mask
);
21700 /* Recover the static chain register. */
21703 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21704 insn
= gen_rtx_REG (SImode
, 3);
21707 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21708 insn
= gen_frame_mem (SImode
, insn
);
21710 emit_set_insn (ip_rtx
, insn
);
21711 emit_insn (gen_force_register_use (ip_rtx
));
21714 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21716 /* This add can produce multiple insns for a large constant, so we
21717 need to get tricky. */
21718 rtx_insn
*last
= get_last_insn ();
21720 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21721 - offsets
->outgoing_args
);
21723 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21727 last
= last
? NEXT_INSN (last
) : get_insns ();
21728 RTX_FRAME_RELATED_P (last
) = 1;
21730 while (last
!= insn
);
21732 /* If the frame pointer is needed, emit a special barrier that
21733 will prevent the scheduler from moving stores to the frame
21734 before the stack adjustment. */
21735 if (frame_pointer_needed
)
21736 emit_insn (gen_stack_tie (stack_pointer_rtx
,
21737 hard_frame_pointer_rtx
));
21741 if (frame_pointer_needed
&& TARGET_THUMB2
)
21742 thumb_set_frame_pointer (offsets
);
21744 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21746 unsigned long mask
;
21748 mask
= live_regs_mask
;
21749 mask
&= THUMB2_WORK_REGS
;
21750 if (!IS_NESTED (func_type
))
21751 mask
|= (1 << IP_REGNUM
);
21752 arm_load_pic_register (mask
);
21755 /* If we are profiling, make sure no instructions are scheduled before
21756 the call to mcount. Similarly if the user has requested no
21757 scheduling in the prolog. Similarly if we want non-call exceptions
21758 using the EABI unwinder, to prevent faulting instructions from being
21759 swapped with a stack adjustment. */
21760 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21761 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21762 && cfun
->can_throw_non_call_exceptions
))
21763 emit_insn (gen_blockage ());
21765 /* If the link register is being kept alive, with the return address in it,
21766 then make sure that it does not get reused by the ce2 pass. */
21767 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
21768 cfun
->machine
->lr_save_eliminated
= 1;
21771 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21773 arm_print_condition (FILE *stream
)
21775 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
21777 /* Branch conversion is not implemented for Thumb-2. */
21780 output_operand_lossage ("predicated Thumb instruction");
21783 if (current_insn_predicate
!= NULL
)
21785 output_operand_lossage
21786 ("predicated instruction in conditional sequence");
21790 fputs (arm_condition_codes
[arm_current_cc
], stream
);
21792 else if (current_insn_predicate
)
21794 enum arm_cond_code code
;
21798 output_operand_lossage ("predicated Thumb instruction");
21802 code
= get_arm_condition_code (current_insn_predicate
);
21803 fputs (arm_condition_codes
[code
], stream
);
21808 /* Globally reserved letters: acln
21809 Punctuation letters currently used: @_|?().!#
21810 Lower case letters currently used: bcdefhimpqtvwxyz
21811 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21812 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21814 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21816 If CODE is 'd', then the X is a condition operand and the instruction
21817 should only be executed if the condition is true.
21818 if CODE is 'D', then the X is a condition operand and the instruction
21819 should only be executed if the condition is false: however, if the mode
21820 of the comparison is CCFPEmode, then always execute the instruction -- we
21821 do this because in these circumstances !GE does not necessarily imply LT;
21822 in these cases the instruction pattern will take care to make sure that
21823 an instruction containing %d will follow, thereby undoing the effects of
21824 doing this instruction unconditionally.
21825 If CODE is 'N' then X is a floating point operand that must be negated
21827 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21828 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21830 arm_print_operand (FILE *stream
, rtx x
, int code
)
21835 fputs (ASM_COMMENT_START
, stream
);
21839 fputs (user_label_prefix
, stream
);
21843 fputs (REGISTER_PREFIX
, stream
);
21847 arm_print_condition (stream
);
21851 /* The current condition code for a condition code setting instruction.
21852 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21853 fputc('s', stream
);
21854 arm_print_condition (stream
);
21858 /* If the instruction is conditionally executed then print
21859 the current condition code, otherwise print 's'. */
21860 gcc_assert (TARGET_THUMB2
);
21861 if (current_insn_predicate
)
21862 arm_print_condition (stream
);
21864 fputc('s', stream
);
21867 /* %# is a "break" sequence. It doesn't output anything, but is used to
21868 separate e.g. operand numbers from following text, if that text consists
21869 of further digits which we don't want to be part of the operand
21877 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
21878 fprintf (stream
, "%s", fp_const_from_val (&r
));
21882 /* An integer or symbol address without a preceding # sign. */
21884 switch (GET_CODE (x
))
21887 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21891 output_addr_const (stream
, x
);
21895 if (GET_CODE (XEXP (x
, 0)) == PLUS
21896 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21898 output_addr_const (stream
, x
);
21901 /* Fall through. */
21904 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21908 /* An integer that we want to print in HEX. */
21910 switch (GET_CODE (x
))
21913 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21917 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21922 if (CONST_INT_P (x
))
21925 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21926 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21930 putc ('~', stream
);
21931 output_addr_const (stream
, x
);
21936 /* Print the log2 of a CONST_INT. */
21940 if (!CONST_INT_P (x
)
21941 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
21942 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21944 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21949 /* The low 16 bits of an immediate constant. */
21950 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
21954 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
21958 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
21966 shift
= shift_op (x
, &val
);
21970 fprintf (stream
, ", %s ", shift
);
21972 arm_print_operand (stream
, XEXP (x
, 1), 0);
21974 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21979 /* An explanation of the 'Q', 'R' and 'H' register operands:
21981 In a pair of registers containing a DI or DF value the 'Q'
21982 operand returns the register number of the register containing
21983 the least significant part of the value. The 'R' operand returns
21984 the register number of the register containing the most
21985 significant part of the value.
21987 The 'H' operand returns the higher of the two register numbers.
21988 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21989 same as the 'Q' operand, since the most significant part of the
21990 value is held in the lower number register. The reverse is true
21991 on systems where WORDS_BIG_ENDIAN is false.
21993 The purpose of these operands is to distinguish between cases
21994 where the endian-ness of the values is important (for example
21995 when they are added together), and cases where the endian-ness
21996 is irrelevant, but the order of register operations is important.
21997 For example when loading a value from memory into a register
21998 pair, the endian-ness does not matter. Provided that the value
21999 from the lower memory address is put into the lower numbered
22000 register, and the value from the higher address is put into the
22001 higher numbered register, the load will work regardless of whether
22002 the value being loaded is big-wordian or little-wordian. The
22003 order of the two register loads can matter however, if the address
22004 of the memory location is actually held in one of the registers
22005 being overwritten by the load.
22007 The 'Q' and 'R' constraints are also available for 64-bit
22010 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
22012 rtx part
= gen_lowpart (SImode
, x
);
22013 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
22017 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22019 output_operand_lossage ("invalid operand for code '%c'", code
);
22023 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
22027 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
22029 machine_mode mode
= GET_MODE (x
);
22032 if (mode
== VOIDmode
)
22034 part
= gen_highpart_mode (SImode
, mode
, x
);
22035 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
22039 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22041 output_operand_lossage ("invalid operand for code '%c'", code
);
22045 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
22049 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22051 output_operand_lossage ("invalid operand for code '%c'", code
);
22055 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
22059 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22061 output_operand_lossage ("invalid operand for code '%c'", code
);
22065 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
22069 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22071 output_operand_lossage ("invalid operand for code '%c'", code
);
22075 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
22079 asm_fprintf (stream
, "%r",
22080 REG_P (XEXP (x
, 0))
22081 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
22085 asm_fprintf (stream
, "{%r-%r}",
22087 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
22090 /* Like 'M', but writing doubleword vector registers, for use by Neon
22094 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
22095 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
22097 asm_fprintf (stream
, "{d%d}", regno
);
22099 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
22104 /* CONST_TRUE_RTX means always -- that's the default. */
22105 if (x
== const_true_rtx
)
22108 if (!COMPARISON_P (x
))
22110 output_operand_lossage ("invalid operand for code '%c'", code
);
22114 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
22119 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22120 want to do that. */
22121 if (x
== const_true_rtx
)
22123 output_operand_lossage ("instruction never executed");
22126 if (!COMPARISON_P (x
))
22128 output_operand_lossage ("invalid operand for code '%c'", code
);
22132 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
22133 (get_arm_condition_code (x
))],
22143 /* Former Maverick support, removed after GCC-4.7. */
22144 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
22149 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
22150 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
22151 /* Bad value for wCG register number. */
22153 output_operand_lossage ("invalid operand for code '%c'", code
);
22158 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
22161 /* Print an iWMMXt control register name. */
22163 if (!CONST_INT_P (x
)
22165 || INTVAL (x
) >= 16)
22166 /* Bad value for wC register number. */
22168 output_operand_lossage ("invalid operand for code '%c'", code
);
22174 static const char * wc_reg_names
[16] =
22176 "wCID", "wCon", "wCSSF", "wCASF",
22177 "wC4", "wC5", "wC6", "wC7",
22178 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22179 "wC12", "wC13", "wC14", "wC15"
22182 fputs (wc_reg_names
[INTVAL (x
)], stream
);
22186 /* Print the high single-precision register of a VFP double-precision
22190 machine_mode mode
= GET_MODE (x
);
22193 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
22195 output_operand_lossage ("invalid operand for code '%c'", code
);
22200 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
22202 output_operand_lossage ("invalid operand for code '%c'", code
);
22206 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
22210 /* Print a VFP/Neon double precision or quad precision register name. */
22214 machine_mode mode
= GET_MODE (x
);
22215 int is_quad
= (code
== 'q');
22218 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
22220 output_operand_lossage ("invalid operand for code '%c'", code
);
22225 || !IS_VFP_REGNUM (REGNO (x
)))
22227 output_operand_lossage ("invalid operand for code '%c'", code
);
22232 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
22233 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
22235 output_operand_lossage ("invalid operand for code '%c'", code
);
22239 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
22240 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
22244 /* These two codes print the low/high doubleword register of a Neon quad
22245 register, respectively. For pair-structure types, can also print
22246 low/high quadword registers. */
22250 machine_mode mode
= GET_MODE (x
);
22253 if ((GET_MODE_SIZE (mode
) != 16
22254 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
22256 output_operand_lossage ("invalid operand for code '%c'", code
);
22261 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
22263 output_operand_lossage ("invalid operand for code '%c'", code
);
22267 if (GET_MODE_SIZE (mode
) == 16)
22268 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
22269 + (code
== 'f' ? 1 : 0));
22271 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
22272 + (code
== 'f' ? 1 : 0));
22276 /* Print a VFPv3 floating-point constant, represented as an integer
22280 int index
= vfp3_const_double_index (x
);
22281 gcc_assert (index
!= -1);
22282 fprintf (stream
, "%d", index
);
22286 /* Print bits representing opcode features for Neon.
22288 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22289 and polynomials as unsigned.
22291 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22293 Bit 2 is 1 for rounding functions, 0 otherwise. */
22295 /* Identify the type as 's', 'u', 'p' or 'f'. */
22298 HOST_WIDE_INT bits
= INTVAL (x
);
22299 fputc ("uspf"[bits
& 3], stream
);
22303 /* Likewise, but signed and unsigned integers are both 'i'. */
22306 HOST_WIDE_INT bits
= INTVAL (x
);
22307 fputc ("iipf"[bits
& 3], stream
);
22311 /* As for 'T', but emit 'u' instead of 'p'. */
22314 HOST_WIDE_INT bits
= INTVAL (x
);
22315 fputc ("usuf"[bits
& 3], stream
);
22319 /* Bit 2: rounding (vs none). */
22322 HOST_WIDE_INT bits
= INTVAL (x
);
22323 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
22327 /* Memory operand for vld1/vst1 instruction. */
22331 bool postinc
= FALSE
;
22332 rtx postinc_reg
= NULL
;
22333 unsigned align
, memsize
, align_bits
;
22335 gcc_assert (MEM_P (x
));
22336 addr
= XEXP (x
, 0);
22337 if (GET_CODE (addr
) == POST_INC
)
22340 addr
= XEXP (addr
, 0);
22342 if (GET_CODE (addr
) == POST_MODIFY
)
22344 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
22345 addr
= XEXP (addr
, 0);
22347 asm_fprintf (stream
, "[%r", REGNO (addr
));
22349 /* We know the alignment of this access, so we can emit a hint in the
22350 instruction (for some alignments) as an aid to the memory subsystem
22352 align
= MEM_ALIGN (x
) >> 3;
22353 memsize
= MEM_SIZE (x
);
22355 /* Only certain alignment specifiers are supported by the hardware. */
22356 if (memsize
== 32 && (align
% 32) == 0)
22358 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
22360 else if (memsize
>= 8 && (align
% 8) == 0)
22365 if (align_bits
!= 0)
22366 asm_fprintf (stream
, ":%d", align_bits
);
22368 asm_fprintf (stream
, "]");
22371 fputs("!", stream
);
22373 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
22381 gcc_assert (MEM_P (x
));
22382 addr
= XEXP (x
, 0);
22383 gcc_assert (REG_P (addr
));
22384 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22388 /* Translate an S register number into a D register number and element index. */
22391 machine_mode mode
= GET_MODE (x
);
22394 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22396 output_operand_lossage ("invalid operand for code '%c'", code
);
22401 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22403 output_operand_lossage ("invalid operand for code '%c'", code
);
22407 regno
= regno
- FIRST_VFP_REGNUM
;
22408 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
22413 gcc_assert (CONST_DOUBLE_P (x
));
22415 result
= vfp3_const_double_for_fract_bits (x
);
22417 result
= vfp3_const_double_for_bits (x
);
22418 fprintf (stream
, "#%d", result
);
22421 /* Register specifier for vld1.16/vst1.16. Translate the S register
22422 number into a D register number and element index. */
22425 machine_mode mode
= GET_MODE (x
);
22428 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
22430 output_operand_lossage ("invalid operand for code '%c'", code
);
22435 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22437 output_operand_lossage ("invalid operand for code '%c'", code
);
22441 regno
= regno
- FIRST_VFP_REGNUM
;
22442 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
22449 output_operand_lossage ("missing operand");
22453 switch (GET_CODE (x
))
22456 asm_fprintf (stream
, "%r", REGNO (x
));
22460 output_address (GET_MODE (x
), XEXP (x
, 0));
22466 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
22467 sizeof (fpstr
), 0, 1);
22468 fprintf (stream
, "#%s", fpstr
);
22473 gcc_assert (GET_CODE (x
) != NEG
);
22474 fputc ('#', stream
);
22475 if (GET_CODE (x
) == HIGH
)
22477 fputs (":lower16:", stream
);
22481 output_addr_const (stream
, x
);
22487 /* Target hook for printing a memory address. */
22489 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
22493 int is_minus
= GET_CODE (x
) == MINUS
;
22496 asm_fprintf (stream
, "[%r]", REGNO (x
));
22497 else if (GET_CODE (x
) == PLUS
|| is_minus
)
22499 rtx base
= XEXP (x
, 0);
22500 rtx index
= XEXP (x
, 1);
22501 HOST_WIDE_INT offset
= 0;
22503 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
22505 /* Ensure that BASE is a register. */
22506 /* (one of them must be). */
22507 /* Also ensure the SP is not used as in index register. */
22508 std::swap (base
, index
);
22510 switch (GET_CODE (index
))
22513 offset
= INTVAL (index
);
22516 asm_fprintf (stream
, "[%r, #%wd]",
22517 REGNO (base
), offset
);
22521 asm_fprintf (stream
, "[%r, %s%r]",
22522 REGNO (base
), is_minus
? "-" : "",
22532 asm_fprintf (stream
, "[%r, %s%r",
22533 REGNO (base
), is_minus
? "-" : "",
22534 REGNO (XEXP (index
, 0)));
22535 arm_print_operand (stream
, index
, 'S');
22536 fputs ("]", stream
);
22541 gcc_unreachable ();
22544 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22545 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22547 gcc_assert (REG_P (XEXP (x
, 0)));
22549 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22550 asm_fprintf (stream
, "[%r, #%s%d]!",
22551 REGNO (XEXP (x
, 0)),
22552 GET_CODE (x
) == PRE_DEC
? "-" : "",
22553 GET_MODE_SIZE (mode
));
22555 asm_fprintf (stream
, "[%r], #%s%d",
22556 REGNO (XEXP (x
, 0)),
22557 GET_CODE (x
) == POST_DEC
? "-" : "",
22558 GET_MODE_SIZE (mode
));
22560 else if (GET_CODE (x
) == PRE_MODIFY
)
22562 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22563 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22564 asm_fprintf (stream
, "#%wd]!",
22565 INTVAL (XEXP (XEXP (x
, 1), 1)));
22567 asm_fprintf (stream
, "%r]!",
22568 REGNO (XEXP (XEXP (x
, 1), 1)));
22570 else if (GET_CODE (x
) == POST_MODIFY
)
22572 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22573 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22574 asm_fprintf (stream
, "#%wd",
22575 INTVAL (XEXP (XEXP (x
, 1), 1)));
22577 asm_fprintf (stream
, "%r",
22578 REGNO (XEXP (XEXP (x
, 1), 1)));
22580 else output_addr_const (stream
, x
);
22585 asm_fprintf (stream
, "[%r]", REGNO (x
));
22586 else if (GET_CODE (x
) == POST_INC
)
22587 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22588 else if (GET_CODE (x
) == PLUS
)
22590 gcc_assert (REG_P (XEXP (x
, 0)));
22591 if (CONST_INT_P (XEXP (x
, 1)))
22592 asm_fprintf (stream
, "[%r, #%wd]",
22593 REGNO (XEXP (x
, 0)),
22594 INTVAL (XEXP (x
, 1)));
22596 asm_fprintf (stream
, "[%r, %r]",
22597 REGNO (XEXP (x
, 0)),
22598 REGNO (XEXP (x
, 1)));
22601 output_addr_const (stream
, x
);
22605 /* Target hook for indicating whether a punctuation character for
22606 TARGET_PRINT_OPERAND is valid. */
22608 arm_print_operand_punct_valid_p (unsigned char code
)
22610 return (code
== '@' || code
== '|' || code
== '.'
22611 || code
== '(' || code
== ')' || code
== '#'
22612 || (TARGET_32BIT
&& (code
== '?'))
22613 || (TARGET_THUMB2
&& (code
== '!'))
22614 || (TARGET_THUMB
&& (code
== '_')));
22617 /* Target hook for assembling integer objects. The ARM version needs to
22618 handle word-sized values specially. */
22620 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
22624 if (size
== UNITS_PER_WORD
&& aligned_p
)
22626 fputs ("\t.word\t", asm_out_file
);
22627 output_addr_const (asm_out_file
, x
);
22629 /* Mark symbols as position independent. We only do this in the
22630 .text segment, not in the .data segment. */
22631 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
22632 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
22634 /* See legitimize_pic_address for an explanation of the
22635 TARGET_VXWORKS_RTP check. */
22636 /* References to weak symbols cannot be resolved locally:
22637 they may be overridden by a non-weak definition at link
22639 if (!arm_pic_data_is_text_relative
22640 || (GET_CODE (x
) == SYMBOL_REF
22641 && (!SYMBOL_REF_LOCAL_P (x
)
22642 || (SYMBOL_REF_DECL (x
)
22643 ? DECL_WEAK (SYMBOL_REF_DECL (x
)) : 0))))
22644 fputs ("(GOT)", asm_out_file
);
22646 fputs ("(GOTOFF)", asm_out_file
);
22648 fputc ('\n', asm_out_file
);
22652 mode
= GET_MODE (x
);
22654 if (arm_vector_mode_supported_p (mode
))
22658 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22660 units
= CONST_VECTOR_NUNITS (x
);
22661 size
= GET_MODE_UNIT_SIZE (mode
);
22663 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22664 for (i
= 0; i
< units
; i
++)
22666 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22668 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
22671 for (i
= 0; i
< units
; i
++)
22673 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22675 (*CONST_DOUBLE_REAL_VALUE (elt
),
22676 as_a
<scalar_float_mode
> (GET_MODE_INNER (mode
)),
22677 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
22683 return default_assemble_integer (x
, size
, aligned_p
);
22687 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
22691 if (!TARGET_AAPCS_BASED
)
22694 default_named_section_asm_out_constructor
22695 : default_named_section_asm_out_destructor
) (symbol
, priority
);
22699 /* Put these in the .init_array section, using a special relocation. */
22700 if (priority
!= DEFAULT_INIT_PRIORITY
)
22703 sprintf (buf
, "%s.%.5u",
22704 is_ctor
? ".init_array" : ".fini_array",
22706 s
= get_section (buf
, SECTION_WRITE
| SECTION_NOTYPE
, NULL_TREE
);
22713 switch_to_section (s
);
22714 assemble_align (POINTER_SIZE
);
22715 fputs ("\t.word\t", asm_out_file
);
22716 output_addr_const (asm_out_file
, symbol
);
22717 fputs ("(target1)\n", asm_out_file
);
22720 /* Add a function to the list of static constructors. */
22723 arm_elf_asm_constructor (rtx symbol
, int priority
)
22725 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
22728 /* Add a function to the list of static destructors. */
22731 arm_elf_asm_destructor (rtx symbol
, int priority
)
22733 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
22736 /* A finite state machine takes care of noticing whether or not instructions
22737 can be conditionally executed, and thus decrease execution time and code
22738 size by deleting branch instructions. The fsm is controlled by
22739 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22741 /* The state of the fsm controlling condition codes are:
22742 0: normal, do nothing special
22743 1: make ASM_OUTPUT_OPCODE not output this instruction
22744 2: make ASM_OUTPUT_OPCODE not output this instruction
22745 3: make instructions conditional
22746 4: make instructions conditional
22748 State transitions (state->state by whom under condition):
22749 0 -> 1 final_prescan_insn if the `target' is a label
22750 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22751 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22752 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22753 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22754 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22755 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22756 (the target insn is arm_target_insn).
22758 If the jump clobbers the conditions then we use states 2 and 4.
22760 A similar thing can be done with conditional return insns.
22762 XXX In case the `target' is an unconditional branch, this conditionalising
22763 of the instructions always reduces code size, but not always execution
22764 time. But then, I want to reduce the code size to somewhere near what
22765 /bin/cc produces. */
22767 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22768 instructions. When a COND_EXEC instruction is seen the subsequent
22769 instructions are scanned so that multiple conditional instructions can be
22770 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22771 specify the length and true/false mask for the IT block. These will be
22772 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22774 /* Returns the index of the ARM condition code string in
22775 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22776 COMPARISON should be an rtx like `(eq (...) (...))'. */
22779 maybe_get_arm_condition_code (rtx comparison
)
22781 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
22782 enum arm_cond_code code
;
22783 enum rtx_code comp_code
= GET_CODE (comparison
);
22785 if (GET_MODE_CLASS (mode
) != MODE_CC
)
22786 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
22787 XEXP (comparison
, 1));
22791 case E_CC_DNEmode
: code
= ARM_NE
; goto dominance
;
22792 case E_CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
22793 case E_CC_DGEmode
: code
= ARM_GE
; goto dominance
;
22794 case E_CC_DGTmode
: code
= ARM_GT
; goto dominance
;
22795 case E_CC_DLEmode
: code
= ARM_LE
; goto dominance
;
22796 case E_CC_DLTmode
: code
= ARM_LT
; goto dominance
;
22797 case E_CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
22798 case E_CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
22799 case E_CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
22800 case E_CC_DLTUmode
: code
= ARM_CC
;
22803 if (comp_code
== EQ
)
22804 return ARM_INVERSE_CONDITION_CODE (code
);
22805 if (comp_code
== NE
)
22809 case E_CC_NOOVmode
:
22812 case NE
: return ARM_NE
;
22813 case EQ
: return ARM_EQ
;
22814 case GE
: return ARM_PL
;
22815 case LT
: return ARM_MI
;
22816 default: return ARM_NV
;
22822 case NE
: return ARM_NE
;
22823 case EQ
: return ARM_EQ
;
22824 default: return ARM_NV
;
22830 case NE
: return ARM_MI
;
22831 case EQ
: return ARM_PL
;
22832 default: return ARM_NV
;
22837 /* We can handle all cases except UNEQ and LTGT. */
22840 case GE
: return ARM_GE
;
22841 case GT
: return ARM_GT
;
22842 case LE
: return ARM_LS
;
22843 case LT
: return ARM_MI
;
22844 case NE
: return ARM_NE
;
22845 case EQ
: return ARM_EQ
;
22846 case ORDERED
: return ARM_VC
;
22847 case UNORDERED
: return ARM_VS
;
22848 case UNLT
: return ARM_LT
;
22849 case UNLE
: return ARM_LE
;
22850 case UNGT
: return ARM_HI
;
22851 case UNGE
: return ARM_PL
;
22852 /* UNEQ and LTGT do not have a representation. */
22853 case UNEQ
: /* Fall through. */
22854 case LTGT
: /* Fall through. */
22855 default: return ARM_NV
;
22861 case NE
: return ARM_NE
;
22862 case EQ
: return ARM_EQ
;
22863 case GE
: return ARM_LE
;
22864 case GT
: return ARM_LT
;
22865 case LE
: return ARM_GE
;
22866 case LT
: return ARM_GT
;
22867 case GEU
: return ARM_LS
;
22868 case GTU
: return ARM_CC
;
22869 case LEU
: return ARM_CS
;
22870 case LTU
: return ARM_HI
;
22871 default: return ARM_NV
;
22877 case LTU
: return ARM_CS
;
22878 case GEU
: return ARM_CC
;
22879 case NE
: return ARM_CS
;
22880 case EQ
: return ARM_CC
;
22881 default: return ARM_NV
;
22887 case NE
: return ARM_NE
;
22888 case EQ
: return ARM_EQ
;
22889 case GEU
: return ARM_CS
;
22890 case GTU
: return ARM_HI
;
22891 case LEU
: return ARM_LS
;
22892 case LTU
: return ARM_CC
;
22893 default: return ARM_NV
;
22899 case GE
: return ARM_GE
;
22900 case LT
: return ARM_LT
;
22901 case GEU
: return ARM_CS
;
22902 case LTU
: return ARM_CC
;
22903 default: return ARM_NV
;
22909 case NE
: return ARM_VS
;
22910 case EQ
: return ARM_VC
;
22911 default: return ARM_NV
;
22917 case NE
: return ARM_NE
;
22918 case EQ
: return ARM_EQ
;
22919 case GE
: return ARM_GE
;
22920 case GT
: return ARM_GT
;
22921 case LE
: return ARM_LE
;
22922 case LT
: return ARM_LT
;
22923 case GEU
: return ARM_CS
;
22924 case GTU
: return ARM_HI
;
22925 case LEU
: return ARM_LS
;
22926 case LTU
: return ARM_CC
;
22927 default: return ARM_NV
;
22930 default: gcc_unreachable ();
22934 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22935 static enum arm_cond_code
22936 get_arm_condition_code (rtx comparison
)
22938 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
22939 gcc_assert (code
!= ARM_NV
);
22943 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22944 code registers when not targetting Thumb1. The VFP condition register
22945 only exists when generating hard-float code. */
22947 arm_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
22953 *p2
= TARGET_HARD_FLOAT
? VFPCC_REGNUM
: INVALID_REGNUM
;
22957 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22960 thumb2_final_prescan_insn (rtx_insn
*insn
)
22962 rtx_insn
*first_insn
= insn
;
22963 rtx body
= PATTERN (insn
);
22965 enum arm_cond_code code
;
22970 /* max_insns_skipped in the tune was already taken into account in the
22971 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22972 just emit the IT blocks as we can. It does not make sense to split
22974 max
= MAX_INSN_PER_IT_BLOCK
;
22976 /* Remove the previous insn from the count of insns to be output. */
22977 if (arm_condexec_count
)
22978 arm_condexec_count
--;
22980 /* Nothing to do if we are already inside a conditional block. */
22981 if (arm_condexec_count
)
22984 if (GET_CODE (body
) != COND_EXEC
)
22987 /* Conditional jumps are implemented directly. */
22991 predicate
= COND_EXEC_TEST (body
);
22992 arm_current_cc
= get_arm_condition_code (predicate
);
22994 n
= get_attr_ce_count (insn
);
22995 arm_condexec_count
= 1;
22996 arm_condexec_mask
= (1 << n
) - 1;
22997 arm_condexec_masklen
= n
;
22998 /* See if subsequent instructions can be combined into the same block. */
23001 insn
= next_nonnote_insn (insn
);
23003 /* Jumping into the middle of an IT block is illegal, so a label or
23004 barrier terminates the block. */
23005 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
23008 body
= PATTERN (insn
);
23009 /* USE and CLOBBER aren't really insns, so just skip them. */
23010 if (GET_CODE (body
) == USE
23011 || GET_CODE (body
) == CLOBBER
)
23014 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23015 if (GET_CODE (body
) != COND_EXEC
)
23017 /* Maximum number of conditionally executed instructions in a block. */
23018 n
= get_attr_ce_count (insn
);
23019 if (arm_condexec_masklen
+ n
> max
)
23022 predicate
= COND_EXEC_TEST (body
);
23023 code
= get_arm_condition_code (predicate
);
23024 mask
= (1 << n
) - 1;
23025 if (arm_current_cc
== code
)
23026 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
23027 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
23030 arm_condexec_count
++;
23031 arm_condexec_masklen
+= n
;
23033 /* A jump must be the last instruction in a conditional block. */
23037 /* Restore recog_data (getting the attributes of other insns can
23038 destroy this array, but final.c assumes that it remains intact
23039 across this call). */
23040 extract_constrain_insn_cached (first_insn
);
23044 arm_final_prescan_insn (rtx_insn
*insn
)
23046 /* BODY will hold the body of INSN. */
23047 rtx body
= PATTERN (insn
);
23049 /* This will be 1 if trying to repeat the trick, and things need to be
23050 reversed if it appears to fail. */
23053 /* If we start with a return insn, we only succeed if we find another one. */
23054 int seeking_return
= 0;
23055 enum rtx_code return_code
= UNKNOWN
;
23057 /* START_INSN will hold the insn from where we start looking. This is the
23058 first insn after the following code_label if REVERSE is true. */
23059 rtx_insn
*start_insn
= insn
;
23061 /* If in state 4, check if the target branch is reached, in order to
23062 change back to state 0. */
23063 if (arm_ccfsm_state
== 4)
23065 if (insn
== arm_target_insn
)
23067 arm_target_insn
= NULL
;
23068 arm_ccfsm_state
= 0;
23073 /* If in state 3, it is possible to repeat the trick, if this insn is an
23074 unconditional branch to a label, and immediately following this branch
23075 is the previous target label which is only used once, and the label this
23076 branch jumps to is not too far off. */
23077 if (arm_ccfsm_state
== 3)
23079 if (simplejump_p (insn
))
23081 start_insn
= next_nonnote_insn (start_insn
);
23082 if (BARRIER_P (start_insn
))
23084 /* XXX Isn't this always a barrier? */
23085 start_insn
= next_nonnote_insn (start_insn
);
23087 if (LABEL_P (start_insn
)
23088 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23089 && LABEL_NUSES (start_insn
) == 1)
23094 else if (ANY_RETURN_P (body
))
23096 start_insn
= next_nonnote_insn (start_insn
);
23097 if (BARRIER_P (start_insn
))
23098 start_insn
= next_nonnote_insn (start_insn
);
23099 if (LABEL_P (start_insn
)
23100 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23101 && LABEL_NUSES (start_insn
) == 1)
23104 seeking_return
= 1;
23105 return_code
= GET_CODE (body
);
23114 gcc_assert (!arm_ccfsm_state
|| reverse
);
23115 if (!JUMP_P (insn
))
23118 /* This jump might be paralleled with a clobber of the condition codes
23119 the jump should always come first */
23120 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
23121 body
= XVECEXP (body
, 0, 0);
23124 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
23125 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
23128 int fail
= FALSE
, succeed
= FALSE
;
23129 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23130 int then_not_else
= TRUE
;
23131 rtx_insn
*this_insn
= start_insn
;
23134 /* Register the insn jumped to. */
23137 if (!seeking_return
)
23138 label
= XEXP (SET_SRC (body
), 0);
23140 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
23141 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
23142 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
23144 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
23145 then_not_else
= FALSE
;
23147 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
23149 seeking_return
= 1;
23150 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
23152 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
23154 seeking_return
= 1;
23155 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
23156 then_not_else
= FALSE
;
23159 gcc_unreachable ();
23161 /* See how many insns this branch skips, and what kind of insns. If all
23162 insns are okay, and the label or unconditional branch to the same
23163 label is not too far away, succeed. */
23164 for (insns_skipped
= 0;
23165 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
23169 this_insn
= next_nonnote_insn (this_insn
);
23173 switch (GET_CODE (this_insn
))
23176 /* Succeed if it is the target label, otherwise fail since
23177 control falls in from somewhere else. */
23178 if (this_insn
== label
)
23180 arm_ccfsm_state
= 1;
23188 /* Succeed if the following insn is the target label.
23190 If return insns are used then the last insn in a function
23191 will be a barrier. */
23192 this_insn
= next_nonnote_insn (this_insn
);
23193 if (this_insn
&& this_insn
== label
)
23195 arm_ccfsm_state
= 1;
23203 /* The AAPCS says that conditional calls should not be
23204 used since they make interworking inefficient (the
23205 linker can't transform BL<cond> into BLX). That's
23206 only a problem if the machine has BLX. */
23213 /* Succeed if the following insn is the target label, or
23214 if the following two insns are a barrier and the
23216 this_insn
= next_nonnote_insn (this_insn
);
23217 if (this_insn
&& BARRIER_P (this_insn
))
23218 this_insn
= next_nonnote_insn (this_insn
);
23220 if (this_insn
&& this_insn
== label
23221 && insns_skipped
< max_insns_skipped
)
23223 arm_ccfsm_state
= 1;
23231 /* If this is an unconditional branch to the same label, succeed.
23232 If it is to another label, do nothing. If it is conditional,
23234 /* XXX Probably, the tests for SET and the PC are
23237 scanbody
= PATTERN (this_insn
);
23238 if (GET_CODE (scanbody
) == SET
23239 && GET_CODE (SET_DEST (scanbody
)) == PC
)
23241 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
23242 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
23244 arm_ccfsm_state
= 2;
23247 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
23250 /* Fail if a conditional return is undesirable (e.g. on a
23251 StrongARM), but still allow this if optimizing for size. */
23252 else if (GET_CODE (scanbody
) == return_code
23253 && !use_return_insn (TRUE
, NULL
)
23256 else if (GET_CODE (scanbody
) == return_code
)
23258 arm_ccfsm_state
= 2;
23261 else if (GET_CODE (scanbody
) == PARALLEL
)
23263 switch (get_attr_conds (this_insn
))
23273 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
23278 /* Instructions using or affecting the condition codes make it
23280 scanbody
= PATTERN (this_insn
);
23281 if (!(GET_CODE (scanbody
) == SET
23282 || GET_CODE (scanbody
) == PARALLEL
)
23283 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
23293 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
23294 arm_target_label
= CODE_LABEL_NUMBER (label
);
23297 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
23299 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
23301 this_insn
= next_nonnote_insn (this_insn
);
23302 gcc_assert (!this_insn
23303 || (!BARRIER_P (this_insn
)
23304 && !LABEL_P (this_insn
)));
23308 /* Oh, dear! we ran off the end.. give up. */
23309 extract_constrain_insn_cached (insn
);
23310 arm_ccfsm_state
= 0;
23311 arm_target_insn
= NULL
;
23314 arm_target_insn
= this_insn
;
23317 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23320 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
23322 if (reverse
|| then_not_else
)
23323 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
23326 /* Restore recog_data (getting the attributes of other insns can
23327 destroy this array, but final.c assumes that it remains intact
23328 across this call. */
23329 extract_constrain_insn_cached (insn
);
23333 /* Output IT instructions. */
23335 thumb2_asm_output_opcode (FILE * stream
)
23340 if (arm_condexec_mask
)
23342 for (n
= 0; n
< arm_condexec_masklen
; n
++)
23343 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
23345 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
23346 arm_condition_codes
[arm_current_cc
]);
23347 arm_condexec_mask
= 0;
23351 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23353 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
23355 if (GET_MODE_CLASS (mode
) == MODE_CC
)
23356 return (regno
== CC_REGNUM
23357 || (TARGET_HARD_FLOAT
23358 && regno
== VFPCC_REGNUM
));
23360 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
23364 /* For the Thumb we only allow values bigger than SImode in
23365 registers 0 - 6, so that there is always a second low
23366 register available to hold the upper part of the value.
23367 We probably we ought to ensure that the register is the
23368 start of an even numbered register pair. */
23369 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
23371 if (TARGET_HARD_FLOAT
&& IS_VFP_REGNUM (regno
))
23373 if (mode
== SFmode
|| mode
== SImode
)
23374 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23376 if (mode
== DFmode
)
23377 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
23379 if (mode
== HFmode
)
23380 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23382 /* VFP registers can hold HImode values. */
23383 if (mode
== HImode
)
23384 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23387 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
23388 || (VALID_NEON_QREG_MODE (mode
)
23389 && NEON_REGNO_OK_FOR_QUAD (regno
))
23390 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
23391 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
23392 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
23393 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
23394 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
23399 if (TARGET_REALLY_IWMMXT
)
23401 if (IS_IWMMXT_GR_REGNUM (regno
))
23402 return mode
== SImode
;
23404 if (IS_IWMMXT_REGNUM (regno
))
23405 return VALID_IWMMXT_REG_MODE (mode
);
23408 /* We allow almost any value to be stored in the general registers.
23409 Restrict doubleword quantities to even register pairs in ARM state
23410 so that we can use ldrd. Do not allow very large Neon structure
23411 opaque modes in general registers; they would use too many. */
23412 if (regno
<= LAST_ARM_REGNUM
)
23414 if (ARM_NUM_REGS (mode
) > 4)
23420 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
23423 if (regno
== FRAME_POINTER_REGNUM
23424 || regno
== ARG_POINTER_REGNUM
)
23425 /* We only allow integers in the fake hard registers. */
23426 return GET_MODE_CLASS (mode
) == MODE_INT
;
23431 /* Implement TARGET_MODES_TIEABLE_P. */
23434 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
23436 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
23439 /* We specifically want to allow elements of "structure" modes to
23440 be tieable to the structure. This more general condition allows
23441 other rarer situations too. */
23443 && (VALID_NEON_DREG_MODE (mode1
)
23444 || VALID_NEON_QREG_MODE (mode1
)
23445 || VALID_NEON_STRUCT_MODE (mode1
))
23446 && (VALID_NEON_DREG_MODE (mode2
)
23447 || VALID_NEON_QREG_MODE (mode2
)
23448 || VALID_NEON_STRUCT_MODE (mode2
)))
23454 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23455 not used in arm mode. */
23458 arm_regno_class (int regno
)
23460 if (regno
== PC_REGNUM
)
23465 if (regno
== STACK_POINTER_REGNUM
)
23467 if (regno
== CC_REGNUM
)
23474 if (TARGET_THUMB2
&& regno
< 8)
23477 if ( regno
<= LAST_ARM_REGNUM
23478 || regno
== FRAME_POINTER_REGNUM
23479 || regno
== ARG_POINTER_REGNUM
)
23480 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
23482 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
23483 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
23485 if (IS_VFP_REGNUM (regno
))
23487 if (regno
<= D7_VFP_REGNUM
)
23488 return VFP_D0_D7_REGS
;
23489 else if (regno
<= LAST_LO_VFP_REGNUM
)
23490 return VFP_LO_REGS
;
23492 return VFP_HI_REGS
;
23495 if (IS_IWMMXT_REGNUM (regno
))
23496 return IWMMXT_REGS
;
23498 if (IS_IWMMXT_GR_REGNUM (regno
))
23499 return IWMMXT_GR_REGS
;
23504 /* Handle a special case when computing the offset
23505 of an argument from the frame pointer. */
23507 arm_debugger_arg_offset (int value
, rtx addr
)
23511 /* We are only interested if dbxout_parms() failed to compute the offset. */
23515 /* We can only cope with the case where the address is held in a register. */
23519 /* If we are using the frame pointer to point at the argument, then
23520 an offset of 0 is correct. */
23521 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
23524 /* If we are using the stack pointer to point at the
23525 argument, then an offset of 0 is correct. */
23526 /* ??? Check this is consistent with thumb2 frame layout. */
23527 if ((TARGET_THUMB
|| !frame_pointer_needed
)
23528 && REGNO (addr
) == SP_REGNUM
)
23531 /* Oh dear. The argument is pointed to by a register rather
23532 than being held in a register, or being stored at a known
23533 offset from the frame pointer. Since GDB only understands
23534 those two kinds of argument we must translate the address
23535 held in the register into an offset from the frame pointer.
23536 We do this by searching through the insns for the function
23537 looking to see where this register gets its value. If the
23538 register is initialized from the frame pointer plus an offset
23539 then we are in luck and we can continue, otherwise we give up.
23541 This code is exercised by producing debugging information
23542 for a function with arguments like this:
23544 double func (double a, double b, int c, double d) {return d;}
23546 Without this code the stab for parameter 'd' will be set to
23547 an offset of 0 from the frame pointer, rather than 8. */
23549 /* The if() statement says:
23551 If the insn is a normal instruction
23552 and if the insn is setting the value in a register
23553 and if the register being set is the register holding the address of the argument
23554 and if the address is computing by an addition
23555 that involves adding to a register
23556 which is the frame pointer
23561 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23563 if ( NONJUMP_INSN_P (insn
)
23564 && GET_CODE (PATTERN (insn
)) == SET
23565 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23566 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23567 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23568 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23569 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23572 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23581 warning (0, "unable to compute real location of stacked parameter");
23582 value
= 8; /* XXX magic hack */
23588 /* Implement TARGET_PROMOTED_TYPE. */
23591 arm_promoted_type (const_tree t
)
23593 if (SCALAR_FLOAT_TYPE_P (t
)
23594 && TYPE_PRECISION (t
) == 16
23595 && TYPE_MAIN_VARIANT (t
) == arm_fp16_type_node
)
23596 return float_type_node
;
23600 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23601 This simply adds HFmode as a supported mode; even though we don't
23602 implement arithmetic on this type directly, it's supported by
23603 optabs conversions, much the way the double-word arithmetic is
23604 special-cased in the default hook. */
23607 arm_scalar_mode_supported_p (scalar_mode mode
)
23609 if (mode
== HFmode
)
23610 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
23611 else if (ALL_FIXED_POINT_MODE_P (mode
))
23614 return default_scalar_mode_supported_p (mode
);
23617 /* Set the value of FLT_EVAL_METHOD.
23618 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23620 0: evaluate all operations and constants, whose semantic type has at
23621 most the range and precision of type float, to the range and
23622 precision of float; evaluate all other operations and constants to
23623 the range and precision of the semantic type;
23625 N, where _FloatN is a supported interchange floating type
23626 evaluate all operations and constants, whose semantic type has at
23627 most the range and precision of _FloatN type, to the range and
23628 precision of the _FloatN type; evaluate all other operations and
23629 constants to the range and precision of the semantic type;
23631 If we have the ARMv8.2-A extensions then we support _Float16 in native
23632 precision, so we should set this to 16. Otherwise, we support the type,
23633 but want to evaluate expressions in float precision, so set this to
23636 static enum flt_eval_method
23637 arm_excess_precision (enum excess_precision_type type
)
23641 case EXCESS_PRECISION_TYPE_FAST
:
23642 case EXCESS_PRECISION_TYPE_STANDARD
:
23643 /* We can calculate either in 16-bit range and precision or
23644 32-bit range and precision. Make that decision based on whether
23645 we have native support for the ARMv8.2-A 16-bit floating-point
23646 instructions or not. */
23647 return (TARGET_VFP_FP16INST
23648 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23649 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
);
23650 case EXCESS_PRECISION_TYPE_IMPLICIT
:
23651 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
;
23653 gcc_unreachable ();
23655 return FLT_EVAL_METHOD_UNPREDICTABLE
;
23659 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23660 _Float16 if we are using anything other than ieee format for 16-bit
23661 floating point. Otherwise, punt to the default implementation. */
23662 static opt_scalar_float_mode
23663 arm_floatn_mode (int n
, bool extended
)
23665 if (!extended
&& n
== 16)
23667 if (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
)
23669 return opt_scalar_float_mode ();
23672 return default_floatn_mode (n
, extended
);
23676 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23677 not to early-clobber SRC registers in the process.
23679 We assume that the operands described by SRC and DEST represent a
23680 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23681 number of components into which the copy has been decomposed. */
23683 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
23687 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
23688 || REGNO (operands
[0]) < REGNO (operands
[1]))
23690 for (i
= 0; i
< count
; i
++)
23692 operands
[2 * i
] = dest
[i
];
23693 operands
[2 * i
+ 1] = src
[i
];
23698 for (i
= 0; i
< count
; i
++)
23700 operands
[2 * i
] = dest
[count
- i
- 1];
23701 operands
[2 * i
+ 1] = src
[count
- i
- 1];
23706 /* Split operands into moves from op[1] + op[2] into op[0]. */
23709 neon_split_vcombine (rtx operands
[3])
23711 unsigned int dest
= REGNO (operands
[0]);
23712 unsigned int src1
= REGNO (operands
[1]);
23713 unsigned int src2
= REGNO (operands
[2]);
23714 machine_mode halfmode
= GET_MODE (operands
[1]);
23715 unsigned int halfregs
= REG_NREGS (operands
[1]);
23716 rtx destlo
, desthi
;
23718 if (src1
== dest
&& src2
== dest
+ halfregs
)
23720 /* No-op move. Can't split to nothing; emit something. */
23721 emit_note (NOTE_INSN_DELETED
);
23725 /* Preserve register attributes for variable tracking. */
23726 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
23727 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
23728 GET_MODE_SIZE (halfmode
));
23730 /* Special case of reversed high/low parts. Use VSWP. */
23731 if (src2
== dest
&& src1
== dest
+ halfregs
)
23733 rtx x
= gen_rtx_SET (destlo
, operands
[1]);
23734 rtx y
= gen_rtx_SET (desthi
, operands
[2]);
23735 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
23739 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
23741 /* Try to avoid unnecessary moves if part of the result
23742 is in the right place already. */
23744 emit_move_insn (destlo
, operands
[1]);
23745 if (src2
!= dest
+ halfregs
)
23746 emit_move_insn (desthi
, operands
[2]);
23750 if (src2
!= dest
+ halfregs
)
23751 emit_move_insn (desthi
, operands
[2]);
23753 emit_move_insn (destlo
, operands
[1]);
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
23766 /* Like emit_multi_reg_push, but allowing for a different set of
23767 registers to be described as saved. MASK is the set of registers
23768 to be saved; REAL_REGS is the set of registers to be described as
23769 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23772 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
23774 unsigned long regno
;
23775 rtx par
[10], tmp
, reg
;
23779 /* Build the parallel of the registers actually being stored. */
23780 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
23782 regno
= ctz_hwi (mask
);
23783 reg
= gen_rtx_REG (SImode
, regno
);
23786 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
23788 tmp
= gen_rtx_USE (VOIDmode
, reg
);
23793 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23794 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
23795 tmp
= gen_frame_mem (BLKmode
, tmp
);
23796 tmp
= gen_rtx_SET (tmp
, par
[0]);
23799 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
23800 insn
= emit_insn (tmp
);
23802 /* Always build the stack adjustment note for unwind info. */
23803 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23804 tmp
= gen_rtx_SET (stack_pointer_rtx
, tmp
);
23807 /* Build the parallel of the registers recorded as saved for unwind. */
23808 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
23810 regno
= ctz_hwi (real_regs
);
23811 reg
= gen_rtx_REG (SImode
, regno
);
23813 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
23814 tmp
= gen_frame_mem (SImode
, tmp
);
23815 tmp
= gen_rtx_SET (tmp
, reg
);
23816 RTX_FRAME_RELATED_P (tmp
) = 1;
23824 RTX_FRAME_RELATED_P (par
[0]) = 1;
23825 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
23828 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
23833 /* Emit code to push or pop registers to or from the stack. F is the
23834 assembly file. MASK is the registers to pop. */
23836 thumb_pop (FILE *f
, unsigned long mask
)
23839 int lo_mask
= mask
& 0xFF;
23843 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
23845 /* Special case. Do not generate a POP PC statement here, do it in
23847 thumb_exit (f
, -1);
23851 fprintf (f
, "\tpop\t{");
23853 /* Look at the low registers first. */
23854 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
23858 asm_fprintf (f
, "%r", regno
);
23860 if ((lo_mask
& ~1) != 0)
23865 if (mask
& (1 << PC_REGNUM
))
23867 /* Catch popping the PC. */
23868 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
|| crtl
->calls_eh_return
23869 || IS_CMSE_ENTRY (arm_current_func_type ()))
23871 /* The PC is never poped directly, instead
23872 it is popped into r3 and then BX is used. */
23873 fprintf (f
, "}\n");
23875 thumb_exit (f
, -1);
23884 asm_fprintf (f
, "%r", PC_REGNUM
);
23888 fprintf (f
, "}\n");
23891 /* Generate code to return from a thumb function.
23892 If 'reg_containing_return_addr' is -1, then the return address is
23893 actually on the stack, at the stack pointer. */
23895 thumb_exit (FILE *f
, int reg_containing_return_addr
)
23897 unsigned regs_available_for_popping
;
23898 unsigned regs_to_pop
;
23900 unsigned available
;
23904 int restore_a4
= FALSE
;
23906 /* Compute the registers we need to pop. */
23910 if (reg_containing_return_addr
== -1)
23912 regs_to_pop
|= 1 << LR_REGNUM
;
23916 if (TARGET_BACKTRACE
)
23918 /* Restore the (ARM) frame pointer and stack pointer. */
23919 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
23923 /* If there is nothing to pop then just emit the BX instruction and
23925 if (pops_needed
== 0)
23927 if (crtl
->calls_eh_return
)
23928 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
23930 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23932 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n",
23933 reg_containing_return_addr
);
23934 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
23937 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
23940 /* Otherwise if we are not supporting interworking and we have not created
23941 a backtrace structure and the function was not entered in ARM mode then
23942 just pop the return address straight into the PC. */
23943 else if (!TARGET_INTERWORK
23944 && !TARGET_BACKTRACE
23945 && !is_called_in_ARM_mode (current_function_decl
)
23946 && !crtl
->calls_eh_return
23947 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23949 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
23953 /* Find out how many of the (return) argument registers we can corrupt. */
23954 regs_available_for_popping
= 0;
23956 /* If returning via __builtin_eh_return, the bottom three registers
23957 all contain information needed for the return. */
23958 if (crtl
->calls_eh_return
)
23962 /* If we can deduce the registers used from the function's
23963 return value. This is more reliable that examining
23964 df_regs_ever_live_p () because that will be set if the register is
23965 ever used in the function, not just if the register is used
23966 to hold a return value. */
23968 if (crtl
->return_rtx
!= 0)
23969 mode
= GET_MODE (crtl
->return_rtx
);
23971 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
23973 size
= GET_MODE_SIZE (mode
);
23977 /* In a void function we can use any argument register.
23978 In a function that returns a structure on the stack
23979 we can use the second and third argument registers. */
23980 if (mode
== VOIDmode
)
23981 regs_available_for_popping
=
23982 (1 << ARG_REGISTER (1))
23983 | (1 << ARG_REGISTER (2))
23984 | (1 << ARG_REGISTER (3));
23986 regs_available_for_popping
=
23987 (1 << ARG_REGISTER (2))
23988 | (1 << ARG_REGISTER (3));
23990 else if (size
<= 4)
23991 regs_available_for_popping
=
23992 (1 << ARG_REGISTER (2))
23993 | (1 << ARG_REGISTER (3));
23994 else if (size
<= 8)
23995 regs_available_for_popping
=
23996 (1 << ARG_REGISTER (3));
23999 /* Match registers to be popped with registers into which we pop them. */
24000 for (available
= regs_available_for_popping
,
24001 required
= regs_to_pop
;
24002 required
!= 0 && available
!= 0;
24003 available
&= ~(available
& - available
),
24004 required
&= ~(required
& - required
))
24007 /* If we have any popping registers left over, remove them. */
24009 regs_available_for_popping
&= ~available
;
24011 /* Otherwise if we need another popping register we can use
24012 the fourth argument register. */
24013 else if (pops_needed
)
24015 /* If we have not found any free argument registers and
24016 reg a4 contains the return address, we must move it. */
24017 if (regs_available_for_popping
== 0
24018 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
24020 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24021 reg_containing_return_addr
= LR_REGNUM
;
24023 else if (size
> 12)
24025 /* Register a4 is being used to hold part of the return value,
24026 but we have dire need of a free, low register. */
24029 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
24032 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
24034 /* The fourth argument register is available. */
24035 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
24041 /* Pop as many registers as we can. */
24042 thumb_pop (f
, regs_available_for_popping
);
24044 /* Process the registers we popped. */
24045 if (reg_containing_return_addr
== -1)
24047 /* The return address was popped into the lowest numbered register. */
24048 regs_to_pop
&= ~(1 << LR_REGNUM
);
24050 reg_containing_return_addr
=
24051 number_of_first_bit_set (regs_available_for_popping
);
24053 /* Remove this register for the mask of available registers, so that
24054 the return address will not be corrupted by further pops. */
24055 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
24058 /* If we popped other registers then handle them here. */
24059 if (regs_available_for_popping
)
24063 /* Work out which register currently contains the frame pointer. */
24064 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24066 /* Move it into the correct place. */
24067 asm_fprintf (f
, "\tmov\t%r, %r\n",
24068 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
24070 /* (Temporarily) remove it from the mask of popped registers. */
24071 regs_available_for_popping
&= ~(1 << frame_pointer
);
24072 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
24074 if (regs_available_for_popping
)
24078 /* We popped the stack pointer as well,
24079 find the register that contains it. */
24080 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24082 /* Move it into the stack register. */
24083 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
24085 /* At this point we have popped all necessary registers, so
24086 do not worry about restoring regs_available_for_popping
24087 to its correct value:
24089 assert (pops_needed == 0)
24090 assert (regs_available_for_popping == (1 << frame_pointer))
24091 assert (regs_to_pop == (1 << STACK_POINTER)) */
24095 /* Since we have just move the popped value into the frame
24096 pointer, the popping register is available for reuse, and
24097 we know that we still have the stack pointer left to pop. */
24098 regs_available_for_popping
|= (1 << frame_pointer
);
24102 /* If we still have registers left on the stack, but we no longer have
24103 any registers into which we can pop them, then we must move the return
24104 address into the link register and make available the register that
24106 if (regs_available_for_popping
== 0 && pops_needed
> 0)
24108 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
24110 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
24111 reg_containing_return_addr
);
24113 reg_containing_return_addr
= LR_REGNUM
;
24116 /* If we have registers left on the stack then pop some more.
24117 We know that at most we will want to pop FP and SP. */
24118 if (pops_needed
> 0)
24123 thumb_pop (f
, regs_available_for_popping
);
24125 /* We have popped either FP or SP.
24126 Move whichever one it is into the correct register. */
24127 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24128 move_to
= number_of_first_bit_set (regs_to_pop
);
24130 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
24134 /* If we still have not popped everything then we must have only
24135 had one register available to us and we are now popping the SP. */
24136 if (pops_needed
> 0)
24140 thumb_pop (f
, regs_available_for_popping
);
24142 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24144 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
24146 assert (regs_to_pop == (1 << STACK_POINTER))
24147 assert (pops_needed == 1)
24151 /* If necessary restore the a4 register. */
24154 if (reg_containing_return_addr
!= LR_REGNUM
)
24156 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24157 reg_containing_return_addr
= LR_REGNUM
;
24160 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
24163 if (crtl
->calls_eh_return
)
24164 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
24166 /* Return to caller. */
24167 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24169 /* This is for the cases where LR is not being used to contain the return
24170 address. It may therefore contain information that we might not want
24171 to leak, hence it must be cleared. The value in R0 will never be a
24172 secret at this point, so it is safe to use it, see the clearing code
24173 in 'cmse_nonsecure_entry_clear_before_return'. */
24174 if (reg_containing_return_addr
!= LR_REGNUM
)
24175 asm_fprintf (f
, "\tmov\tlr, r0\n");
24177 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr
);
24178 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
24181 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
24184 /* Scan INSN just before assembler is output for it.
24185 For Thumb-1, we track the status of the condition codes; this
24186 information is used in the cbranchsi4_insn pattern. */
24188 thumb1_final_prescan_insn (rtx_insn
*insn
)
24190 if (flag_print_asm_name
)
24191 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
24192 INSN_ADDRESSES (INSN_UID (insn
)));
24193 /* Don't overwrite the previous setter when we get to a cbranch. */
24194 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
24196 enum attr_conds conds
;
24198 if (cfun
->machine
->thumb1_cc_insn
)
24200 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
24201 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
24204 conds
= get_attr_conds (insn
);
24205 if (conds
== CONDS_SET
)
24207 rtx set
= single_set (insn
);
24208 cfun
->machine
->thumb1_cc_insn
= insn
;
24209 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
24210 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
24211 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
24212 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
24214 rtx src1
= XEXP (SET_SRC (set
), 1);
24215 if (src1
== const0_rtx
)
24216 cfun
->machine
->thumb1_cc_mode
= CCmode
;
24218 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
24220 /* Record the src register operand instead of dest because
24221 cprop_hardreg pass propagates src. */
24222 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
24225 else if (conds
!= CONDS_NOCOND
)
24226 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
24229 /* Check if unexpected far jump is used. */
24230 if (cfun
->machine
->lr_save_eliminated
24231 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24232 internal_error("Unexpected thumb1 far jump");
24236 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
24238 unsigned HOST_WIDE_INT mask
= 0xff;
24241 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
24242 if (val
== 0) /* XXX */
24245 for (i
= 0; i
< 25; i
++)
24246 if ((val
& (mask
<< i
)) == val
)
24252 /* Returns nonzero if the current function contains,
24253 or might contain a far jump. */
24255 thumb_far_jump_used_p (void)
24258 bool far_jump
= false;
24259 unsigned int func_size
= 0;
24261 /* If we have already decided that far jumps may be used,
24262 do not bother checking again, and always return true even if
24263 it turns out that they are not being used. Once we have made
24264 the decision that far jumps are present (and that hence the link
24265 register will be pushed onto the stack) we cannot go back on it. */
24266 if (cfun
->machine
->far_jump_used
)
24269 /* If this function is not being called from the prologue/epilogue
24270 generation code then it must be being called from the
24271 INITIAL_ELIMINATION_OFFSET macro. */
24272 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
24274 /* In this case we know that we are being asked about the elimination
24275 of the arg pointer register. If that register is not being used,
24276 then there are no arguments on the stack, and we do not have to
24277 worry that a far jump might force the prologue to push the link
24278 register, changing the stack offsets. In this case we can just
24279 return false, since the presence of far jumps in the function will
24280 not affect stack offsets.
24282 If the arg pointer is live (or if it was live, but has now been
24283 eliminated and so set to dead) then we do have to test to see if
24284 the function might contain a far jump. This test can lead to some
24285 false negatives, since before reload is completed, then length of
24286 branch instructions is not known, so gcc defaults to returning their
24287 longest length, which in turn sets the far jump attribute to true.
24289 A false negative will not result in bad code being generated, but it
24290 will result in a needless push and pop of the link register. We
24291 hope that this does not occur too often.
24293 If we need doubleword stack alignment this could affect the other
24294 elimination offsets so we can't risk getting it wrong. */
24295 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
24296 cfun
->machine
->arg_pointer_live
= 1;
24297 else if (!cfun
->machine
->arg_pointer_live
)
24301 /* We should not change far_jump_used during or after reload, as there is
24302 no chance to change stack frame layout. */
24303 if (reload_in_progress
|| reload_completed
)
24306 /* Check to see if the function contains a branch
24307 insn with the far jump attribute set. */
24308 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
24310 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24314 func_size
+= get_attr_length (insn
);
24317 /* Attribute far_jump will always be true for thumb1 before
24318 shorten_branch pass. So checking far_jump attribute before
24319 shorten_branch isn't much useful.
24321 Following heuristic tries to estimate more accurately if a far jump
24322 may finally be used. The heuristic is very conservative as there is
24323 no chance to roll-back the decision of not to use far jump.
24325 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24326 2-byte insn is associated with a 4 byte constant pool. Using
24327 function size 2048/3 as the threshold is conservative enough. */
24330 if ((func_size
* 3) >= 2048)
24332 /* Record the fact that we have decided that
24333 the function does use far jumps. */
24334 cfun
->machine
->far_jump_used
= 1;
24342 /* Return nonzero if FUNC must be entered in ARM mode. */
24344 is_called_in_ARM_mode (tree func
)
24346 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
24348 /* Ignore the problem about functions whose address is taken. */
24349 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
24353 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
24359 /* Given the stack offsets and register mask in OFFSETS, decide how
24360 many additional registers to push instead of subtracting a constant
24361 from SP. For epilogues the principle is the same except we use pop.
24362 FOR_PROLOGUE indicates which we're generating. */
24364 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
24366 HOST_WIDE_INT amount
;
24367 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
24368 /* Extract a mask of the ones we can give to the Thumb's push/pop
24370 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
24371 /* Then count how many other high registers will need to be pushed. */
24372 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24373 int n_free
, reg_base
, size
;
24375 if (!for_prologue
&& frame_pointer_needed
)
24376 amount
= offsets
->locals_base
- offsets
->saved_regs
;
24378 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24380 /* If the stack frame size is 512 exactly, we can save one load
24381 instruction, which should make this a win even when optimizing
24383 if (!optimize_size
&& amount
!= 512)
24386 /* Can't do this if there are high registers to push. */
24387 if (high_regs_pushed
!= 0)
24390 /* Shouldn't do it in the prologue if no registers would normally
24391 be pushed at all. In the epilogue, also allow it if we'll have
24392 a pop insn for the PC. */
24395 || TARGET_BACKTRACE
24396 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
24397 || TARGET_INTERWORK
24398 || crtl
->args
.pretend_args_size
!= 0))
24401 /* Don't do this if thumb_expand_prologue wants to emit instructions
24402 between the push and the stack frame allocation. */
24404 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24405 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
24412 size
= arm_size_return_regs ();
24413 reg_base
= ARM_NUM_INTS (size
);
24414 live_regs_mask
>>= reg_base
;
24417 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
24418 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
24420 live_regs_mask
>>= 1;
24426 gcc_assert (amount
/ 4 * 4 == amount
);
24428 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
24429 return (amount
- 508) / 4;
24430 if (amount
<= n_free
* 4)
24435 /* The bits which aren't usefully expanded as rtl. */
24437 thumb1_unexpanded_epilogue (void)
24439 arm_stack_offsets
*offsets
;
24441 unsigned long live_regs_mask
= 0;
24442 int high_regs_pushed
= 0;
24444 int had_to_push_lr
;
24447 if (cfun
->machine
->return_used_this_function
!= 0)
24450 if (IS_NAKED (arm_current_func_type ()))
24453 offsets
= arm_get_frame_offsets ();
24454 live_regs_mask
= offsets
->saved_regs_mask
;
24455 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24457 /* If we can deduce the registers used from the function's return value.
24458 This is more reliable that examining df_regs_ever_live_p () because that
24459 will be set if the register is ever used in the function, not just if
24460 the register is used to hold a return value. */
24461 size
= arm_size_return_regs ();
24463 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
24466 unsigned long extra_mask
= (1 << extra_pop
) - 1;
24467 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
24470 /* The prolog may have pushed some high registers to use as
24471 work registers. e.g. the testsuite file:
24472 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24473 compiles to produce:
24474 push {r4, r5, r6, r7, lr}
24478 as part of the prolog. We have to undo that pushing here. */
24480 if (high_regs_pushed
)
24482 unsigned long mask
= live_regs_mask
& 0xff;
24485 /* The available low registers depend on the size of the value we are
24493 /* Oh dear! We have no low registers into which we can pop
24496 ("no low registers available for popping high registers");
24498 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
24499 if (live_regs_mask
& (1 << next_hi_reg
))
24502 while (high_regs_pushed
)
24504 /* Find lo register(s) into which the high register(s) can
24506 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24508 if (mask
& (1 << regno
))
24509 high_regs_pushed
--;
24510 if (high_regs_pushed
== 0)
24514 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
24516 /* Pop the values into the low register(s). */
24517 thumb_pop (asm_out_file
, mask
);
24519 /* Move the value(s) into the high registers. */
24520 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24522 if (mask
& (1 << regno
))
24524 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
24527 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
24528 if (live_regs_mask
& (1 << next_hi_reg
))
24533 live_regs_mask
&= ~0x0f00;
24536 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
24537 live_regs_mask
&= 0xff;
24539 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
24541 /* Pop the return address into the PC. */
24542 if (had_to_push_lr
)
24543 live_regs_mask
|= 1 << PC_REGNUM
;
24545 /* Either no argument registers were pushed or a backtrace
24546 structure was created which includes an adjusted stack
24547 pointer, so just pop everything. */
24548 if (live_regs_mask
)
24549 thumb_pop (asm_out_file
, live_regs_mask
);
24551 /* We have either just popped the return address into the
24552 PC or it is was kept in LR for the entire function.
24553 Note that thumb_pop has already called thumb_exit if the
24554 PC was in the list. */
24555 if (!had_to_push_lr
)
24556 thumb_exit (asm_out_file
, LR_REGNUM
);
24560 /* Pop everything but the return address. */
24561 if (live_regs_mask
)
24562 thumb_pop (asm_out_file
, live_regs_mask
);
24564 if (had_to_push_lr
)
24568 /* We have no free low regs, so save one. */
24569 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
24573 /* Get the return address into a temporary register. */
24574 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
24578 /* Move the return address to lr. */
24579 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
24581 /* Restore the low register. */
24582 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
24587 regno
= LAST_ARG_REGNUM
;
24592 /* Remove the argument registers that were pushed onto the stack. */
24593 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
24594 SP_REGNUM
, SP_REGNUM
,
24595 crtl
->args
.pretend_args_size
);
24597 thumb_exit (asm_out_file
, regno
);
24603 /* Functions to save and restore machine-specific function data. */
24604 static struct machine_function
*
24605 arm_init_machine_status (void)
24607 struct machine_function
*machine
;
24608 machine
= ggc_cleared_alloc
<machine_function
> ();
24610 #if ARM_FT_UNKNOWN != 0
24611 machine
->func_type
= ARM_FT_UNKNOWN
;
24616 /* Return an RTX indicating where the return address to the
24617 calling function can be found. */
24619 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
24624 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
24627 /* Do anything needed before RTL is emitted for each function. */
24629 arm_init_expanders (void)
24631 /* Arrange to initialize and mark the machine per-function status. */
24632 init_machine_status
= arm_init_machine_status
;
24634 /* This is to stop the combine pass optimizing away the alignment
24635 adjustment of va_arg. */
24636 /* ??? It is claimed that this should not be necessary. */
24638 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
24641 /* Check that FUNC is called with a different mode. */
24644 arm_change_mode_p (tree func
)
24646 if (TREE_CODE (func
) != FUNCTION_DECL
)
24649 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (func
);
24652 callee_tree
= target_option_default_node
;
24654 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
24655 int flags
= callee_opts
->x_target_flags
;
24657 return (TARGET_THUMB_P (flags
) != TARGET_THUMB
);
24660 /* Like arm_compute_initial_elimination offset. Simpler because there
24661 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24662 to point at the base of the local variables after static stack
24663 space for a function has been allocated. */
24666 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
24668 arm_stack_offsets
*offsets
;
24670 offsets
= arm_get_frame_offsets ();
24674 case ARG_POINTER_REGNUM
:
24677 case STACK_POINTER_REGNUM
:
24678 return offsets
->outgoing_args
- offsets
->saved_args
;
24680 case FRAME_POINTER_REGNUM
:
24681 return offsets
->soft_frame
- offsets
->saved_args
;
24683 case ARM_HARD_FRAME_POINTER_REGNUM
:
24684 return offsets
->saved_regs
- offsets
->saved_args
;
24686 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24687 return offsets
->locals_base
- offsets
->saved_args
;
24690 gcc_unreachable ();
24694 case FRAME_POINTER_REGNUM
:
24697 case STACK_POINTER_REGNUM
:
24698 return offsets
->outgoing_args
- offsets
->soft_frame
;
24700 case ARM_HARD_FRAME_POINTER_REGNUM
:
24701 return offsets
->saved_regs
- offsets
->soft_frame
;
24703 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24704 return offsets
->locals_base
- offsets
->soft_frame
;
24707 gcc_unreachable ();
24712 gcc_unreachable ();
24716 /* Generate the function's prologue. */
24719 thumb1_expand_prologue (void)
24723 HOST_WIDE_INT amount
;
24724 HOST_WIDE_INT size
;
24725 arm_stack_offsets
*offsets
;
24726 unsigned long func_type
;
24728 unsigned long live_regs_mask
;
24729 unsigned long l_mask
;
24730 unsigned high_regs_pushed
= 0;
24731 bool lr_needs_saving
;
24733 func_type
= arm_current_func_type ();
24735 /* Naked functions don't have prologues. */
24736 if (IS_NAKED (func_type
))
24738 if (flag_stack_usage_info
)
24739 current_function_static_stack_size
= 0;
24743 if (IS_INTERRUPT (func_type
))
24745 error ("interrupt Service Routines cannot be coded in Thumb mode");
24749 if (is_called_in_ARM_mode (current_function_decl
))
24750 emit_insn (gen_prologue_thumb1_interwork ());
24752 offsets
= arm_get_frame_offsets ();
24753 live_regs_mask
= offsets
->saved_regs_mask
;
24754 lr_needs_saving
= live_regs_mask
& (1 << LR_REGNUM
);
24756 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24757 l_mask
= live_regs_mask
& 0x40ff;
24758 /* Then count how many other high registers will need to be pushed. */
24759 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24761 if (crtl
->args
.pretend_args_size
)
24763 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
24765 if (cfun
->machine
->uses_anonymous_args
)
24767 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
24768 unsigned long mask
;
24770 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
24771 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
24773 insn
= thumb1_emit_multi_reg_push (mask
, 0);
24777 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24778 stack_pointer_rtx
, x
));
24780 RTX_FRAME_RELATED_P (insn
) = 1;
24783 if (TARGET_BACKTRACE
)
24785 HOST_WIDE_INT offset
= 0;
24786 unsigned work_register
;
24787 rtx work_reg
, x
, arm_hfp_rtx
;
24789 /* We have been asked to create a stack backtrace structure.
24790 The code looks like this:
24794 0 sub SP, #16 Reserve space for 4 registers.
24795 2 push {R7} Push low registers.
24796 4 add R7, SP, #20 Get the stack pointer before the push.
24797 6 str R7, [SP, #8] Store the stack pointer
24798 (before reserving the space).
24799 8 mov R7, PC Get hold of the start of this code + 12.
24800 10 str R7, [SP, #16] Store it.
24801 12 mov R7, FP Get hold of the current frame pointer.
24802 14 str R7, [SP, #4] Store it.
24803 16 mov R7, LR Get hold of the current return address.
24804 18 str R7, [SP, #12] Store it.
24805 20 add R7, SP, #16 Point at the start of the
24806 backtrace structure.
24807 22 mov FP, R7 Put this value into the frame pointer. */
24809 work_register
= thumb_find_work_register (live_regs_mask
);
24810 work_reg
= gen_rtx_REG (SImode
, work_register
);
24811 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
24813 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24814 stack_pointer_rtx
, GEN_INT (-16)));
24815 RTX_FRAME_RELATED_P (insn
) = 1;
24819 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
24820 RTX_FRAME_RELATED_P (insn
) = 1;
24821 lr_needs_saving
= false;
24823 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
24826 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
24827 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24829 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
24830 x
= gen_frame_mem (SImode
, x
);
24831 emit_move_insn (x
, work_reg
);
24833 /* Make sure that the instruction fetching the PC is in the right place
24834 to calculate "start of backtrace creation code + 12". */
24835 /* ??? The stores using the common WORK_REG ought to be enough to
24836 prevent the scheduler from doing anything weird. Failing that
24837 we could always move all of the following into an UNSPEC_VOLATILE. */
24840 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24841 emit_move_insn (work_reg
, x
);
24843 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24844 x
= gen_frame_mem (SImode
, x
);
24845 emit_move_insn (x
, work_reg
);
24847 emit_move_insn (work_reg
, arm_hfp_rtx
);
24849 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24850 x
= gen_frame_mem (SImode
, x
);
24851 emit_move_insn (x
, work_reg
);
24855 emit_move_insn (work_reg
, arm_hfp_rtx
);
24857 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24858 x
= gen_frame_mem (SImode
, x
);
24859 emit_move_insn (x
, work_reg
);
24861 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24862 emit_move_insn (work_reg
, x
);
24864 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24865 x
= gen_frame_mem (SImode
, x
);
24866 emit_move_insn (x
, work_reg
);
24869 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
24870 emit_move_insn (work_reg
, x
);
24872 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
24873 x
= gen_frame_mem (SImode
, x
);
24874 emit_move_insn (x
, work_reg
);
24876 x
= GEN_INT (offset
+ 12);
24877 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24879 emit_move_insn (arm_hfp_rtx
, work_reg
);
24881 /* Optimization: If we are not pushing any low registers but we are going
24882 to push some high registers then delay our first push. This will just
24883 be a push of LR and we can combine it with the push of the first high
24885 else if ((l_mask
& 0xff) != 0
24886 || (high_regs_pushed
== 0 && lr_needs_saving
))
24888 unsigned long mask
= l_mask
;
24889 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
24890 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
24891 RTX_FRAME_RELATED_P (insn
) = 1;
24892 lr_needs_saving
= false;
24895 if (high_regs_pushed
)
24897 unsigned pushable_regs
;
24898 unsigned next_hi_reg
;
24899 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
24900 : crtl
->args
.info
.nregs
;
24901 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
24903 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
24904 if (live_regs_mask
& (1 << next_hi_reg
))
24907 /* Here we need to mask out registers used for passing arguments
24908 even if they can be pushed. This is to avoid using them to stash the high
24909 registers. Such kind of stash may clobber the use of arguments. */
24910 pushable_regs
= l_mask
& (~arg_regs_mask
);
24911 if (lr_needs_saving
)
24912 pushable_regs
&= ~(1 << LR_REGNUM
);
24914 if (pushable_regs
== 0)
24915 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
24917 while (high_regs_pushed
> 0)
24919 unsigned long real_regs_mask
= 0;
24920 unsigned long push_mask
= 0;
24922 for (regno
= LR_REGNUM
; regno
>= 0; regno
--)
24924 if (pushable_regs
& (1 << regno
))
24926 emit_move_insn (gen_rtx_REG (SImode
, regno
),
24927 gen_rtx_REG (SImode
, next_hi_reg
));
24929 high_regs_pushed
--;
24930 real_regs_mask
|= (1 << next_hi_reg
);
24931 push_mask
|= (1 << regno
);
24933 if (high_regs_pushed
)
24935 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
24937 if (live_regs_mask
& (1 << next_hi_reg
))
24945 /* If we had to find a work register and we have not yet
24946 saved the LR then add it to the list of regs to push. */
24947 if (lr_needs_saving
)
24949 push_mask
|= 1 << LR_REGNUM
;
24950 real_regs_mask
|= 1 << LR_REGNUM
;
24951 lr_needs_saving
= false;
24954 insn
= thumb1_emit_multi_reg_push (push_mask
, real_regs_mask
);
24955 RTX_FRAME_RELATED_P (insn
) = 1;
24959 /* Load the pic register before setting the frame pointer,
24960 so we can use r7 as a temporary work register. */
24961 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24962 arm_load_pic_register (live_regs_mask
);
24964 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
24965 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
24966 stack_pointer_rtx
);
24968 size
= offsets
->outgoing_args
- offsets
->saved_args
;
24969 if (flag_stack_usage_info
)
24970 current_function_static_stack_size
= size
;
24972 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24973 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
&& size
)
24974 sorry ("-fstack-check=specific for Thumb-1");
24976 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24977 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
24982 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
24983 GEN_INT (- amount
)));
24984 RTX_FRAME_RELATED_P (insn
) = 1;
24990 /* The stack decrement is too big for an immediate value in a single
24991 insn. In theory we could issue multiple subtracts, but after
24992 three of them it becomes more space efficient to place the full
24993 value in the constant pool and load into a register. (Also the
24994 ARM debugger really likes to see only one stack decrement per
24995 function). So instead we look for a scratch register into which
24996 we can load the decrement, and then we subtract this from the
24997 stack pointer. Unfortunately on the thumb the only available
24998 scratch registers are the argument registers, and we cannot use
24999 these as they may hold arguments to the function. Instead we
25000 attempt to locate a call preserved register which is used by this
25001 function. If we can find one, then we know that it will have
25002 been pushed at the start of the prologue and so we can corrupt
25004 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
25005 if (live_regs_mask
& (1 << regno
))
25008 gcc_assert(regno
<= LAST_LO_REGNUM
);
25010 reg
= gen_rtx_REG (SImode
, regno
);
25012 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
25014 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25015 stack_pointer_rtx
, reg
));
25017 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
25018 plus_constant (Pmode
, stack_pointer_rtx
,
25020 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
25021 RTX_FRAME_RELATED_P (insn
) = 1;
25025 if (frame_pointer_needed
)
25026 thumb_set_frame_pointer (offsets
);
25028 /* If we are profiling, make sure no instructions are scheduled before
25029 the call to mcount. Similarly if the user has requested no
25030 scheduling in the prolog. Similarly if we want non-call exceptions
25031 using the EABI unwinder, to prevent faulting instructions from being
25032 swapped with a stack adjustment. */
25033 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
25034 || (arm_except_unwind_info (&global_options
) == UI_TARGET
25035 && cfun
->can_throw_non_call_exceptions
))
25036 emit_insn (gen_blockage ());
25038 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
25039 if (live_regs_mask
& 0xff)
25040 cfun
->machine
->lr_save_eliminated
= 0;
25043 /* Clear caller saved registers not used to pass return values and leaked
25044 condition flags before exiting a cmse_nonsecure_entry function. */
25047 cmse_nonsecure_entry_clear_before_return (void)
25049 uint64_t to_clear_mask
[2];
25050 uint32_t padding_bits_to_clear
= 0;
25051 uint32_t * padding_bits_to_clear_ptr
= &padding_bits_to_clear
;
25052 int regno
, maxregno
= IP_REGNUM
;
25056 to_clear_mask
[0] = (1ULL << (NUM_ARG_REGS
)) - 1;
25057 to_clear_mask
[0] |= (1ULL << IP_REGNUM
);
25059 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25060 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
25061 to make sure the instructions used to clear them are present. */
25062 if (TARGET_HARD_FLOAT
&& !TARGET_THUMB1
)
25064 uint64_t float_mask
= (1ULL << (D7_VFP_REGNUM
+ 1)) - 1;
25065 maxregno
= LAST_VFP_REGNUM
;
25067 float_mask
&= ~((1ULL << FIRST_VFP_REGNUM
) - 1);
25068 to_clear_mask
[0] |= float_mask
;
25070 float_mask
= (1ULL << (maxregno
- 63)) - 1;
25071 to_clear_mask
[1] = float_mask
;
25073 /* Make sure we don't clear the two scratch registers used to clear the
25074 relevant FPSCR bits in output_return_instruction. */
25075 emit_use (gen_rtx_REG (SImode
, IP_REGNUM
));
25076 to_clear_mask
[0] &= ~(1ULL << IP_REGNUM
);
25077 emit_use (gen_rtx_REG (SImode
, 4));
25078 to_clear_mask
[0] &= ~(1ULL << 4);
25081 /* If the user has defined registers to be caller saved, these are no longer
25082 restored by the function before returning and must thus be cleared for
25083 security purposes. */
25084 for (regno
= NUM_ARG_REGS
; regno
< LAST_VFP_REGNUM
; regno
++)
25086 /* We do not touch registers that can be used to pass arguments as per
25087 the AAPCS, since these should never be made callee-saved by user
25089 if (IN_RANGE (regno
, FIRST_VFP_REGNUM
, D7_VFP_REGNUM
))
25091 if (IN_RANGE (regno
, IP_REGNUM
, PC_REGNUM
))
25093 if (call_used_regs
[regno
])
25094 to_clear_mask
[regno
/ 64] |= (1ULL << (regno
% 64));
25097 /* Make sure we do not clear the registers used to return the result in. */
25098 result_type
= TREE_TYPE (DECL_RESULT (current_function_decl
));
25099 if (!VOID_TYPE_P (result_type
))
25101 result_rtl
= arm_function_value (result_type
, current_function_decl
, 0);
25103 /* No need to check that we return in registers, because we don't
25104 support returning on stack yet. */
25106 &= ~compute_not_to_clear_mask (result_type
, result_rtl
, 0,
25107 padding_bits_to_clear_ptr
);
25110 if (padding_bits_to_clear
!= 0)
25113 /* Padding bits to clear is not 0 so we know we are dealing with
25114 returning a composite type, which only uses r0. Let's make sure that
25115 r1-r3 is cleared too, we will use r1 as a scratch register. */
25116 gcc_assert ((to_clear_mask
[0] & 0xe) == 0xe);
25118 reg_rtx
= gen_rtx_REG (SImode
, R1_REGNUM
);
25120 /* Fill the lower half of the negated padding_bits_to_clear. */
25121 emit_move_insn (reg_rtx
,
25122 GEN_INT ((((~padding_bits_to_clear
) << 16u) >> 16u)));
25124 /* Also fill the top half of the negated padding_bits_to_clear. */
25125 if (((~padding_bits_to_clear
) >> 16) > 0)
25126 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode
, reg_rtx
,
25129 GEN_INT ((~padding_bits_to_clear
) >> 16)));
25131 emit_insn (gen_andsi3 (gen_rtx_REG (SImode
, R0_REGNUM
),
25132 gen_rtx_REG (SImode
, R0_REGNUM
),
25136 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
25138 if (!(to_clear_mask
[regno
/ 64] & (1ULL << (regno
% 64))))
25141 if (IS_VFP_REGNUM (regno
))
25143 /* If regno is an even vfp register and its successor is also to
25144 be cleared, use vmov. */
25145 if (TARGET_VFP_DOUBLE
25146 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
25147 && to_clear_mask
[regno
/ 64] & (1ULL << ((regno
% 64) + 1)))
25149 emit_move_insn (gen_rtx_REG (DFmode
, regno
),
25150 CONST1_RTX (DFmode
));
25151 emit_use (gen_rtx_REG (DFmode
, regno
));
25156 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
25157 CONST1_RTX (SFmode
));
25158 emit_use (gen_rtx_REG (SFmode
, regno
));
25165 if (regno
== R0_REGNUM
)
25166 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25169 /* R0 has either been cleared before, see code above, or it
25170 holds a return value, either way it is not secret
25172 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25173 gen_rtx_REG (SImode
, R0_REGNUM
));
25174 emit_use (gen_rtx_REG (SImode
, regno
));
25178 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25179 gen_rtx_REG (SImode
, LR_REGNUM
));
25180 emit_use (gen_rtx_REG (SImode
, regno
));
25186 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
25187 POP instruction can be generated. LR should be replaced by PC. All
25188 the checks required are already done by USE_RETURN_INSN (). Hence,
25189 all we really need to check here is if single register is to be
25190 returned, or multiple register return. */
25192 thumb2_expand_return (bool simple_return
)
25195 unsigned long saved_regs_mask
;
25196 arm_stack_offsets
*offsets
;
25198 offsets
= arm_get_frame_offsets ();
25199 saved_regs_mask
= offsets
->saved_regs_mask
;
25201 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25202 if (saved_regs_mask
& (1 << i
))
25205 if (!simple_return
&& saved_regs_mask
)
25207 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25208 functions or adapt code to handle according to ACLE. This path should
25209 not be reachable for cmse_nonsecure_entry functions though we prefer
25210 to assert it for now to ensure that future code changes do not silently
25211 change this behavior. */
25212 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25215 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25216 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
25217 rtx addr
= gen_rtx_MEM (SImode
,
25218 gen_rtx_POST_INC (SImode
,
25219 stack_pointer_rtx
));
25220 set_mem_alias_set (addr
, get_frame_alias_set ());
25221 XVECEXP (par
, 0, 0) = ret_rtx
;
25222 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
25223 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
25224 emit_jump_insn (par
);
25228 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
25229 saved_regs_mask
|= (1 << PC_REGNUM
);
25230 arm_emit_multi_reg_pop (saved_regs_mask
);
25235 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25236 cmse_nonsecure_entry_clear_before_return ();
25237 emit_jump_insn (simple_return_rtx
);
25242 thumb1_expand_epilogue (void)
25244 HOST_WIDE_INT amount
;
25245 arm_stack_offsets
*offsets
;
25248 /* Naked functions don't have prologues. */
25249 if (IS_NAKED (arm_current_func_type ()))
25252 offsets
= arm_get_frame_offsets ();
25253 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25255 if (frame_pointer_needed
)
25257 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
25258 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25260 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
25262 gcc_assert (amount
>= 0);
25265 emit_insn (gen_blockage ());
25268 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
25269 GEN_INT (amount
)));
25272 /* r3 is always free in the epilogue. */
25273 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
25275 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
25276 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
25280 /* Emit a USE (stack_pointer_rtx), so that
25281 the stack adjustment will not be deleted. */
25282 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25284 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
25285 emit_insn (gen_blockage ());
25287 /* Emit a clobber for each insn that will be restored in the epilogue,
25288 so that flow2 will get register lifetimes correct. */
25289 for (regno
= 0; regno
< 13; regno
++)
25290 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
25291 emit_clobber (gen_rtx_REG (SImode
, regno
));
25293 if (! df_regs_ever_live_p (LR_REGNUM
))
25294 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
25296 /* Clear all caller-saved regs that are not used to return. */
25297 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25298 cmse_nonsecure_entry_clear_before_return ();
25301 /* Epilogue code for APCS frame. */
25303 arm_expand_epilogue_apcs_frame (bool really_return
)
25305 unsigned long func_type
;
25306 unsigned long saved_regs_mask
;
25309 int floats_from_frame
= 0;
25310 arm_stack_offsets
*offsets
;
25312 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
25313 func_type
= arm_current_func_type ();
25315 /* Get frame offsets for ARM. */
25316 offsets
= arm_get_frame_offsets ();
25317 saved_regs_mask
= offsets
->saved_regs_mask
;
25319 /* Find the offset of the floating-point save area in the frame. */
25321 = (offsets
->saved_args
25322 + arm_compute_static_chain_stack_bytes ()
25325 /* Compute how many core registers saved and how far away the floats are. */
25326 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25327 if (saved_regs_mask
& (1 << i
))
25330 floats_from_frame
+= 4;
25333 if (TARGET_HARD_FLOAT
)
25336 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
25338 /* The offset is from IP_REGNUM. */
25339 int saved_size
= arm_get_vfp_saved_size ();
25340 if (saved_size
> 0)
25343 floats_from_frame
+= saved_size
;
25344 insn
= emit_insn (gen_addsi3 (ip_rtx
,
25345 hard_frame_pointer_rtx
,
25346 GEN_INT (-floats_from_frame
)));
25347 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
25348 ip_rtx
, hard_frame_pointer_rtx
);
25351 /* Generate VFP register multi-pop. */
25352 start_reg
= FIRST_VFP_REGNUM
;
25354 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
25355 /* Look for a case where a reg does not need restoring. */
25356 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25357 && (!df_regs_ever_live_p (i
+ 1)
25358 || call_used_regs
[i
+ 1]))
25360 if (start_reg
!= i
)
25361 arm_emit_vfp_multi_reg_pop (start_reg
,
25362 (i
- start_reg
) / 2,
25363 gen_rtx_REG (SImode
,
25368 /* Restore the remaining regs that we have discovered (or possibly
25369 even all of them, if the conditional in the for loop never
25371 if (start_reg
!= i
)
25372 arm_emit_vfp_multi_reg_pop (start_reg
,
25373 (i
- start_reg
) / 2,
25374 gen_rtx_REG (SImode
, IP_REGNUM
));
25379 /* The frame pointer is guaranteed to be non-double-word aligned, as
25380 it is set to double-word-aligned old_stack_pointer - 4. */
25382 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
25384 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
25385 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25387 rtx addr
= gen_frame_mem (V2SImode
,
25388 plus_constant (Pmode
, hard_frame_pointer_rtx
,
25390 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25391 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25392 gen_rtx_REG (V2SImode
, i
),
25398 /* saved_regs_mask should contain IP which contains old stack pointer
25399 at the time of activation creation. Since SP and IP are adjacent registers,
25400 we can restore the value directly into SP. */
25401 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
25402 saved_regs_mask
&= ~(1 << IP_REGNUM
);
25403 saved_regs_mask
|= (1 << SP_REGNUM
);
25405 /* There are two registers left in saved_regs_mask - LR and PC. We
25406 only need to restore LR (the return address), but to
25407 save time we can load it directly into PC, unless we need a
25408 special function exit sequence, or we are not really returning. */
25410 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
25411 && !crtl
->calls_eh_return
)
25412 /* Delete LR from the register mask, so that LR on
25413 the stack is loaded into the PC in the register mask. */
25414 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25416 saved_regs_mask
&= ~(1 << PC_REGNUM
);
25418 num_regs
= bit_count (saved_regs_mask
);
25419 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
25422 emit_insn (gen_blockage ());
25423 /* Unwind the stack to just below the saved registers. */
25424 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25425 hard_frame_pointer_rtx
,
25426 GEN_INT (- 4 * num_regs
)));
25428 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
25429 stack_pointer_rtx
, hard_frame_pointer_rtx
);
25432 arm_emit_multi_reg_pop (saved_regs_mask
);
25434 if (IS_INTERRUPT (func_type
))
25436 /* Interrupt handlers will have pushed the
25437 IP onto the stack, so restore it now. */
25439 rtx addr
= gen_rtx_MEM (SImode
,
25440 gen_rtx_POST_INC (SImode
,
25441 stack_pointer_rtx
));
25442 set_mem_alias_set (addr
, get_frame_alias_set ());
25443 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
25444 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25445 gen_rtx_REG (SImode
, IP_REGNUM
),
25449 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
25452 if (crtl
->calls_eh_return
)
25453 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25455 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25457 if (IS_STACKALIGN (func_type
))
25458 /* Restore the original stack pointer. Before prologue, the stack was
25459 realigned and the original stack pointer saved in r0. For details,
25460 see comment in arm_expand_prologue. */
25461 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25463 emit_jump_insn (simple_return_rtx
);
25466 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25467 function is not a sibcall. */
25469 arm_expand_epilogue (bool really_return
)
25471 unsigned long func_type
;
25472 unsigned long saved_regs_mask
;
25476 arm_stack_offsets
*offsets
;
25478 func_type
= arm_current_func_type ();
25480 /* Naked functions don't have epilogue. Hence, generate return pattern, and
25481 let output_return_instruction take care of instruction emission if any. */
25482 if (IS_NAKED (func_type
)
25483 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
25486 emit_jump_insn (simple_return_rtx
);
25490 /* If we are throwing an exception, then we really must be doing a
25491 return, so we can't tail-call. */
25492 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
25494 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
25496 arm_expand_epilogue_apcs_frame (really_return
);
25500 /* Get frame offsets for ARM. */
25501 offsets
= arm_get_frame_offsets ();
25502 saved_regs_mask
= offsets
->saved_regs_mask
;
25503 num_regs
= bit_count (saved_regs_mask
);
25505 if (frame_pointer_needed
)
25508 /* Restore stack pointer if necessary. */
25511 /* In ARM mode, frame pointer points to first saved register.
25512 Restore stack pointer to last saved register. */
25513 amount
= offsets
->frame
- offsets
->saved_regs
;
25515 /* Force out any pending memory operations that reference stacked data
25516 before stack de-allocation occurs. */
25517 emit_insn (gen_blockage ());
25518 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25519 hard_frame_pointer_rtx
,
25520 GEN_INT (amount
)));
25521 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25523 hard_frame_pointer_rtx
);
25525 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25527 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25531 /* In Thumb-2 mode, the frame pointer points to the last saved
25533 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25536 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
25537 hard_frame_pointer_rtx
,
25538 GEN_INT (amount
)));
25539 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25540 hard_frame_pointer_rtx
,
25541 hard_frame_pointer_rtx
);
25544 /* Force out any pending memory operations that reference stacked data
25545 before stack de-allocation occurs. */
25546 emit_insn (gen_blockage ());
25547 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
25548 hard_frame_pointer_rtx
));
25549 arm_add_cfa_adjust_cfa_note (insn
, 0,
25551 hard_frame_pointer_rtx
);
25552 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25554 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25559 /* Pop off outgoing args and local frame to adjust stack pointer to
25560 last saved register. */
25561 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25565 /* Force out any pending memory operations that reference stacked data
25566 before stack de-allocation occurs. */
25567 emit_insn (gen_blockage ());
25568 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25570 GEN_INT (amount
)));
25571 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25572 stack_pointer_rtx
, stack_pointer_rtx
);
25573 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25575 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25579 if (TARGET_HARD_FLOAT
)
25581 /* Generate VFP register multi-pop. */
25582 int end_reg
= LAST_VFP_REGNUM
+ 1;
25584 /* Scan the registers in reverse order. We need to match
25585 any groupings made in the prologue and generate matching
25586 vldm operations. The need to match groups is because,
25587 unlike pop, vldm can only do consecutive regs. */
25588 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
25589 /* Look for a case where a reg does not need restoring. */
25590 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25591 && (!df_regs_ever_live_p (i
+ 1)
25592 || call_used_regs
[i
+ 1]))
25594 /* Restore the regs discovered so far (from reg+2 to
25596 if (end_reg
> i
+ 2)
25597 arm_emit_vfp_multi_reg_pop (i
+ 2,
25598 (end_reg
- (i
+ 2)) / 2,
25599 stack_pointer_rtx
);
25603 /* Restore the remaining regs that we have discovered (or possibly
25604 even all of them, if the conditional in the for loop never
25606 if (end_reg
> i
+ 2)
25607 arm_emit_vfp_multi_reg_pop (i
+ 2,
25608 (end_reg
- (i
+ 2)) / 2,
25609 stack_pointer_rtx
);
25613 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
25614 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25617 rtx addr
= gen_rtx_MEM (V2SImode
,
25618 gen_rtx_POST_INC (SImode
,
25619 stack_pointer_rtx
));
25620 set_mem_alias_set (addr
, get_frame_alias_set ());
25621 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25622 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25623 gen_rtx_REG (V2SImode
, i
),
25625 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25626 stack_pointer_rtx
, stack_pointer_rtx
);
25629 if (saved_regs_mask
)
25632 bool return_in_pc
= false;
25634 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
25635 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
25636 && !IS_CMSE_ENTRY (func_type
)
25637 && !IS_STACKALIGN (func_type
)
25639 && crtl
->args
.pretend_args_size
== 0
25640 && saved_regs_mask
& (1 << LR_REGNUM
)
25641 && !crtl
->calls_eh_return
)
25643 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25644 saved_regs_mask
|= (1 << PC_REGNUM
);
25645 return_in_pc
= true;
25648 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
25650 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25651 if (saved_regs_mask
& (1 << i
))
25653 rtx addr
= gen_rtx_MEM (SImode
,
25654 gen_rtx_POST_INC (SImode
,
25655 stack_pointer_rtx
));
25656 set_mem_alias_set (addr
, get_frame_alias_set ());
25658 if (i
== PC_REGNUM
)
25660 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25661 XVECEXP (insn
, 0, 0) = ret_rtx
;
25662 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
25664 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
25665 insn
= emit_jump_insn (insn
);
25669 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
25671 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25672 gen_rtx_REG (SImode
, i
),
25674 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25676 stack_pointer_rtx
);
25683 && current_tune
->prefer_ldrd_strd
25684 && !optimize_function_for_size_p (cfun
))
25687 thumb2_emit_ldrd_pop (saved_regs_mask
);
25688 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
25689 arm_emit_ldrd_pop (saved_regs_mask
);
25691 arm_emit_multi_reg_pop (saved_regs_mask
);
25694 arm_emit_multi_reg_pop (saved_regs_mask
);
25702 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
25706 rtx dwarf
= NULL_RTX
;
25708 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25710 GEN_INT (amount
)));
25712 RTX_FRAME_RELATED_P (tmp
) = 1;
25714 if (cfun
->machine
->uses_anonymous_args
)
25716 /* Restore pretend args. Refer arm_expand_prologue on how to save
25717 pretend_args in stack. */
25718 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
25719 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
25720 for (j
= 0, i
= 0; j
< num_regs
; i
++)
25721 if (saved_regs_mask
& (1 << i
))
25723 rtx reg
= gen_rtx_REG (SImode
, i
);
25724 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
25727 REG_NOTES (tmp
) = dwarf
;
25729 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25730 stack_pointer_rtx
, stack_pointer_rtx
);
25733 /* Clear all caller-saved regs that are not used to return. */
25734 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25736 /* CMSE_ENTRY always returns. */
25737 gcc_assert (really_return
);
25738 cmse_nonsecure_entry_clear_before_return ();
25741 if (!really_return
)
25744 if (crtl
->calls_eh_return
)
25745 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25747 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25749 if (IS_STACKALIGN (func_type
))
25750 /* Restore the original stack pointer. Before prologue, the stack was
25751 realigned and the original stack pointer saved in r0. For details,
25752 see comment in arm_expand_prologue. */
25753 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25755 emit_jump_insn (simple_return_rtx
);
25758 /* Implementation of insn prologue_thumb1_interwork. This is the first
25759 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25762 thumb1_output_interwork (void)
25765 FILE *f
= asm_out_file
;
25767 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
25768 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
25770 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
25772 /* Generate code sequence to switch us into Thumb mode. */
25773 /* The .code 32 directive has already been emitted by
25774 ASM_DECLARE_FUNCTION_NAME. */
25775 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
25776 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
25778 /* Generate a label, so that the debugger will notice the
25779 change in instruction sets. This label is also used by
25780 the assembler to bypass the ARM code when this function
25781 is called from a Thumb encoded function elsewhere in the
25782 same file. Hence the definition of STUB_NAME here must
25783 agree with the definition in gas/config/tc-arm.c. */
25785 #define STUB_NAME ".real_start_of"
25787 fprintf (f
, "\t.code\t16\n");
25789 if (arm_dllexport_name_p (name
))
25790 name
= arm_strip_name_encoding (name
);
25792 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
25793 fprintf (f
, "\t.thumb_func\n");
25794 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
25799 /* Handle the case of a double word load into a low register from
25800 a computed memory address. The computed address may involve a
25801 register which is overwritten by the load. */
25803 thumb_load_double_from_address (rtx
*operands
)
25811 gcc_assert (REG_P (operands
[0]));
25812 gcc_assert (MEM_P (operands
[1]));
25814 /* Get the memory address. */
25815 addr
= XEXP (operands
[1], 0);
25817 /* Work out how the memory address is computed. */
25818 switch (GET_CODE (addr
))
25821 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25823 if (REGNO (operands
[0]) == REGNO (addr
))
25825 output_asm_insn ("ldr\t%H0, %2", operands
);
25826 output_asm_insn ("ldr\t%0, %1", operands
);
25830 output_asm_insn ("ldr\t%0, %1", operands
);
25831 output_asm_insn ("ldr\t%H0, %2", operands
);
25836 /* Compute <address> + 4 for the high order load. */
25837 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25839 output_asm_insn ("ldr\t%0, %1", operands
);
25840 output_asm_insn ("ldr\t%H0, %2", operands
);
25844 arg1
= XEXP (addr
, 0);
25845 arg2
= XEXP (addr
, 1);
25847 if (CONSTANT_P (arg1
))
25848 base
= arg2
, offset
= arg1
;
25850 base
= arg1
, offset
= arg2
;
25852 gcc_assert (REG_P (base
));
25854 /* Catch the case of <address> = <reg> + <reg> */
25855 if (REG_P (offset
))
25857 int reg_offset
= REGNO (offset
);
25858 int reg_base
= REGNO (base
);
25859 int reg_dest
= REGNO (operands
[0]);
25861 /* Add the base and offset registers together into the
25862 higher destination register. */
25863 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
25864 reg_dest
+ 1, reg_base
, reg_offset
);
25866 /* Load the lower destination register from the address in
25867 the higher destination register. */
25868 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
25869 reg_dest
, reg_dest
+ 1);
25871 /* Load the higher destination register from its own address
25873 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
25874 reg_dest
+ 1, reg_dest
+ 1);
25878 /* Compute <address> + 4 for the high order load. */
25879 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25881 /* If the computed address is held in the low order register
25882 then load the high order register first, otherwise always
25883 load the low order register first. */
25884 if (REGNO (operands
[0]) == REGNO (base
))
25886 output_asm_insn ("ldr\t%H0, %2", operands
);
25887 output_asm_insn ("ldr\t%0, %1", operands
);
25891 output_asm_insn ("ldr\t%0, %1", operands
);
25892 output_asm_insn ("ldr\t%H0, %2", operands
);
25898 /* With no registers to worry about we can just load the value
25900 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25902 output_asm_insn ("ldr\t%H0, %2", operands
);
25903 output_asm_insn ("ldr\t%0, %1", operands
);
25907 gcc_unreachable ();
25914 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
25919 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25920 std::swap (operands
[4], operands
[5]);
25922 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
25923 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
25927 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25928 std::swap (operands
[4], operands
[5]);
25929 if (REGNO (operands
[5]) > REGNO (operands
[6]))
25930 std::swap (operands
[5], operands
[6]);
25931 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25932 std::swap (operands
[4], operands
[5]);
25934 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
25935 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
25939 gcc_unreachable ();
25945 /* Output a call-via instruction for thumb state. */
25947 thumb_call_via_reg (rtx reg
)
25949 int regno
= REGNO (reg
);
25952 gcc_assert (regno
< LR_REGNUM
);
25954 /* If we are in the normal text section we can use a single instance
25955 per compilation unit. If we are doing function sections, then we need
25956 an entry per section, since we can't rely on reachability. */
25957 if (in_section
== text_section
)
25959 thumb_call_reg_needed
= 1;
25961 if (thumb_call_via_label
[regno
] == NULL
)
25962 thumb_call_via_label
[regno
] = gen_label_rtx ();
25963 labelp
= thumb_call_via_label
+ regno
;
25967 if (cfun
->machine
->call_via
[regno
] == NULL
)
25968 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
25969 labelp
= cfun
->machine
->call_via
+ regno
;
25972 output_asm_insn ("bl\t%a0", labelp
);
25976 /* Routines for generating rtl. */
25978 thumb_expand_movmemqi (rtx
*operands
)
25980 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
25981 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
25982 HOST_WIDE_INT len
= INTVAL (operands
[2]);
25983 HOST_WIDE_INT offset
= 0;
25987 emit_insn (gen_movmem12b (out
, in
, out
, in
));
25993 emit_insn (gen_movmem8b (out
, in
, out
, in
));
25999 rtx reg
= gen_reg_rtx (SImode
);
26000 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
26001 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
26008 rtx reg
= gen_reg_rtx (HImode
);
26009 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
26010 plus_constant (Pmode
, in
,
26012 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
26021 rtx reg
= gen_reg_rtx (QImode
);
26022 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
26023 plus_constant (Pmode
, in
,
26025 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
26032 thumb_reload_out_hi (rtx
*operands
)
26034 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
26037 /* Return the length of a function name prefix
26038 that starts with the character 'c'. */
26040 arm_get_strip_length (int c
)
26044 ARM_NAME_ENCODING_LENGTHS
26049 /* Return a pointer to a function's name with any
26050 and all prefix encodings stripped from it. */
26052 arm_strip_name_encoding (const char *name
)
26056 while ((skip
= arm_get_strip_length (* name
)))
26062 /* If there is a '*' anywhere in the name's prefix, then
26063 emit the stripped name verbatim, otherwise prepend an
26064 underscore if leading underscores are being used. */
26066 arm_asm_output_labelref (FILE *stream
, const char *name
)
26071 while ((skip
= arm_get_strip_length (* name
)))
26073 verbatim
|= (*name
== '*');
26078 fputs (name
, stream
);
26080 asm_fprintf (stream
, "%U%s", name
);
26083 /* This function is used to emit an EABI tag and its associated value.
26084 We emit the numerical value of the tag in case the assembler does not
26085 support textual tags. (Eg gas prior to 2.20). If requested we include
26086 the tag name in a comment so that anyone reading the assembler output
26087 will know which tag is being set.
26089 This function is not static because arm-c.c needs it too. */
26092 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
26094 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
26095 if (flag_verbose_asm
|| flag_debug_asm
)
26096 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
26097 asm_fprintf (asm_out_file
, "\n");
26100 /* This function is used to print CPU tuning information as comment
26101 in assembler file. Pointers are not printed for now. */
26104 arm_print_tune_info (void)
26106 asm_fprintf (asm_out_file
, "\t" ASM_COMMENT_START
".tune parameters\n");
26107 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"constant_limit:\t%d\n",
26108 current_tune
->constant_limit
);
26109 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26110 "max_insns_skipped:\t%d\n", current_tune
->max_insns_skipped
);
26111 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26112 "prefetch.num_slots:\t%d\n", current_tune
->prefetch
.num_slots
);
26113 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26114 "prefetch.l1_cache_size:\t%d\n",
26115 current_tune
->prefetch
.l1_cache_size
);
26116 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26117 "prefetch.l1_cache_line_size:\t%d\n",
26118 current_tune
->prefetch
.l1_cache_line_size
);
26119 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26120 "prefer_constant_pool:\t%d\n",
26121 (int) current_tune
->prefer_constant_pool
);
26122 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26123 "branch_cost:\t(s:speed, p:predictable)\n");
26124 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\ts&p\tcost\n");
26125 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t00\t%d\n",
26126 current_tune
->branch_cost (false, false));
26127 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t01\t%d\n",
26128 current_tune
->branch_cost (false, true));
26129 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t10\t%d\n",
26130 current_tune
->branch_cost (true, false));
26131 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t11\t%d\n",
26132 current_tune
->branch_cost (true, true));
26133 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26134 "prefer_ldrd_strd:\t%d\n",
26135 (int) current_tune
->prefer_ldrd_strd
);
26136 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26137 "logical_op_non_short_circuit:\t[%d,%d]\n",
26138 (int) current_tune
->logical_op_non_short_circuit_thumb
,
26139 (int) current_tune
->logical_op_non_short_circuit_arm
);
26140 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26141 "prefer_neon_for_64bits:\t%d\n",
26142 (int) current_tune
->prefer_neon_for_64bits
);
26143 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26144 "disparage_flag_setting_t16_encodings:\t%d\n",
26145 (int) current_tune
->disparage_flag_setting_t16_encodings
);
26146 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26147 "string_ops_prefer_neon:\t%d\n",
26148 (int) current_tune
->string_ops_prefer_neon
);
26149 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26150 "max_insns_inline_memset:\t%d\n",
26151 current_tune
->max_insns_inline_memset
);
26152 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"fusible_ops:\t%u\n",
26153 current_tune
->fusible_ops
);
26154 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"sched_autopref:\t%d\n",
26155 (int) current_tune
->sched_autopref
);
26158 /* Print .arch and .arch_extension directives corresponding to the
26159 current architecture configuration. */
26161 arm_print_asm_arch_directives ()
26163 const arch_option
*arch
26164 = arm_parse_arch_option_name (all_architectures
, "-march",
26165 arm_active_target
.arch_name
);
26166 auto_sbitmap
opt_bits (isa_num_bits
);
26170 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_active_target
.arch_name
);
26171 if (!arch
->common
.extensions
)
26174 for (const struct cpu_arch_extension
*opt
= arch
->common
.extensions
;
26180 arm_initialize_isa (opt_bits
, opt
->isa_bits
);
26182 /* If every feature bit of this option is set in the target
26183 ISA specification, print out the option name. However,
26184 don't print anything if all the bits are part of the
26185 FPU specification. */
26186 if (bitmap_subset_p (opt_bits
, arm_active_target
.isa
)
26187 && !bitmap_subset_p (opt_bits
, isa_all_fpubits
))
26188 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", opt
->name
);
26194 arm_file_start (void)
26200 /* We don't have a specified CPU. Use the architecture to
26203 Note: it might be better to do this unconditionally, then the
26204 assembler would not need to know about all new CPU names as
26206 if (!arm_active_target
.core_name
)
26208 /* armv7ve doesn't support any extensions. */
26209 if (strcmp (arm_active_target
.arch_name
, "armv7ve") == 0)
26211 /* Keep backward compatability for assemblers
26212 which don't support armv7ve. */
26213 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
26214 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
26215 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
26216 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
26217 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
26220 arm_print_asm_arch_directives ();
26222 else if (strncmp (arm_active_target
.core_name
, "generic", 7) == 0)
26223 asm_fprintf (asm_out_file
, "\t.arch %s\n",
26224 arm_active_target
.core_name
+ 8);
26227 const char* truncated_name
26228 = arm_rewrite_selected_cpu (arm_active_target
.core_name
);
26229 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
26232 if (print_tune_info
)
26233 arm_print_tune_info ();
26235 if (! TARGET_SOFT_FLOAT
)
26237 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
26238 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26240 if (TARGET_HARD_FLOAT_ABI
)
26241 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26244 /* Some of these attributes only apply when the corresponding features
26245 are used. However we don't have any easy way of figuring this out.
26246 Conservatively record the setting that would have been used. */
26248 if (flag_rounding_math
)
26249 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26251 if (!flag_unsafe_math_optimizations
)
26253 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26254 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26256 if (flag_signaling_nans
)
26257 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26259 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26260 flag_finite_math_only
? 1 : 3);
26262 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26263 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26264 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26265 flag_short_enums
? 1 : 2);
26267 /* Tag_ABI_optimization_goals. */
26270 else if (optimize
>= 2)
26276 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
26278 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26281 if (arm_fp16_format
)
26282 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26283 (int) arm_fp16_format
);
26285 if (arm_lang_output_object_attributes_hook
)
26286 arm_lang_output_object_attributes_hook();
26289 default_file_start ();
26293 arm_file_end (void)
26297 if (NEED_INDICATE_EXEC_STACK
)
26298 /* Add .note.GNU-stack. */
26299 file_end_indicate_exec_stack ();
26301 if (! thumb_call_reg_needed
)
26304 switch_to_section (text_section
);
26305 asm_fprintf (asm_out_file
, "\t.code 16\n");
26306 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
26308 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
26310 rtx label
= thumb_call_via_label
[regno
];
26314 targetm
.asm_out
.internal_label (asm_out_file
, "L",
26315 CODE_LABEL_NUMBER (label
));
26316 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
26322 /* Symbols in the text segment can be accessed without indirecting via the
26323 constant pool; it may take an extra binary operation, but this is still
26324 faster than indirecting via memory. Don't do this when not optimizing,
26325 since we won't be calculating al of the offsets necessary to do this
26329 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
26331 if (optimize
> 0 && TREE_CONSTANT (decl
))
26332 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
26334 default_encode_section_info (decl
, rtl
, first
);
26336 #endif /* !ARM_PE */
26339 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
26341 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
26342 && !strcmp (prefix
, "L"))
26344 arm_ccfsm_state
= 0;
26345 arm_target_insn
= NULL
;
26347 default_internal_label (stream
, prefix
, labelno
);
26350 /* Output code to add DELTA to the first argument, and then jump
26351 to FUNCTION. Used for C++ multiple inheritance. */
26354 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26355 HOST_WIDE_INT
, tree function
)
26357 static int thunk_label
= 0;
26360 int mi_delta
= delta
;
26361 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
26363 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
26366 mi_delta
= - mi_delta
;
26368 final_start_function (emit_barrier (), file
, 1);
26372 int labelno
= thunk_label
++;
26373 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
26374 /* Thunks are entered in arm mode when available. */
26375 if (TARGET_THUMB1_ONLY
)
26377 /* push r3 so we can use it as a temporary. */
26378 /* TODO: Omit this save if r3 is not used. */
26379 fputs ("\tpush {r3}\n", file
);
26380 fputs ("\tldr\tr3, ", file
);
26384 fputs ("\tldr\tr12, ", file
);
26386 assemble_name (file
, label
);
26387 fputc ('\n', file
);
26390 /* If we are generating PIC, the ldr instruction below loads
26391 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26392 the address of the add + 8, so we have:
26394 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26397 Note that we have "+ 1" because some versions of GNU ld
26398 don't set the low bit of the result for R_ARM_REL32
26399 relocations against thumb function symbols.
26400 On ARMv6M this is +4, not +8. */
26401 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
26402 assemble_name (file
, labelpc
);
26403 fputs (":\n", file
);
26404 if (TARGET_THUMB1_ONLY
)
26406 /* This is 2 insns after the start of the thunk, so we know it
26407 is 4-byte aligned. */
26408 fputs ("\tadd\tr3, pc, r3\n", file
);
26409 fputs ("\tmov r12, r3\n", file
);
26412 fputs ("\tadd\tr12, pc, r12\n", file
);
26414 else if (TARGET_THUMB1_ONLY
)
26415 fputs ("\tmov r12, r3\n", file
);
26417 if (TARGET_THUMB1_ONLY
)
26419 if (mi_delta
> 255)
26421 fputs ("\tldr\tr3, ", file
);
26422 assemble_name (file
, label
);
26423 fputs ("+4\n", file
);
26424 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
26425 mi_op
, this_regno
, this_regno
);
26427 else if (mi_delta
!= 0)
26429 /* Thumb1 unified syntax requires s suffix in instruction name when
26430 one of the operands is immediate. */
26431 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
26432 mi_op
, this_regno
, this_regno
,
26438 /* TODO: Use movw/movt for large constants when available. */
26439 while (mi_delta
!= 0)
26441 if ((mi_delta
& (3 << shift
)) == 0)
26445 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
26446 mi_op
, this_regno
, this_regno
,
26447 mi_delta
& (0xff << shift
));
26448 mi_delta
&= ~(0xff << shift
);
26455 if (TARGET_THUMB1_ONLY
)
26456 fputs ("\tpop\t{r3}\n", file
);
26458 fprintf (file
, "\tbx\tr12\n");
26459 ASM_OUTPUT_ALIGN (file
, 2);
26460 assemble_name (file
, label
);
26461 fputs (":\n", file
);
26464 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26465 rtx tem
= XEXP (DECL_RTL (function
), 0);
26466 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26467 pipeline offset is four rather than eight. Adjust the offset
26469 tem
= plus_constant (GET_MODE (tem
), tem
,
26470 TARGET_THUMB1_ONLY
? -3 : -7);
26471 tem
= gen_rtx_MINUS (GET_MODE (tem
),
26473 gen_rtx_SYMBOL_REF (Pmode
,
26474 ggc_strdup (labelpc
)));
26475 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
26478 /* Output ".word .LTHUNKn". */
26479 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
26481 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
26482 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
26486 fputs ("\tb\t", file
);
26487 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
26488 if (NEED_PLT_RELOC
)
26489 fputs ("(PLT)", file
);
26490 fputc ('\n', file
);
26493 final_end_function ();
26496 /* MI thunk handling for TARGET_32BIT. */
26499 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26500 HOST_WIDE_INT vcall_offset
, tree function
)
26502 /* On ARM, this_regno is R0 or R1 depending on
26503 whether the function returns an aggregate or not.
26505 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
26507 ? R1_REGNUM
: R0_REGNUM
);
26509 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
26510 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
26511 reload_completed
= 1;
26512 emit_note (NOTE_INSN_PROLOGUE_END
);
26514 /* Add DELTA to THIS_RTX. */
26516 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
26517 delta
, this_rtx
, this_rtx
, false);
26519 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26520 if (vcall_offset
!= 0)
26522 /* Load *THIS_RTX. */
26523 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
26524 /* Compute *THIS_RTX + VCALL_OFFSET. */
26525 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
26527 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26528 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
26529 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
26532 /* Generate a tail call to the target function. */
26533 if (!TREE_USED (function
))
26535 assemble_external (function
);
26536 TREE_USED (function
) = 1;
26538 rtx funexp
= XEXP (DECL_RTL (function
), 0);
26539 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
26540 rtx_insn
* insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
26541 SIBLING_CALL_P (insn
) = 1;
26543 insn
= get_insns ();
26544 shorten_branches (insn
);
26545 final_start_function (insn
, file
, 1);
26546 final (insn
, file
, 1);
26547 final_end_function ();
26549 /* Stop pretending this is a post-reload pass. */
26550 reload_completed
= 0;
26553 /* Output code to add DELTA to the first argument, and then jump
26554 to FUNCTION. Used for C++ multiple inheritance. */
26557 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
26558 HOST_WIDE_INT vcall_offset
, tree function
)
26561 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26563 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26567 arm_emit_vector_const (FILE *file
, rtx x
)
26570 const char * pattern
;
26572 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
26574 switch (GET_MODE (x
))
26576 case E_V2SImode
: pattern
= "%08x"; break;
26577 case E_V4HImode
: pattern
= "%04x"; break;
26578 case E_V8QImode
: pattern
= "%02x"; break;
26579 default: gcc_unreachable ();
26582 fprintf (file
, "0x");
26583 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
26587 element
= CONST_VECTOR_ELT (x
, i
);
26588 fprintf (file
, pattern
, INTVAL (element
));
26594 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26595 HFmode constant pool entries are actually loaded with ldr. */
26597 arm_emit_fp16_const (rtx c
)
26601 bits
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (c
), HFmode
);
26602 if (WORDS_BIG_ENDIAN
)
26603 assemble_zeros (2);
26604 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
26605 if (!WORDS_BIG_ENDIAN
)
26606 assemble_zeros (2);
26610 arm_output_load_gr (rtx
*operands
)
26617 if (!MEM_P (operands
[1])
26618 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
26619 || !REG_P (reg
= XEXP (sum
, 0))
26620 || !CONST_INT_P (offset
= XEXP (sum
, 1))
26621 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
26622 return "wldrw%?\t%0, %1";
26624 /* Fix up an out-of-range load of a GR register. */
26625 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
26626 wcgr
= operands
[0];
26628 output_asm_insn ("ldr%?\t%0, %1", operands
);
26630 operands
[0] = wcgr
;
26632 output_asm_insn ("tmcr%?\t%0, %1", operands
);
26633 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
26638 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26640 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26641 named arg and all anonymous args onto the stack.
26642 XXX I know the prologue shouldn't be pushing registers, but it is faster
26646 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
26650 int second_time ATTRIBUTE_UNUSED
)
26652 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
26655 cfun
->machine
->uses_anonymous_args
= 1;
26656 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
26658 nregs
= pcum
->aapcs_ncrn
;
26661 int res
= arm_needs_doubleword_align (mode
, type
);
26662 if (res
< 0 && warn_psabi
)
26663 inform (input_location
, "parameter passing for argument of "
26664 "type %qT changed in GCC 7.1", type
);
26670 nregs
= pcum
->nregs
;
26672 if (nregs
< NUM_ARG_REGS
)
26673 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
26676 /* We can't rely on the caller doing the proper promotion when
26677 using APCS or ATPCS. */
26680 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
26682 return !TARGET_AAPCS_BASED
;
26685 static machine_mode
26686 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
26688 int *punsignedp ATTRIBUTE_UNUSED
,
26689 const_tree fntype ATTRIBUTE_UNUSED
,
26690 int for_return ATTRIBUTE_UNUSED
)
26692 if (GET_MODE_CLASS (mode
) == MODE_INT
26693 && GET_MODE_SIZE (mode
) < 4)
26701 arm_default_short_enums (void)
26703 return ARM_DEFAULT_SHORT_ENUMS
;
26707 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26710 arm_align_anon_bitfield (void)
26712 return TARGET_AAPCS_BASED
;
26716 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26719 arm_cxx_guard_type (void)
26721 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
26725 /* The EABI says test the least significant bit of a guard variable. */
26728 arm_cxx_guard_mask_bit (void)
26730 return TARGET_AAPCS_BASED
;
26734 /* The EABI specifies that all array cookies are 8 bytes long. */
26737 arm_get_cookie_size (tree type
)
26741 if (!TARGET_AAPCS_BASED
)
26742 return default_cxx_get_cookie_size (type
);
26744 size
= build_int_cst (sizetype
, 8);
26749 /* The EABI says that array cookies should also contain the element size. */
26752 arm_cookie_has_size (void)
26754 return TARGET_AAPCS_BASED
;
26758 /* The EABI says constructors and destructors should return a pointer to
26759 the object constructed/destroyed. */
26762 arm_cxx_cdtor_returns_this (void)
26764 return TARGET_AAPCS_BASED
;
26767 /* The EABI says that an inline function may never be the key
26771 arm_cxx_key_method_may_be_inline (void)
26773 return !TARGET_AAPCS_BASED
;
26777 arm_cxx_determine_class_data_visibility (tree decl
)
26779 if (!TARGET_AAPCS_BASED
26780 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
26783 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26784 is exported. However, on systems without dynamic vague linkage,
26785 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26786 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
26787 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
26789 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
26790 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
26794 arm_cxx_class_data_always_comdat (void)
26796 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26797 vague linkage if the class has no key function. */
26798 return !TARGET_AAPCS_BASED
;
26802 /* The EABI says __aeabi_atexit should be used to register static
26806 arm_cxx_use_aeabi_atexit (void)
26808 return TARGET_AAPCS_BASED
;
26813 arm_set_return_address (rtx source
, rtx scratch
)
26815 arm_stack_offsets
*offsets
;
26816 HOST_WIDE_INT delta
;
26818 unsigned long saved_regs
;
26820 offsets
= arm_get_frame_offsets ();
26821 saved_regs
= offsets
->saved_regs_mask
;
26823 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
26824 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26827 if (frame_pointer_needed
)
26828 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
26831 /* LR will be the first saved register. */
26832 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
26837 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
26838 GEN_INT (delta
& ~4095)));
26843 addr
= stack_pointer_rtx
;
26845 addr
= plus_constant (Pmode
, addr
, delta
);
26847 /* The store needs to be marked as frame related in order to prevent
26848 DSE from deleting it as dead if it is based on fp. */
26849 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26850 RTX_FRAME_RELATED_P (insn
) = 1;
26851 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26857 thumb_set_return_address (rtx source
, rtx scratch
)
26859 arm_stack_offsets
*offsets
;
26860 HOST_WIDE_INT delta
;
26861 HOST_WIDE_INT limit
;
26864 unsigned long mask
;
26868 offsets
= arm_get_frame_offsets ();
26869 mask
= offsets
->saved_regs_mask
;
26870 if (mask
& (1 << LR_REGNUM
))
26873 /* Find the saved regs. */
26874 if (frame_pointer_needed
)
26876 delta
= offsets
->soft_frame
- offsets
->saved_args
;
26877 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
26883 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
26886 /* Allow for the stack frame. */
26887 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
26889 /* The link register is always the first saved register. */
26892 /* Construct the address. */
26893 addr
= gen_rtx_REG (SImode
, reg
);
26896 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
26897 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
26901 addr
= plus_constant (Pmode
, addr
, delta
);
26903 /* The store needs to be marked as frame related in order to prevent
26904 DSE from deleting it as dead if it is based on fp. */
26905 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26906 RTX_FRAME_RELATED_P (insn
) = 1;
26907 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26910 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26913 /* Implements target hook vector_mode_supported_p. */
26915 arm_vector_mode_supported_p (machine_mode mode
)
26917 /* Neon also supports V2SImode, etc. listed in the clause below. */
26918 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
26919 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
26920 || mode
== V2DImode
|| mode
== V8HFmode
))
26923 if ((TARGET_NEON
|| TARGET_IWMMXT
)
26924 && ((mode
== V2SImode
)
26925 || (mode
== V4HImode
)
26926 || (mode
== V8QImode
)))
26929 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
26930 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
26931 || mode
== V2HAmode
))
26937 /* Implements target hook array_mode_supported_p. */
26940 arm_array_mode_supported_p (machine_mode mode
,
26941 unsigned HOST_WIDE_INT nelems
)
26944 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
26945 && (nelems
>= 2 && nelems
<= 4))
26951 /* Use the option -mvectorize-with-neon-double to override the use of quardword
26952 registers when autovectorizing for Neon, at least until multiple vector
26953 widths are supported properly by the middle-end. */
26955 static machine_mode
26956 arm_preferred_simd_mode (scalar_mode mode
)
26962 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
26964 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
26966 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
26968 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
26970 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
26977 if (TARGET_REALLY_IWMMXT
)
26993 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26995 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26996 using r0-r4 for function arguments, r7 for the stack frame and don't have
26997 enough left over to do doubleword arithmetic. For Thumb-2 all the
26998 potentially problematic instructions accept high registers so this is not
26999 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27000 that require many low registers. */
27002 arm_class_likely_spilled_p (reg_class_t rclass
)
27004 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
27005 || rclass
== CC_REG
)
27011 /* Implements target hook small_register_classes_for_mode_p. */
27013 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
27015 return TARGET_THUMB1
;
27018 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27019 ARM insns and therefore guarantee that the shift count is modulo 256.
27020 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27021 guarantee no particular behavior for out-of-range counts. */
27023 static unsigned HOST_WIDE_INT
27024 arm_shift_truncation_mask (machine_mode mode
)
27026 return mode
== SImode
? 255 : 0;
27030 /* Map internal gcc register numbers to DWARF2 register numbers. */
27033 arm_dbx_register_number (unsigned int regno
)
27038 if (IS_VFP_REGNUM (regno
))
27040 /* See comment in arm_dwarf_register_span. */
27041 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
27042 return 64 + regno
- FIRST_VFP_REGNUM
;
27044 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
27047 if (IS_IWMMXT_GR_REGNUM (regno
))
27048 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
27050 if (IS_IWMMXT_REGNUM (regno
))
27051 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
27053 return DWARF_FRAME_REGISTERS
;
27056 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27057 GCC models tham as 64 32-bit registers, so we need to describe this to
27058 the DWARF generation code. Other registers can use the default. */
27060 arm_dwarf_register_span (rtx rtl
)
27068 regno
= REGNO (rtl
);
27069 if (!IS_VFP_REGNUM (regno
))
27072 /* XXX FIXME: The EABI defines two VFP register ranges:
27073 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27075 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27076 corresponding D register. Until GDB supports this, we shall use the
27077 legacy encodings. We also use these encodings for D0-D15 for
27078 compatibility with older debuggers. */
27079 mode
= GET_MODE (rtl
);
27080 if (GET_MODE_SIZE (mode
) < 8)
27083 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
27085 nregs
= GET_MODE_SIZE (mode
) / 4;
27086 for (i
= 0; i
< nregs
; i
+= 2)
27087 if (TARGET_BIG_END
)
27089 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
27090 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
27094 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
27095 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
27100 nregs
= GET_MODE_SIZE (mode
) / 8;
27101 for (i
= 0; i
< nregs
; i
++)
27102 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
27105 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
27108 #if ARM_UNWIND_INFO
27109 /* Emit unwind directives for a store-multiple instruction or stack pointer
27110 push during alignment.
27111 These should only ever be generated by the function prologue code, so
27112 expect them to have a particular form.
27113 The store-multiple instruction sometimes pushes pc as the last register,
27114 although it should not be tracked into unwind information, or for -Os
27115 sometimes pushes some dummy registers before first register that needs
27116 to be tracked in unwind information; such dummy registers are there just
27117 to avoid separate stack adjustment, and will not be restored in the
27121 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
27124 HOST_WIDE_INT offset
;
27125 HOST_WIDE_INT nregs
;
27129 unsigned padfirst
= 0, padlast
= 0;
27132 e
= XVECEXP (p
, 0, 0);
27133 gcc_assert (GET_CODE (e
) == SET
);
27135 /* First insn will adjust the stack pointer. */
27136 gcc_assert (GET_CODE (e
) == SET
27137 && REG_P (SET_DEST (e
))
27138 && REGNO (SET_DEST (e
)) == SP_REGNUM
27139 && GET_CODE (SET_SRC (e
)) == PLUS
);
27141 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
27142 nregs
= XVECLEN (p
, 0) - 1;
27143 gcc_assert (nregs
);
27145 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
27148 /* For -Os dummy registers can be pushed at the beginning to
27149 avoid separate stack pointer adjustment. */
27150 e
= XVECEXP (p
, 0, 1);
27151 e
= XEXP (SET_DEST (e
), 0);
27152 if (GET_CODE (e
) == PLUS
)
27153 padfirst
= INTVAL (XEXP (e
, 1));
27154 gcc_assert (padfirst
== 0 || optimize_size
);
27155 /* The function prologue may also push pc, but not annotate it as it is
27156 never restored. We turn this into a stack pointer adjustment. */
27157 e
= XVECEXP (p
, 0, nregs
);
27158 e
= XEXP (SET_DEST (e
), 0);
27159 if (GET_CODE (e
) == PLUS
)
27160 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
27162 padlast
= offset
- 4;
27163 gcc_assert (padlast
== 0 || padlast
== 4);
27165 fprintf (asm_out_file
, "\t.pad #4\n");
27167 fprintf (asm_out_file
, "\t.save {");
27169 else if (IS_VFP_REGNUM (reg
))
27172 fprintf (asm_out_file
, "\t.vsave {");
27175 /* Unknown register type. */
27176 gcc_unreachable ();
27178 /* If the stack increment doesn't match the size of the saved registers,
27179 something has gone horribly wrong. */
27180 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
27184 /* The remaining insns will describe the stores. */
27185 for (i
= 1; i
<= nregs
; i
++)
27187 /* Expect (set (mem <addr>) (reg)).
27188 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27189 e
= XVECEXP (p
, 0, i
);
27190 gcc_assert (GET_CODE (e
) == SET
27191 && MEM_P (SET_DEST (e
))
27192 && REG_P (SET_SRC (e
)));
27194 reg
= REGNO (SET_SRC (e
));
27195 gcc_assert (reg
>= lastreg
);
27198 fprintf (asm_out_file
, ", ");
27199 /* We can't use %r for vfp because we need to use the
27200 double precision register names. */
27201 if (IS_VFP_REGNUM (reg
))
27202 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
27204 asm_fprintf (asm_out_file
, "%r", reg
);
27208 /* Check that the addresses are consecutive. */
27209 e
= XEXP (SET_DEST (e
), 0);
27210 if (GET_CODE (e
) == PLUS
)
27211 gcc_assert (REG_P (XEXP (e
, 0))
27212 && REGNO (XEXP (e
, 0)) == SP_REGNUM
27213 && CONST_INT_P (XEXP (e
, 1))
27214 && offset
== INTVAL (XEXP (e
, 1)));
27218 && REGNO (e
) == SP_REGNUM
);
27219 offset
+= reg_size
;
27222 fprintf (asm_out_file
, "}\n");
27224 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
27227 /* Emit unwind directives for a SET. */
27230 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
27238 switch (GET_CODE (e0
))
27241 /* Pushing a single register. */
27242 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
27243 || !REG_P (XEXP (XEXP (e0
, 0), 0))
27244 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
27247 asm_fprintf (asm_out_file
, "\t.save ");
27248 if (IS_VFP_REGNUM (REGNO (e1
)))
27249 asm_fprintf(asm_out_file
, "{d%d}\n",
27250 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
27252 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
27256 if (REGNO (e0
) == SP_REGNUM
)
27258 /* A stack increment. */
27259 if (GET_CODE (e1
) != PLUS
27260 || !REG_P (XEXP (e1
, 0))
27261 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
27262 || !CONST_INT_P (XEXP (e1
, 1)))
27265 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
27266 -INTVAL (XEXP (e1
, 1)));
27268 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
27270 HOST_WIDE_INT offset
;
27272 if (GET_CODE (e1
) == PLUS
)
27274 if (!REG_P (XEXP (e1
, 0))
27275 || !CONST_INT_P (XEXP (e1
, 1)))
27277 reg
= REGNO (XEXP (e1
, 0));
27278 offset
= INTVAL (XEXP (e1
, 1));
27279 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
27280 HARD_FRAME_POINTER_REGNUM
, reg
,
27283 else if (REG_P (e1
))
27286 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
27287 HARD_FRAME_POINTER_REGNUM
, reg
);
27292 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
27294 /* Move from sp to reg. */
27295 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
27297 else if (GET_CODE (e1
) == PLUS
27298 && REG_P (XEXP (e1
, 0))
27299 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
27300 && CONST_INT_P (XEXP (e1
, 1)))
27302 /* Set reg to offset from sp. */
27303 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
27304 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
27316 /* Emit unwind directives for the given insn. */
27319 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
27322 bool handled_one
= false;
27324 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27327 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27328 && (TREE_NOTHROW (current_function_decl
)
27329 || crtl
->all_throwers_are_sibcalls
))
27332 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
27335 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
27337 switch (REG_NOTE_KIND (note
))
27339 case REG_FRAME_RELATED_EXPR
:
27340 pat
= XEXP (note
, 0);
27343 case REG_CFA_REGISTER
:
27344 pat
= XEXP (note
, 0);
27347 pat
= PATTERN (insn
);
27348 if (GET_CODE (pat
) == PARALLEL
)
27349 pat
= XVECEXP (pat
, 0, 0);
27352 /* Only emitted for IS_STACKALIGN re-alignment. */
27357 src
= SET_SRC (pat
);
27358 dest
= SET_DEST (pat
);
27360 gcc_assert (src
== stack_pointer_rtx
);
27361 reg
= REGNO (dest
);
27362 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27365 handled_one
= true;
27368 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
27369 to get correct dwarf information for shrink-wrap. We should not
27370 emit unwind information for it because these are used either for
27371 pretend arguments or notes to adjust sp and restore registers from
27373 case REG_CFA_DEF_CFA
:
27374 case REG_CFA_ADJUST_CFA
:
27375 case REG_CFA_RESTORE
:
27378 case REG_CFA_EXPRESSION
:
27379 case REG_CFA_OFFSET
:
27380 /* ??? Only handling here what we actually emit. */
27381 gcc_unreachable ();
27389 pat
= PATTERN (insn
);
27392 switch (GET_CODE (pat
))
27395 arm_unwind_emit_set (asm_out_file
, pat
);
27399 /* Store multiple. */
27400 arm_unwind_emit_sequence (asm_out_file
, pat
);
27409 /* Output a reference from a function exception table to the type_info
27410 object X. The EABI specifies that the symbol should be relocated by
27411 an R_ARM_TARGET2 relocation. */
27414 arm_output_ttype (rtx x
)
27416 fputs ("\t.word\t", asm_out_file
);
27417 output_addr_const (asm_out_file
, x
);
27418 /* Use special relocations for symbol references. */
27419 if (!CONST_INT_P (x
))
27420 fputs ("(TARGET2)", asm_out_file
);
27421 fputc ('\n', asm_out_file
);
27426 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27429 arm_asm_emit_except_personality (rtx personality
)
27431 fputs ("\t.personality\t", asm_out_file
);
27432 output_addr_const (asm_out_file
, personality
);
27433 fputc ('\n', asm_out_file
);
27435 #endif /* ARM_UNWIND_INFO */
27437 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}
27453 /* Output unwind directives for the start/end of a function. */
27456 arm_output_fn_unwind (FILE * f
, bool prologue
)
27458 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27462 fputs ("\t.fnstart\n", f
);
27465 /* If this function will never be unwound, then mark it as such.
27466 The came condition is used in arm_unwind_emit to suppress
27467 the frame annotations. */
27468 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27469 && (TREE_NOTHROW (current_function_decl
)
27470 || crtl
->all_throwers_are_sibcalls
))
27471 fputs("\t.cantunwind\n", f
);
27473 fputs ("\t.fnend\n", f
);
27478 arm_emit_tls_decoration (FILE *fp
, rtx x
)
27480 enum tls_reloc reloc
;
27483 val
= XVECEXP (x
, 0, 0);
27484 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
27486 output_addr_const (fp
, val
);
27491 fputs ("(tlsgd)", fp
);
27494 fputs ("(tlsldm)", fp
);
27497 fputs ("(tlsldo)", fp
);
27500 fputs ("(gottpoff)", fp
);
27503 fputs ("(tpoff)", fp
);
27506 fputs ("(tlsdesc)", fp
);
27509 gcc_unreachable ();
27518 fputs (" + (. - ", fp
);
27519 output_addr_const (fp
, XVECEXP (x
, 0, 2));
27520 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27521 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
27522 output_addr_const (fp
, XVECEXP (x
, 0, 3));
27532 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27535 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
27537 gcc_assert (size
== 4);
27538 fputs ("\t.word\t", file
);
27539 output_addr_const (file
, x
);
27540 fputs ("(tlsldo)", file
);
27543 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27546 arm_output_addr_const_extra (FILE *fp
, rtx x
)
27548 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
27549 return arm_emit_tls_decoration (fp
, x
);
27550 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
27553 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
27555 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
27556 assemble_name_raw (fp
, label
);
27560 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
27562 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
27566 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27570 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
27572 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27576 output_addr_const (fp
, XVECEXP (x
, 0, 1));
27580 else if (GET_CODE (x
) == CONST_VECTOR
)
27581 return arm_emit_vector_const (fp
, x
);
27586 /* Output assembly for a shift instruction.
27587 SET_FLAGS determines how the instruction modifies the condition codes.
27588 0 - Do not set condition codes.
27589 1 - Set condition codes.
27590 2 - Use smallest instruction. */
27592 arm_output_shift(rtx
* operands
, int set_flags
)
27595 static const char flag_chars
[3] = {'?', '.', '!'};
27600 c
= flag_chars
[set_flags
];
27601 shift
= shift_op(operands
[3], &val
);
27605 operands
[2] = GEN_INT(val
);
27606 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
27609 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
27611 output_asm_insn (pattern
, operands
);
27615 /* Output assembly for a WMMX immediate shift instruction. */
27617 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
27619 int shift
= INTVAL (operands
[2]);
27621 machine_mode opmode
= GET_MODE (operands
[0]);
27623 gcc_assert (shift
>= 0);
27625 /* If the shift value in the register versions is > 63 (for D qualifier),
27626 31 (for W qualifier) or 15 (for H qualifier). */
27627 if (((opmode
== V4HImode
) && (shift
> 15))
27628 || ((opmode
== V2SImode
) && (shift
> 31))
27629 || ((opmode
== DImode
) && (shift
> 63)))
27633 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27634 output_asm_insn (templ
, operands
);
27635 if (opmode
== DImode
)
27637 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
27638 output_asm_insn (templ
, operands
);
27643 /* The destination register will contain all zeros. */
27644 sprintf (templ
, "wzero\t%%0");
27645 output_asm_insn (templ
, operands
);
27650 if ((opmode
== DImode
) && (shift
> 32))
27652 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27653 output_asm_insn (templ
, operands
);
27654 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
27655 output_asm_insn (templ
, operands
);
27659 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
27660 output_asm_insn (templ
, operands
);
27665 /* Output assembly for a WMMX tinsr instruction. */
27667 arm_output_iwmmxt_tinsr (rtx
*operands
)
27669 int mask
= INTVAL (operands
[3]);
27672 int units
= mode_nunits
[GET_MODE (operands
[0])];
27673 gcc_assert ((mask
& (mask
- 1)) == 0);
27674 for (i
= 0; i
< units
; ++i
)
27676 if ((mask
& 0x01) == 1)
27682 gcc_assert (i
< units
);
27684 switch (GET_MODE (operands
[0]))
27687 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
27690 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
27693 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
27696 gcc_unreachable ();
27699 output_asm_insn (templ
, operands
);
27704 /* Output a Thumb-1 casesi dispatch sequence. */
27706 thumb1_output_casesi (rtx
*operands
)
27708 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
27710 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27712 switch (GET_MODE(diff_vec
))
27715 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27716 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27718 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27719 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27721 return "bl\t%___gnu_thumb1_case_si";
27723 gcc_unreachable ();
27727 /* Output a Thumb-2 casesi instruction. */
27729 thumb2_output_casesi (rtx
*operands
)
27731 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
27733 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27735 output_asm_insn ("cmp\t%0, %1", operands
);
27736 output_asm_insn ("bhi\t%l3", operands
);
27737 switch (GET_MODE(diff_vec
))
27740 return "tbb\t[%|pc, %0]";
27742 return "tbh\t[%|pc, %0, lsl #1]";
27746 output_asm_insn ("adr\t%4, %l2", operands
);
27747 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
27748 output_asm_insn ("add\t%4, %4, %5", operands
);
27753 output_asm_insn ("adr\t%4, %l2", operands
);
27754 return "ldr\t%|pc, [%4, %0, lsl #2]";
27757 gcc_unreachable ();
27761 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27762 per-core tuning structs. */
27764 arm_issue_rate (void)
27766 return current_tune
->issue_rate
;
27769 /* Return how many instructions should scheduler lookahead to choose the
27772 arm_first_cycle_multipass_dfa_lookahead (void)
27774 int issue_rate
= arm_issue_rate ();
27776 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
27779 /* Enable modeling of L2 auto-prefetcher. */
27781 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
27783 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
27787 arm_mangle_type (const_tree type
)
27789 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27790 has to be managled as if it is in the "std" namespace. */
27791 if (TARGET_AAPCS_BASED
27792 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
27793 return "St9__va_list";
27795 /* Half-precision float. */
27796 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
27799 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27801 if (TYPE_NAME (type
) != NULL
)
27802 return arm_mangle_builtin_type (type
);
27804 /* Use the default mangling. */
27808 /* Order of allocation of core registers for Thumb: this allocation is
27809 written over the corresponding initial entries of the array
27810 initialized with REG_ALLOC_ORDER. We allocate all low registers
27811 first. Saving and restoring a low register is usually cheaper than
27812 using a call-clobbered high register. */
27814 static const int thumb_core_reg_alloc_order
[] =
27816 3, 2, 1, 0, 4, 5, 6, 7,
27817 12, 14, 8, 9, 10, 11
27820 /* Adjust register allocation order when compiling for Thumb. */
27823 arm_order_regs_for_local_alloc (void)
27825 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
27826 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
27828 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
27829 sizeof (thumb_core_reg_alloc_order
));
27832 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27835 arm_frame_pointer_required (void)
27837 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
27840 /* If the function receives nonlocal gotos, it needs to save the frame
27841 pointer in the nonlocal_goto_save_area object. */
27842 if (cfun
->has_nonlocal_label
)
27845 /* The frame pointer is required for non-leaf APCS frames. */
27846 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !crtl
->is_leaf
)
27849 /* If we are probing the stack in the prologue, we will have a faulting
27850 instruction prior to the stack adjustment and this requires a frame
27851 pointer if we want to catch the exception using the EABI unwinder. */
27852 if (!IS_INTERRUPT (arm_current_func_type ())
27853 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
27854 && arm_except_unwind_info (&global_options
) == UI_TARGET
27855 && cfun
->can_throw_non_call_exceptions
)
27857 HOST_WIDE_INT size
= get_frame_size ();
27859 /* That's irrelevant if there is no stack adjustment. */
27863 /* That's relevant only if there is a stack probe. */
27864 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
27866 /* We don't have the final size of the frame so adjust. */
27867 size
+= 32 * UNITS_PER_WORD
;
27868 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
27878 /* Only thumb1 can't support conditional execution, so return true if
27879 the target is not thumb1. */
27881 arm_have_conditional_execution (void)
27883 return !TARGET_THUMB1
;
27886 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27887 static HOST_WIDE_INT
27888 arm_vector_alignment (const_tree type
)
27890 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
27892 if (TARGET_AAPCS_BASED
)
27893 align
= MIN (align
, 64);
27898 static unsigned int
27899 arm_autovectorize_vector_sizes (void)
27901 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
27905 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
27907 /* Vectors which aren't in packed structures will not be less aligned than
27908 the natural alignment of their element type, so this is safe. */
27909 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27912 return default_builtin_vector_alignment_reachable (type
, is_packed
);
27916 arm_builtin_support_vector_misalignment (machine_mode mode
,
27917 const_tree type
, int misalignment
,
27920 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27922 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
27927 /* If the misalignment is unknown, we should be able to handle the access
27928 so long as it is not to a member of a packed data structure. */
27929 if (misalignment
== -1)
27932 /* Return true if the misalignment is a multiple of the natural alignment
27933 of the vector's element type. This is probably always going to be
27934 true in practice, since we've already established that this isn't a
27936 return ((misalignment
% align
) == 0);
27939 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
27944 arm_conditional_register_usage (void)
27948 if (TARGET_THUMB1
&& optimize_size
)
27950 /* When optimizing for size on Thumb-1, it's better not
27951 to use the HI regs, because of the overhead of
27953 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
27954 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
27957 /* The link register can be clobbered by any branch insn,
27958 but we have no way to track that at present, so mark
27959 it as unavailable. */
27961 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
27963 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
)
27965 /* VFPv3 registers are disabled when earlier VFP
27966 versions are selected due to the definition of
27967 LAST_VFP_REGNUM. */
27968 for (regno
= FIRST_VFP_REGNUM
;
27969 regno
<= LAST_VFP_REGNUM
; ++ regno
)
27971 fixed_regs
[regno
] = 0;
27972 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
27973 || regno
>= FIRST_VFP_REGNUM
+ 32;
27977 if (TARGET_REALLY_IWMMXT
)
27979 regno
= FIRST_IWMMXT_GR_REGNUM
;
27980 /* The 2002/10/09 revision of the XScale ABI has wCG0
27981 and wCG1 as call-preserved registers. The 2002/11/21
27982 revision changed this so that all wCG registers are
27983 scratch registers. */
27984 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
27985 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
27986 fixed_regs
[regno
] = 0;
27987 /* The XScale ABI has wR0 - wR9 as scratch registers,
27988 the rest as call-preserved registers. */
27989 for (regno
= FIRST_IWMMXT_REGNUM
;
27990 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
27992 fixed_regs
[regno
] = 0;
27993 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
27997 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
27999 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
28000 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
28002 else if (TARGET_APCS_STACK
)
28004 fixed_regs
[10] = 1;
28005 call_used_regs
[10] = 1;
28007 /* -mcaller-super-interworking reserves r11 for calls to
28008 _interwork_r11_call_via_rN(). Making the register global
28009 is an easy way of ensuring that it remains valid for all
28011 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
28012 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
28014 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
28015 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
28016 if (TARGET_CALLER_INTERWORKING
)
28017 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
28019 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28023 arm_preferred_rename_class (reg_class_t rclass
)
28025 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28026 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
28027 and code size can be reduced. */
28028 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
28034 /* Compute the attribute "length" of insn "*push_multi".
28035 So this function MUST be kept in sync with that insn pattern. */
28037 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
28039 int i
, regno
, hi_reg
;
28040 int num_saves
= XVECLEN (parallel_op
, 0);
28050 regno
= REGNO (first_op
);
28051 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
28052 list is 8-bit. Normally this means all registers in the list must be
28053 LO_REGS, that is (R0 -R7). If any HI_REGS used, then we must use 32-bit
28054 encodings. There is one exception for PUSH that LR in HI_REGS can be used
28055 with 16-bit encoding. */
28056 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
28057 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
28059 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
28060 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
28068 /* Compute the attribute "length" of insn. Currently, this function is used
28069 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28070 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28071 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
28072 true if OPERANDS contains insn which explicit updates base register. */
28075 arm_attr_length_pop_multi (rtx
*operands
, bool return_pc
, bool write_back_p
)
28084 rtx parallel_op
= operands
[0];
28085 /* Initialize to elements number of PARALLEL. */
28086 unsigned indx
= XVECLEN (parallel_op
, 0) - 1;
28087 /* Initialize the value to base register. */
28088 unsigned regno
= REGNO (operands
[1]);
28089 /* Skip return and write back pattern.
28090 We only need register pop pattern for later analysis. */
28091 unsigned first_indx
= 0;
28092 first_indx
+= return_pc
? 1 : 0;
28093 first_indx
+= write_back_p
? 1 : 0;
28095 /* A pop operation can be done through LDM or POP. If the base register is SP
28096 and if it's with write back, then a LDM will be alias of POP. */
28097 bool pop_p
= (regno
== SP_REGNUM
&& write_back_p
);
28098 bool ldm_p
= !pop_p
;
28100 /* Check base register for LDM. */
28101 if (ldm_p
&& REGNO_REG_CLASS (regno
) == HI_REGS
)
28104 /* Check each register in the list. */
28105 for (; indx
>= first_indx
; indx
--)
28107 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, indx
), 0));
28108 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28109 comment in arm_attr_length_push_multi. */
28110 if (REGNO_REG_CLASS (regno
) == HI_REGS
28111 && (regno
!= PC_REGNUM
|| ldm_p
))
28118 /* Compute the number of instructions emitted by output_move_double. */
28120 arm_count_output_move_double_insns (rtx
*operands
)
28124 /* output_move_double may modify the operands array, so call it
28125 here on a copy of the array. */
28126 ops
[0] = operands
[0];
28127 ops
[1] = operands
[1];
28128 output_move_double (ops
, false, &count
);
28133 vfp3_const_double_for_fract_bits (rtx operand
)
28135 REAL_VALUE_TYPE r0
;
28137 if (!CONST_DOUBLE_P (operand
))
28140 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
28141 if (exact_real_inverse (DFmode
, &r0
)
28142 && !REAL_VALUE_NEGATIVE (r0
))
28144 if (exact_real_truncate (DFmode
, &r0
))
28146 HOST_WIDE_INT value
= real_to_integer (&r0
);
28147 value
= value
& 0xffffffff;
28148 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
28150 int ret
= exact_log2 (value
);
28151 gcc_assert (IN_RANGE (ret
, 0, 31));
28159 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28160 log2 is in [1, 32], return that log2. Otherwise return -1.
28161 This is used in the patterns for vcvt.s32.f32 floating-point to
28162 fixed-point conversions. */
28165 vfp3_const_double_for_bits (rtx x
)
28167 const REAL_VALUE_TYPE
*r
;
28169 if (!CONST_DOUBLE_P (x
))
28172 r
= CONST_DOUBLE_REAL_VALUE (x
);
28174 if (REAL_VALUE_NEGATIVE (*r
)
28175 || REAL_VALUE_ISNAN (*r
)
28176 || REAL_VALUE_ISINF (*r
)
28177 || !real_isinteger (r
, SFmode
))
28180 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
28182 /* The exact_log2 above will have returned -1 if this is
28183 not an exact log2. */
28184 if (!IN_RANGE (hwint
, 1, 32))
28191 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28194 arm_pre_atomic_barrier (enum memmodel model
)
28196 if (need_atomic_barrier_p (model
, true))
28197 emit_insn (gen_memory_barrier ());
28201 arm_post_atomic_barrier (enum memmodel model
)
28203 if (need_atomic_barrier_p (model
, false))
28204 emit_insn (gen_memory_barrier ());
28207 /* Emit the load-exclusive and store-exclusive instructions.
28208 Use acquire and release versions if necessary. */
28211 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
28213 rtx (*gen
) (rtx
, rtx
);
28219 case E_QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
28220 case E_HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
28221 case E_SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
28222 case E_DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
28224 gcc_unreachable ();
28231 case E_QImode
: gen
= gen_arm_load_exclusiveqi
; break;
28232 case E_HImode
: gen
= gen_arm_load_exclusivehi
; break;
28233 case E_SImode
: gen
= gen_arm_load_exclusivesi
; break;
28234 case E_DImode
: gen
= gen_arm_load_exclusivedi
; break;
28236 gcc_unreachable ();
28240 emit_insn (gen (rval
, mem
));
28244 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
28247 rtx (*gen
) (rtx
, rtx
, rtx
);
28253 case E_QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
28254 case E_HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
28255 case E_SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
28256 case E_DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
28258 gcc_unreachable ();
28265 case E_QImode
: gen
= gen_arm_store_exclusiveqi
; break;
28266 case E_HImode
: gen
= gen_arm_store_exclusivehi
; break;
28267 case E_SImode
: gen
= gen_arm_store_exclusivesi
; break;
28268 case E_DImode
: gen
= gen_arm_store_exclusivedi
; break;
28270 gcc_unreachable ();
28274 emit_insn (gen (bval
, rval
, mem
));
28277 /* Mark the previous jump instruction as unlikely. */
28280 emit_unlikely_jump (rtx insn
)
28282 rtx_insn
*jump
= emit_jump_insn (insn
);
28283 add_reg_br_prob_note (jump
, profile_probability::very_unlikely ());
28286 /* Expand a compare and swap pattern. */
28289 arm_expand_compare_and_swap (rtx operands
[])
28291 rtx bval
, bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
28293 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
28295 bval
= operands
[0];
28296 rval
= operands
[1];
28298 oldval
= operands
[3];
28299 newval
= operands
[4];
28300 is_weak
= operands
[5];
28301 mod_s
= operands
[6];
28302 mod_f
= operands
[7];
28303 mode
= GET_MODE (mem
);
28305 /* Normally the succ memory model must be stronger than fail, but in the
28306 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28307 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28309 if (TARGET_HAVE_LDACQ
28310 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
28311 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
28312 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
28318 /* For narrow modes, we're going to perform the comparison in SImode,
28319 so do the zero-extension now. */
28320 rval
= gen_reg_rtx (SImode
);
28321 oldval
= convert_modes (SImode
, mode
, oldval
, true);
28325 /* Force the value into a register if needed. We waited until after
28326 the zero-extension above to do this properly. */
28327 if (!arm_add_operand (oldval
, SImode
))
28328 oldval
= force_reg (SImode
, oldval
);
28332 if (!cmpdi_operand (oldval
, mode
))
28333 oldval
= force_reg (mode
, oldval
);
28337 gcc_unreachable ();
28344 case E_QImode
: gen
= gen_atomic_compare_and_swapt1qi_1
; break;
28345 case E_HImode
: gen
= gen_atomic_compare_and_swapt1hi_1
; break;
28346 case E_SImode
: gen
= gen_atomic_compare_and_swapt1si_1
; break;
28347 case E_DImode
: gen
= gen_atomic_compare_and_swapt1di_1
; break;
28349 gcc_unreachable ();
28356 case E_QImode
: gen
= gen_atomic_compare_and_swap32qi_1
; break;
28357 case E_HImode
: gen
= gen_atomic_compare_and_swap32hi_1
; break;
28358 case E_SImode
: gen
= gen_atomic_compare_and_swap32si_1
; break;
28359 case E_DImode
: gen
= gen_atomic_compare_and_swap32di_1
; break;
28361 gcc_unreachable ();
28365 bdst
= TARGET_THUMB1
? bval
: gen_rtx_REG (CC_Zmode
, CC_REGNUM
);
28366 emit_insn (gen (bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
28368 if (mode
== QImode
|| mode
== HImode
)
28369 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
28371 /* In all cases, we arrange for success to be signaled by Z set.
28372 This arrangement allows for the boolean result to be used directly
28373 in a subsequent branch, post optimization. For Thumb-1 targets, the
28374 boolean negation of the result is also stored in bval because Thumb-1
28375 backend lacks dependency tracking for CC flag due to flag-setting not
28376 being represented at RTL level. */
28378 emit_insn (gen_cstoresi_eq0_thumb1 (bval
, bdst
));
28381 x
= gen_rtx_EQ (SImode
, bdst
, const0_rtx
);
28382 emit_insn (gen_rtx_SET (bval
, x
));
28386 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28387 another memory store between the load-exclusive and store-exclusive can
28388 reset the monitor from Exclusive to Open state. This means we must wait
28389 until after reload to split the pattern, lest we get a register spill in
28390 the middle of the atomic sequence. Success of the compare and swap is
28391 indicated by the Z flag set for 32bit targets and by neg_bval being zero
28392 for Thumb-1 targets (ie. negation of the boolean value returned by
28393 atomic_compare_and_swapmode standard pattern in operand 0). */
28396 arm_split_compare_and_swap (rtx operands
[])
28398 rtx rval
, mem
, oldval
, newval
, neg_bval
;
28400 enum memmodel mod_s
, mod_f
;
28402 rtx_code_label
*label1
, *label2
;
28405 rval
= operands
[1];
28407 oldval
= operands
[3];
28408 newval
= operands
[4];
28409 is_weak
= (operands
[5] != const0_rtx
);
28410 mod_s
= memmodel_from_int (INTVAL (operands
[6]));
28411 mod_f
= memmodel_from_int (INTVAL (operands
[7]));
28412 neg_bval
= TARGET_THUMB1
? operands
[0] : operands
[8];
28413 mode
= GET_MODE (mem
);
28415 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
28417 bool use_acquire
= TARGET_HAVE_LDACQ
28418 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28419 || is_mm_release (mod_s
));
28421 bool use_release
= TARGET_HAVE_LDACQ
28422 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28423 || is_mm_acquire (mod_s
));
28425 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28426 a full barrier is emitted after the store-release. */
28428 use_acquire
= false;
28430 /* Checks whether a barrier is needed and emits one accordingly. */
28431 if (!(use_acquire
|| use_release
))
28432 arm_pre_atomic_barrier (mod_s
);
28437 label1
= gen_label_rtx ();
28438 emit_label (label1
);
28440 label2
= gen_label_rtx ();
28442 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
28444 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28445 as required to communicate with arm_expand_compare_and_swap. */
28448 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, neg_bval
);
28449 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28450 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
28451 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
28452 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
28456 emit_move_insn (neg_bval
, const1_rtx
);
28457 cond
= gen_rtx_NE (VOIDmode
, rval
, oldval
);
28458 if (thumb1_cmpneg_operand (oldval
, SImode
))
28459 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval
, rval
, oldval
,
28462 emit_unlikely_jump (gen_cbranchsi4_insn (cond
, rval
, oldval
, label2
));
28465 arm_emit_store_exclusive (mode
, neg_bval
, mem
, newval
, use_release
);
28467 /* Weak or strong, we want EQ to be true for success, so that we
28468 match the flags that we got from the compare above. */
28471 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
28472 x
= gen_rtx_COMPARE (CCmode
, neg_bval
, const0_rtx
);
28473 emit_insn (gen_rtx_SET (cond
, x
));
28478 /* Z is set to boolean value of !neg_bval, as required to communicate
28479 with arm_expand_compare_and_swap. */
28480 x
= gen_rtx_NE (VOIDmode
, neg_bval
, const0_rtx
);
28481 emit_unlikely_jump (gen_cbranchsi4 (x
, neg_bval
, const0_rtx
, label1
));
28484 if (!is_mm_relaxed (mod_f
))
28485 emit_label (label2
);
28487 /* Checks whether a barrier is needed and emits one accordingly. */
28489 || !(use_acquire
|| use_release
))
28490 arm_post_atomic_barrier (mod_s
);
28492 if (is_mm_relaxed (mod_f
))
28493 emit_label (label2
);
28496 /* Split an atomic operation pattern. Operation is given by CODE and is one
28497 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28498 operation). Operation is performed on the content at MEM and on VALUE
28499 following the memory model MODEL_RTX. The content at MEM before and after
28500 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28501 success of the operation is returned in COND. Using a scratch register or
28502 an operand register for these determines what result is returned for that
28506 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
28507 rtx value
, rtx model_rtx
, rtx cond
)
28509 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
28510 machine_mode mode
= GET_MODE (mem
);
28511 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
28512 rtx_code_label
*label
;
28513 bool all_low_regs
, bind_old_new
;
28516 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
28518 bool use_acquire
= TARGET_HAVE_LDACQ
28519 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28520 || is_mm_release (model
));
28522 bool use_release
= TARGET_HAVE_LDACQ
28523 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28524 || is_mm_acquire (model
));
28526 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28527 a full barrier is emitted after the store-release. */
28529 use_acquire
= false;
28531 /* Checks whether a barrier is needed and emits one accordingly. */
28532 if (!(use_acquire
|| use_release
))
28533 arm_pre_atomic_barrier (model
);
28535 label
= gen_label_rtx ();
28536 emit_label (label
);
28539 new_out
= gen_lowpart (wmode
, new_out
);
28541 old_out
= gen_lowpart (wmode
, old_out
);
28544 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
28546 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
28548 /* Does the operation require destination and first operand to use the same
28549 register? This is decided by register constraints of relevant insn
28550 patterns in thumb1.md. */
28551 gcc_assert (!new_out
|| REG_P (new_out
));
28552 all_low_regs
= REG_P (value
) && REGNO_REG_CLASS (REGNO (value
)) == LO_REGS
28553 && new_out
&& REGNO_REG_CLASS (REGNO (new_out
)) == LO_REGS
28554 && REGNO_REG_CLASS (REGNO (old_out
)) == LO_REGS
;
28559 && (code
!= PLUS
|| (!all_low_regs
&& !satisfies_constraint_L (value
))));
28561 /* We want to return the old value while putting the result of the operation
28562 in the same register as the old value so copy the old value over to the
28563 destination register and use that register for the operation. */
28564 if (old_out
&& bind_old_new
)
28566 emit_move_insn (new_out
, old_out
);
28577 x
= gen_rtx_AND (wmode
, old_out
, value
);
28578 emit_insn (gen_rtx_SET (new_out
, x
));
28579 x
= gen_rtx_NOT (wmode
, new_out
);
28580 emit_insn (gen_rtx_SET (new_out
, x
));
28584 if (CONST_INT_P (value
))
28586 value
= GEN_INT (-INTVAL (value
));
28592 if (mode
== DImode
)
28594 /* DImode plus/minus need to clobber flags. */
28595 /* The adddi3 and subdi3 patterns are incorrectly written so that
28596 they require matching operands, even when we could easily support
28597 three operands. Thankfully, this can be fixed up post-splitting,
28598 as the individual add+adc patterns do accept three operands and
28599 post-reload cprop can make these moves go away. */
28600 emit_move_insn (new_out
, old_out
);
28602 x
= gen_adddi3 (new_out
, new_out
, value
);
28604 x
= gen_subdi3 (new_out
, new_out
, value
);
28611 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
28612 emit_insn (gen_rtx_SET (new_out
, x
));
28616 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
28619 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28620 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
28622 /* Checks whether a barrier is needed and emits one accordingly. */
28624 || !(use_acquire
|| use_release
))
28625 arm_post_atomic_barrier (model
);
28628 #define MAX_VECT_LEN 16
28630 struct expand_vec_perm_d
28632 rtx target
, op0
, op1
;
28633 unsigned char perm
[MAX_VECT_LEN
];
28634 machine_mode vmode
;
28635 unsigned char nelt
;
28640 /* Generate a variable permutation. */
28643 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28645 machine_mode vmode
= GET_MODE (target
);
28646 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28648 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
28649 gcc_checking_assert (GET_MODE (op0
) == vmode
);
28650 gcc_checking_assert (GET_MODE (op1
) == vmode
);
28651 gcc_checking_assert (GET_MODE (sel
) == vmode
);
28652 gcc_checking_assert (TARGET_NEON
);
28656 if (vmode
== V8QImode
)
28657 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
28659 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
28665 if (vmode
== V8QImode
)
28667 pair
= gen_reg_rtx (V16QImode
);
28668 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
28669 pair
= gen_lowpart (TImode
, pair
);
28670 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
28674 pair
= gen_reg_rtx (OImode
);
28675 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
28676 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
28682 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28684 machine_mode vmode
= GET_MODE (target
);
28685 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
28686 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28687 rtx rmask
[MAX_VECT_LEN
], mask
;
28689 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28690 numbering of elements for big-endian, we must reverse the order. */
28691 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
28693 /* The VTBL instruction does not use a modulo index, so we must take care
28694 of that ourselves. */
28695 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28696 for (i
= 0; i
< nelt
; ++i
)
28698 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
28699 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
28701 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
28704 /* Map lane ordering between architectural lane order, and GCC lane order,
28705 taking into account ABI. See comment above output_move_neon for details. */
28708 neon_endian_lane_map (machine_mode mode
, int lane
)
28710 if (BYTES_BIG_ENDIAN
)
28712 int nelems
= GET_MODE_NUNITS (mode
);
28713 /* Reverse lane order. */
28714 lane
= (nelems
- 1 - lane
);
28715 /* Reverse D register order, to match ABI. */
28716 if (GET_MODE_SIZE (mode
) == 16)
28717 lane
= lane
^ (nelems
/ 2);
28722 /* Some permutations index into pairs of vectors, this is a helper function
28723 to map indexes into those pairs of vectors. */
28726 neon_pair_endian_lane_map (machine_mode mode
, int lane
)
28728 int nelem
= GET_MODE_NUNITS (mode
);
28729 if (BYTES_BIG_ENDIAN
)
28731 neon_endian_lane_map (mode
, lane
& (nelem
- 1)) + (lane
& nelem
);
28735 /* Generate or test for an insn that supports a constant permutation. */
28737 /* Recognize patterns for the VUZP insns. */
28740 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
28742 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28743 rtx out0
, out1
, in0
, in1
;
28744 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28748 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28751 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28752 big endian pattern on 64 bit vectors, so we correct for that. */
28753 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
28754 && GET_MODE_SIZE (d
->vmode
) == 8 ? d
->nelt
: 0;
28756 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
28758 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28760 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
28764 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28766 for (i
= 0; i
< nelt
; i
++)
28769 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
28770 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
28780 case E_V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
28781 case E_V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
28782 case E_V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
28783 case E_V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
28784 case E_V8HFmode
: gen
= gen_neon_vuzpv8hf_internal
; break;
28785 case E_V4HFmode
: gen
= gen_neon_vuzpv4hf_internal
; break;
28786 case E_V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
28787 case E_V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
28788 case E_V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
28789 case E_V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
28791 gcc_unreachable ();
28796 if (swap_nelt
!= 0)
28797 std::swap (in0
, in1
);
28800 out1
= gen_reg_rtx (d
->vmode
);
28802 std::swap (out0
, out1
);
28804 emit_insn (gen (out0
, in0
, in1
, out1
));
28808 /* Recognize patterns for the VZIP insns. */
28811 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
28813 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
28814 rtx out0
, out1
, in0
, in1
;
28815 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28819 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28822 is_swapped
= BYTES_BIG_ENDIAN
;
28824 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0) ^ is_swapped
];
28827 if (first_elem
== neon_endian_lane_map (d
->vmode
, high
))
28829 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28833 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28835 for (i
= 0; i
< nelt
/ 2; i
++)
28838 neon_pair_endian_lane_map (d
->vmode
, i
+ high
) & mask
;
28839 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ is_swapped
)]
28843 neon_pair_endian_lane_map (d
->vmode
, i
+ nelt
+ high
) & mask
;
28844 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ !is_swapped
)]
28855 case E_V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
28856 case E_V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
28857 case E_V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
28858 case E_V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
28859 case E_V8HFmode
: gen
= gen_neon_vzipv8hf_internal
; break;
28860 case E_V4HFmode
: gen
= gen_neon_vzipv4hf_internal
; break;
28861 case E_V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
28862 case E_V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
28863 case E_V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
28864 case E_V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
28866 gcc_unreachable ();
28872 std::swap (in0
, in1
);
28875 out1
= gen_reg_rtx (d
->vmode
);
28877 std::swap (out0
, out1
);
28879 emit_insn (gen (out0
, in0
, in1
, out1
));
28883 /* Recognize patterns for the VREV insns. */
28886 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
28888 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
28889 rtx (*gen
)(rtx
, rtx
);
28891 if (!d
->one_vector_p
)
28900 case E_V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
28901 case E_V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
28909 case E_V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
28910 case E_V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
28911 case E_V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
28912 case E_V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
28913 case E_V8HFmode
: gen
= gen_neon_vrev64v8hf
; break;
28914 case E_V4HFmode
: gen
= gen_neon_vrev64v4hf
; break;
28922 case E_V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
28923 case E_V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
28924 case E_V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
28925 case E_V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
28926 case E_V4SImode
: gen
= gen_neon_vrev64v4si
; break;
28927 case E_V2SImode
: gen
= gen_neon_vrev64v2si
; break;
28928 case E_V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
28929 case E_V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
28938 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
28939 for (j
= 0; j
<= diff
; j
+= 1)
28941 /* This is guaranteed to be true as the value of diff
28942 is 7, 3, 1 and we should have enough elements in the
28943 queue to generate this. Getting a vector mask with a
28944 value of diff other than these values implies that
28945 something is wrong by the time we get here. */
28946 gcc_assert (i
+ j
< nelt
);
28947 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
28955 emit_insn (gen (d
->target
, d
->op0
));
28959 /* Recognize patterns for the VTRN insns. */
28962 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
28964 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28965 rtx out0
, out1
, in0
, in1
;
28966 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28968 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28971 /* Note that these are little-endian tests. Adjust for big-endian later. */
28972 if (d
->perm
[0] == 0)
28974 else if (d
->perm
[0] == 1)
28978 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28980 for (i
= 0; i
< nelt
; i
+= 2)
28982 if (d
->perm
[i
] != i
+ odd
)
28984 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
28994 case E_V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
28995 case E_V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
28996 case E_V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
28997 case E_V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
28998 case E_V8HFmode
: gen
= gen_neon_vtrnv8hf_internal
; break;
28999 case E_V4HFmode
: gen
= gen_neon_vtrnv4hf_internal
; break;
29000 case E_V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
29001 case E_V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
29002 case E_V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
29003 case E_V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
29005 gcc_unreachable ();
29010 if (BYTES_BIG_ENDIAN
)
29012 std::swap (in0
, in1
);
29017 out1
= gen_reg_rtx (d
->vmode
);
29019 std::swap (out0
, out1
);
29021 emit_insn (gen (out0
, in0
, in1
, out1
));
29025 /* Recognize patterns for the VEXT insns. */
29028 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
29030 unsigned int i
, nelt
= d
->nelt
;
29031 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
29034 unsigned int location
;
29036 unsigned int next
= d
->perm
[0] + 1;
29038 /* TODO: Handle GCC's numbering of elements for big-endian. */
29039 if (BYTES_BIG_ENDIAN
)
29042 /* Check if the extracted indexes are increasing by one. */
29043 for (i
= 1; i
< nelt
; next
++, i
++)
29045 /* If we hit the most significant element of the 2nd vector in
29046 the previous iteration, no need to test further. */
29047 if (next
== 2 * nelt
)
29050 /* If we are operating on only one vector: it could be a
29051 rotation. If there are only two elements of size < 64, let
29052 arm_evpc_neon_vrev catch it. */
29053 if (d
->one_vector_p
&& (next
== nelt
))
29055 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
29061 if (d
->perm
[i
] != next
)
29065 location
= d
->perm
[0];
29069 case E_V16QImode
: gen
= gen_neon_vextv16qi
; break;
29070 case E_V8QImode
: gen
= gen_neon_vextv8qi
; break;
29071 case E_V4HImode
: gen
= gen_neon_vextv4hi
; break;
29072 case E_V8HImode
: gen
= gen_neon_vextv8hi
; break;
29073 case E_V2SImode
: gen
= gen_neon_vextv2si
; break;
29074 case E_V4SImode
: gen
= gen_neon_vextv4si
; break;
29075 case E_V4HFmode
: gen
= gen_neon_vextv4hf
; break;
29076 case E_V8HFmode
: gen
= gen_neon_vextv8hf
; break;
29077 case E_V2SFmode
: gen
= gen_neon_vextv2sf
; break;
29078 case E_V4SFmode
: gen
= gen_neon_vextv4sf
; break;
29079 case E_V2DImode
: gen
= gen_neon_vextv2di
; break;
29088 offset
= GEN_INT (location
);
29089 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
29093 /* The NEON VTBL instruction is a fully variable permuation that's even
29094 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29095 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29096 can do slightly better by expanding this as a constant where we don't
29097 have to apply a mask. */
29100 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
29102 rtx rperm
[MAX_VECT_LEN
], sel
;
29103 machine_mode vmode
= d
->vmode
;
29104 unsigned int i
, nelt
= d
->nelt
;
29106 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29107 numbering of elements for big-endian, we must reverse the order. */
29108 if (BYTES_BIG_ENDIAN
)
29114 /* Generic code will try constant permutation twice. Once with the
29115 original mode and again with the elements lowered to QImode.
29116 So wait and don't do the selector expansion ourselves. */
29117 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
29120 for (i
= 0; i
< nelt
; ++i
)
29121 rperm
[i
] = GEN_INT (d
->perm
[i
]);
29122 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
29123 sel
= force_reg (vmode
, sel
);
29125 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
29130 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
29132 /* Check if the input mask matches vext before reordering the
29135 if (arm_evpc_neon_vext (d
))
29138 /* The pattern matching functions above are written to look for a small
29139 number to begin the sequence (0, 1, N/2). If we begin with an index
29140 from the second operand, we can swap the operands. */
29141 if (d
->perm
[0] >= d
->nelt
)
29143 unsigned i
, nelt
= d
->nelt
;
29145 for (i
= 0; i
< nelt
; ++i
)
29146 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
29148 std::swap (d
->op0
, d
->op1
);
29153 if (arm_evpc_neon_vuzp (d
))
29155 if (arm_evpc_neon_vzip (d
))
29157 if (arm_evpc_neon_vrev (d
))
29159 if (arm_evpc_neon_vtrn (d
))
29161 return arm_evpc_neon_vtbl (d
);
29166 /* Expand a vec_perm_const pattern. */
29169 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
29171 struct expand_vec_perm_d d
;
29172 int i
, nelt
, which
;
29178 d
.vmode
= GET_MODE (target
);
29179 gcc_assert (VECTOR_MODE_P (d
.vmode
));
29180 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
29181 d
.testing_p
= false;
29183 for (i
= which
= 0; i
< nelt
; ++i
)
29185 rtx e
= XVECEXP (sel
, 0, i
);
29186 int ei
= INTVAL (e
) & (2 * nelt
- 1);
29187 which
|= (ei
< nelt
? 1 : 2);
29197 d
.one_vector_p
= false;
29198 if (!rtx_equal_p (op0
, op1
))
29201 /* The elements of PERM do not suggest that only the first operand
29202 is used, but both operands are identical. Allow easier matching
29203 of the permutation by folding the permutation into the single
29207 for (i
= 0; i
< nelt
; ++i
)
29208 d
.perm
[i
] &= nelt
- 1;
29210 d
.one_vector_p
= true;
29215 d
.one_vector_p
= true;
29219 return arm_expand_vec_perm_const_1 (&d
);
29222 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29225 arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
29226 const unsigned char *sel
)
29228 struct expand_vec_perm_d d
;
29229 unsigned int i
, nelt
, which
;
29233 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
29234 d
.testing_p
= true;
29235 memcpy (d
.perm
, sel
, nelt
);
29237 /* Categorize the set of elements in the selector. */
29238 for (i
= which
= 0; i
< nelt
; ++i
)
29240 unsigned char e
= d
.perm
[i
];
29241 gcc_assert (e
< 2 * nelt
);
29242 which
|= (e
< nelt
? 1 : 2);
29245 /* For all elements from second vector, fold the elements to first. */
29247 for (i
= 0; i
< nelt
; ++i
)
29250 /* Check whether the mask can be applied to the vector type. */
29251 d
.one_vector_p
= (which
!= 3);
29253 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
29254 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
29255 if (!d
.one_vector_p
)
29256 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
29259 ret
= arm_expand_vec_perm_const_1 (&d
);
29266 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
29268 /* If we are soft float and we do not have ldrd
29269 then all auto increment forms are ok. */
29270 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
29275 /* Post increment and Pre Decrement are supported for all
29276 instruction forms except for vector forms. */
29279 if (VECTOR_MODE_P (mode
))
29281 if (code
!= ARM_PRE_DEC
)
29291 /* Without LDRD and mode size greater than
29292 word size, there is no point in auto-incrementing
29293 because ldm and stm will not have these forms. */
29294 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
29297 /* Vector and floating point modes do not support
29298 these auto increment forms. */
29299 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
29312 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29313 on ARM, since we know that shifts by negative amounts are no-ops.
29314 Additionally, the default expansion code is not available or suitable
29315 for post-reload insn splits (this can occur when the register allocator
29316 chooses not to do a shift in NEON).
29318 This function is used in both initial expand and post-reload splits, and
29319 handles all kinds of 64-bit shifts.
29321 Input requirements:
29322 - It is safe for the input and output to be the same register, but
29323 early-clobber rules apply for the shift amount and scratch registers.
29324 - Shift by register requires both scratch registers. In all other cases
29325 the scratch registers may be NULL.
29326 - Ashiftrt by a register also clobbers the CC register. */
29328 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
29329 rtx amount
, rtx scratch1
, rtx scratch2
)
29331 rtx out_high
= gen_highpart (SImode
, out
);
29332 rtx out_low
= gen_lowpart (SImode
, out
);
29333 rtx in_high
= gen_highpart (SImode
, in
);
29334 rtx in_low
= gen_lowpart (SImode
, in
);
29337 in = the register pair containing the input value.
29338 out = the destination register pair.
29339 up = the high- or low-part of each pair.
29340 down = the opposite part to "up".
29341 In a shift, we can consider bits to shift from "up"-stream to
29342 "down"-stream, so in a left-shift "up" is the low-part and "down"
29343 is the high-part of each register pair. */
29345 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
29346 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
29347 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
29348 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
29350 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
29352 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
29353 && GET_MODE (out
) == DImode
);
29355 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
29356 && GET_MODE (in
) == DImode
);
29358 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
29359 && GET_MODE (amount
) == SImode
)
29360 || CONST_INT_P (amount
)));
29361 gcc_assert (scratch1
== NULL
29362 || (GET_CODE (scratch1
) == SCRATCH
)
29363 || (GET_MODE (scratch1
) == SImode
29364 && REG_P (scratch1
)));
29365 gcc_assert (scratch2
== NULL
29366 || (GET_CODE (scratch2
) == SCRATCH
)
29367 || (GET_MODE (scratch2
) == SImode
29368 && REG_P (scratch2
)));
29369 gcc_assert (!REG_P (out
) || !REG_P (amount
)
29370 || !HARD_REGISTER_P (out
)
29371 || (REGNO (out
) != REGNO (amount
)
29372 && REGNO (out
) + 1 != REGNO (amount
)));
29374 /* Macros to make following code more readable. */
29375 #define SUB_32(DEST,SRC) \
29376 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29377 #define RSB_32(DEST,SRC) \
29378 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29379 #define SUB_S_32(DEST,SRC) \
29380 gen_addsi3_compare0 ((DEST), (SRC), \
29382 #define SET(DEST,SRC) \
29383 gen_rtx_SET ((DEST), (SRC))
29384 #define SHIFT(CODE,SRC,AMOUNT) \
29385 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29386 #define LSHIFT(CODE,SRC,AMOUNT) \
29387 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29388 SImode, (SRC), (AMOUNT))
29389 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29390 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29391 SImode, (SRC), (AMOUNT))
29393 gen_rtx_IOR (SImode, (A), (B))
29394 #define BRANCH(COND,LABEL) \
29395 gen_arm_cond_branch ((LABEL), \
29396 gen_rtx_ ## COND (CCmode, cc_reg, \
29400 /* Shifts by register and shifts by constant are handled separately. */
29401 if (CONST_INT_P (amount
))
29403 /* We have a shift-by-constant. */
29405 /* First, handle out-of-range shift amounts.
29406 In both cases we try to match the result an ARM instruction in a
29407 shift-by-register would give. This helps reduce execution
29408 differences between optimization levels, but it won't stop other
29409 parts of the compiler doing different things. This is "undefined
29410 behavior, in any case. */
29411 if (INTVAL (amount
) <= 0)
29412 emit_insn (gen_movdi (out
, in
));
29413 else if (INTVAL (amount
) >= 64)
29415 if (code
== ASHIFTRT
)
29417 rtx const31_rtx
= GEN_INT (31);
29418 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
29419 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
29422 emit_insn (gen_movdi (out
, const0_rtx
));
29425 /* Now handle valid shifts. */
29426 else if (INTVAL (amount
) < 32)
29428 /* Shifts by a constant less than 32. */
29429 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
29431 /* Clearing the out register in DImode first avoids lots
29432 of spilling and results in less stack usage.
29433 Later this redundant insn is completely removed.
29434 Do that only if "in" and "out" are different registers. */
29435 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
29436 emit_insn (SET (out
, const0_rtx
));
29437 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29438 emit_insn (SET (out_down
,
29439 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
29441 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29445 /* Shifts by a constant greater than 31. */
29446 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
29448 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
29449 emit_insn (SET (out
, const0_rtx
));
29450 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
29451 if (code
== ASHIFTRT
)
29452 emit_insn (gen_ashrsi3 (out_up
, in_up
,
29455 emit_insn (SET (out_up
, const0_rtx
));
29460 /* We have a shift-by-register. */
29461 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
29463 /* This alternative requires the scratch registers. */
29464 gcc_assert (scratch1
&& REG_P (scratch1
));
29465 gcc_assert (scratch2
&& REG_P (scratch2
));
29467 /* We will need the values "amount-32" and "32-amount" later.
29468 Swapping them around now allows the later code to be more general. */
29472 emit_insn (SUB_32 (scratch1
, amount
));
29473 emit_insn (RSB_32 (scratch2
, amount
));
29476 emit_insn (RSB_32 (scratch1
, amount
));
29477 /* Also set CC = amount > 32. */
29478 emit_insn (SUB_S_32 (scratch2
, amount
));
29481 emit_insn (RSB_32 (scratch1
, amount
));
29482 emit_insn (SUB_32 (scratch2
, amount
));
29485 gcc_unreachable ();
29488 /* Emit code like this:
29491 out_down = in_down << amount;
29492 out_down = (in_up << (amount - 32)) | out_down;
29493 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29494 out_up = in_up << amount;
29497 out_down = in_down >> amount;
29498 out_down = (in_up << (32 - amount)) | out_down;
29500 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29501 out_up = in_up << amount;
29504 out_down = in_down >> amount;
29505 out_down = (in_up << (32 - amount)) | out_down;
29507 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29508 out_up = in_up << amount;
29510 The ARM and Thumb2 variants are the same but implemented slightly
29511 differently. If this were only called during expand we could just
29512 use the Thumb2 case and let combine do the right thing, but this
29513 can also be called from post-reload splitters. */
29515 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29517 if (!TARGET_THUMB2
)
29519 /* Emit code for ARM mode. */
29520 emit_insn (SET (out_down
,
29521 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
29522 if (code
== ASHIFTRT
)
29524 rtx_code_label
*done_label
= gen_label_rtx ();
29525 emit_jump_insn (BRANCH (LT
, done_label
));
29526 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
29528 emit_label (done_label
);
29531 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
29536 /* Emit code for Thumb2 mode.
29537 Thumb2 can't do shift and or in one insn. */
29538 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
29539 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
29541 if (code
== ASHIFTRT
)
29543 rtx_code_label
*done_label
= gen_label_rtx ();
29544 emit_jump_insn (BRANCH (LT
, done_label
));
29545 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
29546 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
29547 emit_label (done_label
);
29551 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
29552 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
29556 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29570 /* Returns true if the pattern is a valid symbolic address, which is either a
29571 symbol_ref or (symbol_ref + addend).
29573 According to the ARM ELF ABI, the initial addend of REL-type relocations
29574 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29575 literal field of the instruction as a 16-bit signed value in the range
29576 -32768 <= A < 32768. */
29579 arm_valid_symbolic_address_p (rtx addr
)
29581 rtx xop0
, xop1
= NULL_RTX
;
29584 if (GET_CODE (tmp
) == SYMBOL_REF
|| GET_CODE (tmp
) == LABEL_REF
)
29587 /* (const (plus: symbol_ref const_int)) */
29588 if (GET_CODE (addr
) == CONST
)
29589 tmp
= XEXP (addr
, 0);
29591 if (GET_CODE (tmp
) == PLUS
)
29593 xop0
= XEXP (tmp
, 0);
29594 xop1
= XEXP (tmp
, 1);
29596 if (GET_CODE (xop0
) == SYMBOL_REF
&& CONST_INT_P (xop1
))
29597 return IN_RANGE (INTVAL (xop1
), -0x8000, 0x7fff);
29603 /* Returns true if a valid comparison operation and makes
29604 the operands in a form that is valid. */
29606 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
29608 enum rtx_code code
= GET_CODE (*comparison
);
29610 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
29611 ? GET_MODE (*op2
) : GET_MODE (*op1
);
29613 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
29615 if (code
== UNEQ
|| code
== LTGT
)
29618 code_int
= (int)code
;
29619 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
29620 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
29625 if (!arm_add_operand (*op1
, mode
))
29626 *op1
= force_reg (mode
, *op1
);
29627 if (!arm_add_operand (*op2
, mode
))
29628 *op2
= force_reg (mode
, *op2
);
29632 if (!cmpdi_operand (*op1
, mode
))
29633 *op1
= force_reg (mode
, *op1
);
29634 if (!cmpdi_operand (*op2
, mode
))
29635 *op2
= force_reg (mode
, *op2
);
29639 if (!TARGET_VFP_FP16INST
)
29641 /* FP16 comparisons are done in SF mode. */
29643 *op1
= convert_to_mode (mode
, *op1
, 1);
29644 *op2
= convert_to_mode (mode
, *op2
, 1);
29645 /* Fall through. */
29648 if (!vfp_compare_operand (*op1
, mode
))
29649 *op1
= force_reg (mode
, *op1
);
29650 if (!vfp_compare_operand (*op2
, mode
))
29651 *op2
= force_reg (mode
, *op2
);
29661 /* Maximum number of instructions to set block of memory. */
29663 arm_block_set_max_insns (void)
29665 if (optimize_function_for_size_p (cfun
))
29668 return current_tune
->max_insns_inline_memset
;
29671 /* Return TRUE if it's profitable to set block of memory for
29672 non-vectorized case. VAL is the value to set the memory
29673 with. LENGTH is the number of bytes to set. ALIGN is the
29674 alignment of the destination memory in bytes. UNALIGNED_P
29675 is TRUE if we can only set the memory with instructions
29676 meeting alignment requirements. USE_STRD_P is TRUE if we
29677 can use strd to set the memory. */
29679 arm_block_set_non_vect_profit_p (rtx val
,
29680 unsigned HOST_WIDE_INT length
,
29681 unsigned HOST_WIDE_INT align
,
29682 bool unaligned_p
, bool use_strd_p
)
29685 /* For leftovers in bytes of 0-7, we can set the memory block using
29686 strb/strh/str with minimum instruction number. */
29687 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29691 num
= arm_const_inline_cost (SET
, val
);
29692 num
+= length
/ align
+ length
% align
;
29694 else if (use_strd_p
)
29696 num
= arm_const_double_inline_cost (val
);
29697 num
+= (length
>> 3) + leftover
[length
& 7];
29701 num
= arm_const_inline_cost (SET
, val
);
29702 num
+= (length
>> 2) + leftover
[length
& 3];
29705 /* We may be able to combine last pair STRH/STRB into a single STR
29706 by shifting one byte back. */
29707 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
29710 return (num
<= arm_block_set_max_insns ());
29713 /* Return TRUE if it's profitable to set block of memory for
29714 vectorized case. LENGTH is the number of bytes to set.
29715 ALIGN is the alignment of destination memory in bytes.
29716 MODE is the vector mode used to set the memory. */
29718 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
29719 unsigned HOST_WIDE_INT align
,
29723 bool unaligned_p
= ((align
& 3) != 0);
29724 unsigned int nelt
= GET_MODE_NUNITS (mode
);
29726 /* Instruction loading constant value. */
29728 /* Instructions storing the memory. */
29729 num
+= (length
+ nelt
- 1) / nelt
;
29730 /* Instructions adjusting the address expression. Only need to
29731 adjust address expression if it's 4 bytes aligned and bytes
29732 leftover can only be stored by mis-aligned store instruction. */
29733 if (!unaligned_p
&& (length
& 3) != 0)
29736 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29737 if (!unaligned_p
&& mode
== V16QImode
)
29740 return (num
<= arm_block_set_max_insns ());
29743 /* Set a block of memory using vectorization instructions for the
29744 unaligned case. We fill the first LENGTH bytes of the memory
29745 area starting from DSTBASE with byte constant VALUE. ALIGN is
29746 the alignment requirement of memory. Return TRUE if succeeded. */
29748 arm_block_set_unaligned_vect (rtx dstbase
,
29749 unsigned HOST_WIDE_INT length
,
29750 unsigned HOST_WIDE_INT value
,
29751 unsigned HOST_WIDE_INT align
)
29753 unsigned int i
, j
, nelt_v16
, nelt_v8
, nelt_mode
;
29755 rtx val_elt
, val_vec
, reg
;
29756 rtx rval
[MAX_VECT_LEN
];
29757 rtx (*gen_func
) (rtx
, rtx
);
29759 unsigned HOST_WIDE_INT v
= value
;
29760 unsigned int offset
= 0;
29761 gcc_assert ((align
& 0x3) != 0);
29762 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29763 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29764 if (length
>= nelt_v16
)
29767 gen_func
= gen_movmisalignv16qi
;
29772 gen_func
= gen_movmisalignv8qi
;
29774 nelt_mode
= GET_MODE_NUNITS (mode
);
29775 gcc_assert (length
>= nelt_mode
);
29776 /* Skip if it isn't profitable. */
29777 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29780 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29781 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29783 v
= sext_hwi (v
, BITS_PER_WORD
);
29784 val_elt
= GEN_INT (v
);
29785 for (j
= 0; j
< nelt_mode
; j
++)
29788 reg
= gen_reg_rtx (mode
);
29789 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
29790 /* Emit instruction loading the constant value. */
29791 emit_move_insn (reg
, val_vec
);
29793 /* Handle nelt_mode bytes in a vector. */
29794 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29796 emit_insn ((*gen_func
) (mem
, reg
));
29797 if (i
+ 2 * nelt_mode
<= length
)
29799 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
29800 offset
+= nelt_mode
;
29801 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29805 /* If there are not less than nelt_v8 bytes leftover, we must be in
29807 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
29809 /* Handle (8, 16) bytes leftover. */
29810 if (i
+ nelt_v8
< length
)
29812 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
29813 offset
+= length
- i
;
29814 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29816 /* We are shifting bytes back, set the alignment accordingly. */
29817 if ((length
& 1) != 0 && align
>= 2)
29818 set_mem_align (mem
, BITS_PER_UNIT
);
29820 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29822 /* Handle (0, 8] bytes leftover. */
29823 else if (i
< length
&& i
+ nelt_v8
>= length
)
29825 if (mode
== V16QImode
)
29826 reg
= gen_lowpart (V8QImode
, reg
);
29828 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
29829 + (nelt_mode
- nelt_v8
))));
29830 offset
+= (length
- i
) + (nelt_mode
- nelt_v8
);
29831 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, offset
);
29833 /* We are shifting bytes back, set the alignment accordingly. */
29834 if ((length
& 1) != 0 && align
>= 2)
29835 set_mem_align (mem
, BITS_PER_UNIT
);
29837 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29843 /* Set a block of memory using vectorization instructions for the
29844 aligned case. We fill the first LENGTH bytes of the memory area
29845 starting from DSTBASE with byte constant VALUE. ALIGN is the
29846 alignment requirement of memory. Return TRUE if succeeded. */
29848 arm_block_set_aligned_vect (rtx dstbase
,
29849 unsigned HOST_WIDE_INT length
,
29850 unsigned HOST_WIDE_INT value
,
29851 unsigned HOST_WIDE_INT align
)
29853 unsigned int i
, j
, nelt_v8
, nelt_v16
, nelt_mode
;
29854 rtx dst
, addr
, mem
;
29855 rtx val_elt
, val_vec
, reg
;
29856 rtx rval
[MAX_VECT_LEN
];
29858 unsigned HOST_WIDE_INT v
= value
;
29859 unsigned int offset
= 0;
29861 gcc_assert ((align
& 0x3) == 0);
29862 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29863 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29864 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
29869 nelt_mode
= GET_MODE_NUNITS (mode
);
29870 gcc_assert (length
>= nelt_mode
);
29871 /* Skip if it isn't profitable. */
29872 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29875 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29877 v
= sext_hwi (v
, BITS_PER_WORD
);
29878 val_elt
= GEN_INT (v
);
29879 for (j
= 0; j
< nelt_mode
; j
++)
29882 reg
= gen_reg_rtx (mode
);
29883 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
29884 /* Emit instruction loading the constant value. */
29885 emit_move_insn (reg
, val_vec
);
29888 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29889 if (mode
== V16QImode
)
29891 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29892 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29894 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29895 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
29897 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29898 offset
+= length
- nelt_mode
;
29899 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29900 /* We are shifting bytes back, set the alignment accordingly. */
29901 if ((length
& 0x3) == 0)
29902 set_mem_align (mem
, BITS_PER_UNIT
* 4);
29903 else if ((length
& 0x1) == 0)
29904 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29906 set_mem_align (mem
, BITS_PER_UNIT
);
29908 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29911 /* Fall through for bytes leftover. */
29913 nelt_mode
= GET_MODE_NUNITS (mode
);
29914 reg
= gen_lowpart (V8QImode
, reg
);
29917 /* Handle 8 bytes in a vector. */
29918 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29920 addr
= plus_constant (Pmode
, dst
, i
);
29921 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
+ i
);
29922 emit_move_insn (mem
, reg
);
29925 /* Handle single word leftover by shifting 4 bytes back. We can
29926 use aligned access for this case. */
29927 if (i
+ UNITS_PER_WORD
== length
)
29929 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
29930 offset
+= i
- UNITS_PER_WORD
;
29931 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
);
29932 /* We are shifting 4 bytes back, set the alignment accordingly. */
29933 if (align
> UNITS_PER_WORD
)
29934 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
29936 emit_move_insn (mem
, reg
);
29938 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29939 We have to use unaligned access for this case. */
29940 else if (i
< length
)
29942 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29943 offset
+= length
- nelt_mode
;
29944 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29945 /* We are shifting bytes back, set the alignment accordingly. */
29946 if ((length
& 1) == 0)
29947 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29949 set_mem_align (mem
, BITS_PER_UNIT
);
29951 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29957 /* Set a block of memory using plain strh/strb instructions, only
29958 using instructions allowed by ALIGN on processor. We fill the
29959 first LENGTH bytes of the memory area starting from DSTBASE
29960 with byte constant VALUE. ALIGN is the alignment requirement
29963 arm_block_set_unaligned_non_vect (rtx dstbase
,
29964 unsigned HOST_WIDE_INT length
,
29965 unsigned HOST_WIDE_INT value
,
29966 unsigned HOST_WIDE_INT align
)
29969 rtx dst
, addr
, mem
;
29970 rtx val_exp
, val_reg
, reg
;
29972 HOST_WIDE_INT v
= value
;
29974 gcc_assert (align
== 1 || align
== 2);
29977 v
|= (value
<< BITS_PER_UNIT
);
29979 v
= sext_hwi (v
, BITS_PER_WORD
);
29980 val_exp
= GEN_INT (v
);
29981 /* Skip if it isn't profitable. */
29982 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29983 align
, true, false))
29986 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29987 mode
= (align
== 2 ? HImode
: QImode
);
29988 val_reg
= force_reg (SImode
, val_exp
);
29989 reg
= gen_lowpart (mode
, val_reg
);
29991 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
29993 addr
= plus_constant (Pmode
, dst
, i
);
29994 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
29995 emit_move_insn (mem
, reg
);
29998 /* Handle single byte leftover. */
29999 if (i
+ 1 == length
)
30001 reg
= gen_lowpart (QImode
, val_reg
);
30002 addr
= plus_constant (Pmode
, dst
, i
);
30003 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
30004 emit_move_insn (mem
, reg
);
30008 gcc_assert (i
== length
);
30012 /* Set a block of memory using plain strd/str/strh/strb instructions,
30013 to permit unaligned copies on processors which support unaligned
30014 semantics for those instructions. We fill the first LENGTH bytes
30015 of the memory area starting from DSTBASE with byte constant VALUE.
30016 ALIGN is the alignment requirement of memory. */
30018 arm_block_set_aligned_non_vect (rtx dstbase
,
30019 unsigned HOST_WIDE_INT length
,
30020 unsigned HOST_WIDE_INT value
,
30021 unsigned HOST_WIDE_INT align
)
30024 rtx dst
, addr
, mem
;
30025 rtx val_exp
, val_reg
, reg
;
30026 unsigned HOST_WIDE_INT v
;
30029 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
30030 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
30032 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
30033 if (length
< UNITS_PER_WORD
)
30034 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
30037 v
|= (v
<< BITS_PER_WORD
);
30039 v
= sext_hwi (v
, BITS_PER_WORD
);
30041 val_exp
= GEN_INT (v
);
30042 /* Skip if it isn't profitable. */
30043 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
30044 align
, false, use_strd_p
))
30049 /* Try without strd. */
30050 v
= (v
>> BITS_PER_WORD
);
30051 v
= sext_hwi (v
, BITS_PER_WORD
);
30052 val_exp
= GEN_INT (v
);
30053 use_strd_p
= false;
30054 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
30055 align
, false, use_strd_p
))
30060 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
30061 /* Handle double words using strd if possible. */
30064 val_reg
= force_reg (DImode
, val_exp
);
30066 for (; (i
+ 8 <= length
); i
+= 8)
30068 addr
= plus_constant (Pmode
, dst
, i
);
30069 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
30070 emit_move_insn (mem
, reg
);
30074 val_reg
= force_reg (SImode
, val_exp
);
30076 /* Handle words. */
30077 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
30078 for (; (i
+ 4 <= length
); i
+= 4)
30080 addr
= plus_constant (Pmode
, dst
, i
);
30081 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
30082 if ((align
& 3) == 0)
30083 emit_move_insn (mem
, reg
);
30085 emit_insn (gen_unaligned_storesi (mem
, reg
));
30088 /* Merge last pair of STRH and STRB into a STR if possible. */
30089 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
30091 addr
= plus_constant (Pmode
, dst
, i
- 1);
30092 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
30093 /* We are shifting one byte back, set the alignment accordingly. */
30094 if ((align
& 1) == 0)
30095 set_mem_align (mem
, BITS_PER_UNIT
);
30097 /* Most likely this is an unaligned access, and we can't tell at
30098 compilation time. */
30099 emit_insn (gen_unaligned_storesi (mem
, reg
));
30103 /* Handle half word leftover. */
30104 if (i
+ 2 <= length
)
30106 reg
= gen_lowpart (HImode
, val_reg
);
30107 addr
= plus_constant (Pmode
, dst
, i
);
30108 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
30109 if ((align
& 1) == 0)
30110 emit_move_insn (mem
, reg
);
30112 emit_insn (gen_unaligned_storehi (mem
, reg
));
30117 /* Handle single byte leftover. */
30118 if (i
+ 1 == length
)
30120 reg
= gen_lowpart (QImode
, val_reg
);
30121 addr
= plus_constant (Pmode
, dst
, i
);
30122 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
30123 emit_move_insn (mem
, reg
);
30129 /* Set a block of memory using vectorization instructions for both
30130 aligned and unaligned cases. We fill the first LENGTH bytes of
30131 the memory area starting from DSTBASE with byte constant VALUE.
30132 ALIGN is the alignment requirement of memory. */
30134 arm_block_set_vect (rtx dstbase
,
30135 unsigned HOST_WIDE_INT length
,
30136 unsigned HOST_WIDE_INT value
,
30137 unsigned HOST_WIDE_INT align
)
30139 /* Check whether we need to use unaligned store instruction. */
30140 if (((align
& 3) != 0 || (length
& 3) != 0)
30141 /* Check whether unaligned store instruction is available. */
30142 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
30145 if ((align
& 3) == 0)
30146 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
30148 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
30151 /* Expand string store operation. Firstly we try to do that by using
30152 vectorization instructions, then try with ARM unaligned access and
30153 double-word store if profitable. OPERANDS[0] is the destination,
30154 OPERANDS[1] is the number of bytes, operands[2] is the value to
30155 initialize the memory, OPERANDS[3] is the known alignment of the
30158 arm_gen_setmem (rtx
*operands
)
30160 rtx dstbase
= operands
[0];
30161 unsigned HOST_WIDE_INT length
;
30162 unsigned HOST_WIDE_INT value
;
30163 unsigned HOST_WIDE_INT align
;
30165 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
30168 length
= UINTVAL (operands
[1]);
30172 value
= (UINTVAL (operands
[2]) & 0xFF);
30173 align
= UINTVAL (operands
[3]);
30174 if (TARGET_NEON
&& length
>= 8
30175 && current_tune
->string_ops_prefer_neon
30176 && arm_block_set_vect (dstbase
, length
, value
, align
))
30179 if (!unaligned_access
&& (align
& 3) != 0)
30180 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
30182 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
30187 arm_macro_fusion_p (void)
30189 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
30192 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30193 for MOVW / MOVT macro fusion. */
30196 arm_sets_movw_movt_fusible_p (rtx prev_set
, rtx curr_set
)
30198 /* We are trying to fuse
30199 movw imm / movt imm
30200 instructions as a group that gets scheduled together. */
30202 rtx set_dest
= SET_DEST (curr_set
);
30204 if (GET_MODE (set_dest
) != SImode
)
30207 /* We are trying to match:
30208 prev (movw) == (set (reg r0) (const_int imm16))
30209 curr (movt) == (set (zero_extract (reg r0)
30212 (const_int imm16_1))
30214 prev (movw) == (set (reg r1)
30215 (high (symbol_ref ("SYM"))))
30216 curr (movt) == (set (reg r0)
30218 (symbol_ref ("SYM")))) */
30220 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
30222 if (CONST_INT_P (SET_SRC (curr_set
))
30223 && CONST_INT_P (SET_SRC (prev_set
))
30224 && REG_P (XEXP (set_dest
, 0))
30225 && REG_P (SET_DEST (prev_set
))
30226 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
30230 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
30231 && REG_P (SET_DEST (curr_set
))
30232 && REG_P (SET_DEST (prev_set
))
30233 && GET_CODE (SET_SRC (prev_set
)) == HIGH
30234 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
30241 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
30243 rtx prev_set
= single_set (prev
);
30244 rtx curr_set
= single_set (curr
);
30250 if (any_condjump_p (curr
))
30253 if (!arm_macro_fusion_p ())
30256 if (current_tune
->fusible_ops
& tune_params::FUSE_AES_AESMC
30257 && aarch_crypto_can_dual_issue (prev
, curr
))
30260 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
30261 && arm_sets_movw_movt_fusible_p (prev_set
, curr_set
))
30267 /* Return true iff the instruction fusion described by OP is enabled. */
30269 arm_fusion_enabled_p (tune_params::fuse_ops op
)
30271 return current_tune
->fusible_ops
& op
;
30274 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30275 scheduled for speculative execution. Reject the long-running division
30276 and square-root instructions. */
30279 arm_sched_can_speculate_insn (rtx_insn
*insn
)
30281 switch (get_attr_type (insn
))
30289 case TYPE_NEON_FP_SQRT_S
:
30290 case TYPE_NEON_FP_SQRT_D
:
30291 case TYPE_NEON_FP_SQRT_S_Q
:
30292 case TYPE_NEON_FP_SQRT_D_Q
:
30293 case TYPE_NEON_FP_DIV_S
:
30294 case TYPE_NEON_FP_DIV_D
:
30295 case TYPE_NEON_FP_DIV_S_Q
:
30296 case TYPE_NEON_FP_DIV_D_Q
:
30303 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30305 static unsigned HOST_WIDE_INT
30306 arm_asan_shadow_offset (void)
30308 return HOST_WIDE_INT_1U
<< 29;
30312 /* This is a temporary fix for PR60655. Ideally we need
30313 to handle most of these cases in the generic part but
30314 currently we reject minus (..) (sym_ref). We try to
30315 ameliorate the case with minus (sym_ref1) (sym_ref2)
30316 where they are in the same section. */
30319 arm_const_not_ok_for_debug_p (rtx p
)
30321 tree decl_op0
= NULL
;
30322 tree decl_op1
= NULL
;
30324 if (GET_CODE (p
) == MINUS
)
30326 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
30328 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
30330 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
30331 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
30333 if ((VAR_P (decl_op1
)
30334 || TREE_CODE (decl_op1
) == CONST_DECL
)
30335 && (VAR_P (decl_op0
)
30336 || TREE_CODE (decl_op0
) == CONST_DECL
))
30337 return (get_variable_section (decl_op1
, false)
30338 != get_variable_section (decl_op0
, false));
30340 if (TREE_CODE (decl_op1
) == LABEL_DECL
30341 && TREE_CODE (decl_op0
) == LABEL_DECL
)
30342 return (DECL_CONTEXT (decl_op1
)
30343 != DECL_CONTEXT (decl_op0
));
30353 /* return TRUE if x is a reference to a value in a constant pool */
30355 arm_is_constant_pool_ref (rtx x
)
30358 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
30359 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
/* NOTE(review): GC-rooted cache read by arm_set_current_function to
   skip redundant target-option switches; cleared by
   arm_reset_previous_fndecl.  */
30362 /* Remember the last target of arm_set_current_function. */
30363 static GTY(()) tree arm_previous_fndecl
;
30365 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30368 save_restore_target_globals (tree new_tree
)
30370 /* If we have a previous state, use it. */
30371 if (TREE_TARGET_GLOBALS (new_tree
))
30372 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
30373 else if (new_tree
== target_option_default_node
)
30374 restore_target_globals (&default_target_globals
);
30377 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30378 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
30381 arm_option_params_internal ();
30384 /* Invalidate arm_previous_fndecl. */
30387 arm_reset_previous_fndecl (void)
30389 arm_previous_fndecl
= NULL_TREE
;
30392 /* Establish appropriate back-end context for processing the function
30393 FNDECL. The argument might be NULL to indicate processing at top
30394 level, outside of any function scope. */
30397 arm_set_current_function (tree fndecl
)
30399 if (!fndecl
|| fndecl
== arm_previous_fndecl
)
30402 tree old_tree
= (arm_previous_fndecl
30403 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl
)
30406 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30408 /* If current function has no attributes but previous one did,
30409 use the default node. */
30410 if (! new_tree
&& old_tree
)
30411 new_tree
= target_option_default_node
;
30413 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30414 the default have been handled by save_restore_target_globals from
30415 arm_pragma_target_parse. */
30416 if (old_tree
== new_tree
)
30419 arm_previous_fndecl
= fndecl
;
30421 /* First set the target options. */
30422 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
30424 save_restore_target_globals (new_tree
);
30427 /* Implement TARGET_OPTION_PRINT. */
30430 arm_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
30432 int flags
= ptr
->x_target_flags
;
30433 const char *fpu_name
;
30435 fpu_name
= (ptr
->x_arm_fpu_index
== TARGET_FPU_auto
30436 ? "auto" : all_fpus
[ptr
->x_arm_fpu_index
].name
);
30438 fprintf (file
, "%*sselected isa %s\n", indent
, "",
30439 TARGET_THUMB2_P (flags
) ? "thumb2" :
30440 TARGET_THUMB_P (flags
) ? "thumb1" :
30443 if (ptr
->x_arm_arch_string
)
30444 fprintf (file
, "%*sselected architecture %s\n", indent
, "",
30445 ptr
->x_arm_arch_string
);
30447 if (ptr
->x_arm_cpu_string
)
30448 fprintf (file
, "%*sselected CPU %s\n", indent
, "",
30449 ptr
->x_arm_cpu_string
);
30451 if (ptr
->x_arm_tune_string
)
30452 fprintf (file
, "%*sselected tune %s\n", indent
, "",
30453 ptr
->x_arm_tune_string
);
30455 fprintf (file
, "%*sselected fpu %s\n", indent
, "", fpu_name
);
30458 /* Hook to determine if one function can safely inline another. */
30461 arm_can_inline_p (tree caller
, tree callee
)
30463 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
30464 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
30465 bool can_inline
= true;
30467 struct cl_target_option
*caller_opts
30468 = TREE_TARGET_OPTION (caller_tree
? caller_tree
30469 : target_option_default_node
);
30471 struct cl_target_option
*callee_opts
30472 = TREE_TARGET_OPTION (callee_tree
? callee_tree
30473 : target_option_default_node
);
30475 if (callee_opts
== caller_opts
)
30478 /* Callee's ISA features should be a subset of the caller's. */
30479 struct arm_build_target caller_target
;
30480 struct arm_build_target callee_target
;
30481 caller_target
.isa
= sbitmap_alloc (isa_num_bits
);
30482 callee_target
.isa
= sbitmap_alloc (isa_num_bits
);
30484 arm_configure_build_target (&caller_target
, caller_opts
, &global_options_set
,
30486 arm_configure_build_target (&callee_target
, callee_opts
, &global_options_set
,
30488 if (!bitmap_subset_p (callee_target
.isa
, caller_target
.isa
))
30489 can_inline
= false;
30491 sbitmap_free (caller_target
.isa
);
30492 sbitmap_free (callee_target
.isa
);
30494 /* OK to inline between different modes.
30495 Function with mode specific instructions, e.g using asm,
30496 must be explicitly protected with noinline. */
30500 /* Hook to fix function's alignment affected by target attribute. */
30503 arm_relayout_function (tree fndecl
)
30505 if (DECL_USER_ALIGN (fndecl
))
30508 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30511 callee_tree
= target_option_default_node
;
30513 struct cl_target_option
*opts
= TREE_TARGET_OPTION (callee_tree
);
30516 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts
->x_target_flags
)));
30519 /* Inner function to process the attribute((target(...))), take an argument and
30520 set the current options from the argument. If we have a list, recursively
30521 go over the list. */
30524 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
30526 if (TREE_CODE (args
) == TREE_LIST
)
30530 for (; args
; args
= TREE_CHAIN (args
))
30531 if (TREE_VALUE (args
)
30532 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
30537 else if (TREE_CODE (args
) != STRING_CST
)
30539 error ("attribute %<target%> argument not a string");
30543 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
30546 while ((q
= strtok (argstr
, ",")) != NULL
)
30548 while (ISSPACE (*q
)) ++q
;
30551 if (!strncmp (q
, "thumb", 5))
30552 opts
->x_target_flags
|= MASK_THUMB
;
30554 else if (!strncmp (q
, "arm", 3))
30555 opts
->x_target_flags
&= ~MASK_THUMB
;
30557 else if (!strncmp (q
, "fpu=", 4))
30560 if (! opt_enum_arg_to_value (OPT_mfpu_
, q
+4,
30561 &fpu_index
, CL_TARGET
))
30563 error ("invalid fpu for attribute(target(\"%s\"))", q
);
30566 if (fpu_index
== TARGET_FPU_auto
)
30568 /* This doesn't really make sense until we support
30569 general dynamic selection of the architecture and all
30571 sorry ("auto fpu selection not currently permitted here");
30574 opts
->x_arm_fpu_index
= (enum fpu_type
) fpu_index
;
30578 error ("attribute(target(\"%s\")) is unknown", q
);
30586 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30589 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
30590 struct gcc_options
*opts_set
)
30592 struct cl_target_option cl_opts
;
30594 if (!arm_valid_target_attribute_rec (args
, opts
))
30597 cl_target_option_save (&cl_opts
, opts
);
30598 arm_configure_build_target (&arm_active_target
, &cl_opts
, opts_set
, false);
30599 arm_option_check_internal (opts
);
30600 /* Do any overrides, such as global options arch=xxx. */
30601 arm_option_override_internal (opts
, opts_set
);
30603 return build_target_option_node (opts
);
30607 add_attribute (const char * mode
, tree
*attributes
)
30609 size_t len
= strlen (mode
);
30610 tree value
= build_string (len
, mode
);
30612 TREE_TYPE (value
) = build_array_type (char_type_node
,
30613 build_index_type (size_int (len
)));
30615 *attributes
= tree_cons (get_identifier ("target"),
30616 build_tree_list (NULL_TREE
, value
),
30620 /* For testing. Insert thumb or arm modes alternatively on functions. */
30623 arm_insert_attributes (tree fndecl
, tree
* attributes
)
30627 if (! TARGET_FLIP_THUMB
)
30630 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
30631 || DECL_BUILT_IN (fndecl
) || DECL_ARTIFICIAL (fndecl
))
30634 /* Nested definitions must inherit mode. */
30635 if (current_function_decl
)
30637 mode
= TARGET_THUMB
? "thumb" : "arm";
30638 add_attribute (mode
, attributes
);
30642 /* If there is already a setting don't change it. */
30643 if (lookup_attribute ("target", *attributes
) != NULL
)
30646 mode
= thumb_flipper
? "thumb" : "arm";
30647 add_attribute (mode
, attributes
);
30649 thumb_flipper
= !thumb_flipper
;
30652 /* Hook to validate attribute((target("string"))). */
30655 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
30656 tree args
, int ARG_UNUSED (flags
))
30659 struct gcc_options func_options
;
30660 tree cur_tree
, new_optimize
;
30661 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
30663 /* Get the optimization options of the current function. */
30664 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
30666 /* If the function changed the optimization levels as well as setting target
30667 options, start with the optimizations specified. */
30668 if (!func_optimize
)
30669 func_optimize
= optimization_default_node
;
30671 /* Init func_options. */
30672 memset (&func_options
, 0, sizeof (func_options
));
30673 init_options_struct (&func_options
, NULL
);
30674 lang_hooks
.init_options_struct (&func_options
);
30676 /* Initialize func_options to the defaults. */
30677 cl_optimization_restore (&func_options
,
30678 TREE_OPTIMIZATION (func_optimize
));
30680 cl_target_option_restore (&func_options
,
30681 TREE_TARGET_OPTION (target_option_default_node
));
30683 /* Set func_options flags with new target mode. */
30684 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
30685 &global_options_set
);
30687 if (cur_tree
== NULL_TREE
)
30690 new_optimize
= build_optimization_node (&func_options
);
30692 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
30694 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
30696 finalize_options_struct (&func_options
);
30701 /* Match an ISA feature bitmap to a named FPU. We always use the
30702 first entry that exactly matches the feature set, so that we
30703 effectively canonicalize the FPU name for the assembler. */
30705 arm_identify_fpu_from_isa (sbitmap isa
)
30707 auto_sbitmap
fpubits (isa_num_bits
);
30708 auto_sbitmap
cand_fpubits (isa_num_bits
);
30710 bitmap_and (fpubits
, isa
, isa_all_fpubits
);
30712 /* If there are no ISA feature bits relating to the FPU, we must be
30713 doing soft-float. */
30714 if (bitmap_empty_p (fpubits
))
30717 for (unsigned int i
= 0; i
< TARGET_FPU_auto
; i
++)
30719 arm_initialize_isa (cand_fpubits
, all_fpus
[i
].isa_bits
);
30720 if (bitmap_equal_p (fpubits
, cand_fpubits
))
30721 return all_fpus
[i
].name
;
30723 /* We must find an entry, or things have gone wrong. */
30724 gcc_unreachable ();
30728 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
30731 fprintf (stream
, "\t.syntax unified\n");
30735 if (is_called_in_ARM_mode (decl
)
30736 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
30737 && cfun
->is_thunk
))
30738 fprintf (stream
, "\t.code 32\n");
30739 else if (TARGET_THUMB1
)
30740 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
30742 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
30745 fprintf (stream
, "\t.arm\n");
30747 asm_fprintf (asm_out_file
, "\t.fpu %s\n",
30750 : arm_identify_fpu_from_isa (arm_active_target
.isa
)));
30752 if (TARGET_POKE_FUNCTION_NAME
)
30753 arm_poke_function_name (stream
, (const char *) name
);
30756 /* If MEM is in the form of [base+offset], extract the two parts
30757 of address and set to BASE and OFFSET, otherwise return false
30758 after clearing BASE and OFFSET. */
30761 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
30765 gcc_assert (MEM_P (mem
));
30767 addr
= XEXP (mem
, 0);
30769 /* Strip off const from addresses like (const (addr)). */
30770 if (GET_CODE (addr
) == CONST
)
30771 addr
= XEXP (addr
, 0);
30773 if (GET_CODE (addr
) == REG
)
30776 *offset
= const0_rtx
;
30780 if (GET_CODE (addr
) == PLUS
30781 && GET_CODE (XEXP (addr
, 0)) == REG
30782 && CONST_INT_P (XEXP (addr
, 1)))
30784 *base
= XEXP (addr
, 0);
30785 *offset
= XEXP (addr
, 1);
30790 *offset
= NULL_RTX
;
30795 /* If INSN is a load or store of address in the form of [base+offset],
30796 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
30797 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30798 otherwise return FALSE. */
30801 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
30805 gcc_assert (INSN_P (insn
));
30806 x
= PATTERN (insn
);
30807 if (GET_CODE (x
) != SET
)
30811 dest
= SET_DEST (x
);
30812 if (GET_CODE (src
) == REG
&& GET_CODE (dest
) == MEM
)
30815 extract_base_offset_in_addr (dest
, base
, offset
);
30817 else if (GET_CODE (src
) == MEM
&& GET_CODE (dest
) == REG
)
30820 extract_base_offset_in_addr (src
, base
, offset
);
30825 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
30828 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30830 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
30831 and PRI are only calculated for these instructions. For other instruction,
30832 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
30833 instruction fusion can be supported by returning different priorities.
30835 It's important that irrelevant instructions get the largest FUSION_PRI. */
30838 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
30839 int *fusion_pri
, int *pri
)
30845 gcc_assert (INSN_P (insn
));
30848 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
30855 /* Load goes first. */
30857 *fusion_pri
= tmp
- 1;
30859 *fusion_pri
= tmp
- 2;
30863 /* INSN with smaller base register goes first. */
30864 tmp
-= ((REGNO (base
) & 0xff) << 20);
30866 /* INSN with smaller offset goes first. */
30867 off_val
= (int)(INTVAL (offset
));
30869 tmp
-= (off_val
& 0xfffff);
30871 tmp
+= ((- off_val
) & 0xfffff);
30878 /* Construct and return a PARALLEL RTX vector with elements numbering the
30879 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30880 the vector - from the perspective of the architecture. This does not
30881 line up with GCC's perspective on lane numbers, so we end up with
30882 different masks depending on our target endian-ness. The diagram
30883 below may help. We must draw the distinction when building masks
30884 which select one half of the vector. An instruction selecting
30885 architectural low-lanes for a big-endian target, must be described using
30886 a mask selecting GCC high-lanes.
30888 Big-Endian Little-Endian
30890 GCC 0 1 2 3 3 2 1 0
30891 | x | x | x | x | | x | x | x | x |
30892 Architecture 3 2 1 0 3 2 1 0
30894 Low Mask: { 2, 3 } { 0, 1 }
30895 High Mask: { 0, 1 } { 2, 3 }
30899 arm_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
30901 int nunits
= GET_MODE_NUNITS (mode
);
30902 rtvec v
= rtvec_alloc (nunits
/ 2);
30903 int high_base
= nunits
/ 2;
30909 if (BYTES_BIG_ENDIAN
)
30910 base
= high
? low_base
: high_base
;
30912 base
= high
? high_base
: low_base
;
30914 for (i
= 0; i
< nunits
/ 2; i
++)
30915 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
30917 t1
= gen_rtx_PARALLEL (mode
, v
);
30921 /* Check OP for validity as a PARALLEL RTX vector with elements
30922 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30923 from the perspective of the architecture. See the diagram above
30924 arm_simd_vect_par_cnst_half_p for more details. */
30927 arm_simd_check_vect_par_cnst_half_p (rtx op
, machine_mode mode
,
30930 rtx ideal
= arm_simd_vect_par_cnst_half (mode
, high
);
30931 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
30932 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
30935 if (!VECTOR_MODE_P (mode
))
30938 if (count_op
!= count_ideal
)
30941 for (i
= 0; i
< count_ideal
; i
++)
30943 rtx elt_op
= XVECEXP (op
, 0, i
);
30944 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
30946 if (!CONST_INT_P (elt_op
)
30947 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
30953 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30956 arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
30959 /* For now, we punt and not handle this for TARGET_THUMB1. */
30960 if (vcall_offset
&& TARGET_THUMB1
)
30963 /* Otherwise ok. */
30967 /* Generate RTL for a conditional branch with rtx comparison CODE in
30968 mode CC_MODE. The destination of the unlikely conditional branch
30972 arm_gen_unlikely_cbranch (enum rtx_code code
, machine_mode cc_mode
,
30976 x
= gen_rtx_fmt_ee (code
, VOIDmode
,
30977 gen_rtx_REG (cc_mode
, CC_REGNUM
),
30980 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
30981 gen_rtx_LABEL_REF (VOIDmode
, label_ref
),
30983 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
30986 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30988 For pure-code sections there is no letter code for this attribute, so
30989 output all the section flags numerically when this is needed. */
30992 arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
)
30995 if (flags
& SECTION_ARM_PURECODE
)
30999 if (!(flags
& SECTION_DEBUG
))
31001 if (flags
& SECTION_EXCLUDE
)
31002 *num
|= 0x80000000;
31003 if (flags
& SECTION_WRITE
)
31005 if (flags
& SECTION_CODE
)
31007 if (flags
& SECTION_MERGE
)
31009 if (flags
& SECTION_STRINGS
)
31011 if (flags
& SECTION_TLS
)
31013 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
31022 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31024 If pure-code is passed as an option, make sure all functions are in
31025 sections that have the SHF_ARM_PURECODE attribute. */
31028 arm_function_section (tree decl
, enum node_frequency freq
,
31029 bool startup
, bool exit
)
31031 const char * section_name
;
31034 if (!decl
|| TREE_CODE (decl
) != FUNCTION_DECL
)
31035 return default_function_section (decl
, freq
, startup
, exit
);
31037 if (!target_pure_code
)
31038 return default_function_section (decl
, freq
, startup
, exit
);
31041 section_name
= DECL_SECTION_NAME (decl
);
31043 /* If a function is not in a named section then it falls under the 'default'
31044 text section, also known as '.text'. We can preserve previous behavior as
31045 the default text section already has the SHF_ARM_PURECODE section
31049 section
*default_sec
= default_function_section (decl
, freq
, startup
,
31052 /* If default_sec is not null, then it must be a special section like for
31053 example .text.startup. We set the pure-code attribute and return the
31054 same section to preserve existing behavior. */
31056 default_sec
->common
.flags
|= SECTION_ARM_PURECODE
;
31057 return default_sec
;
31060 /* Otherwise look whether a section has already been created with
31062 sec
= get_named_section (decl
, section_name
, 0);
31064 /* If that is not the case passing NULL as the section's name to
31065 'get_named_section' will create a section with the declaration's
31067 sec
= get_named_section (decl
, NULL
, 0);
31069 /* Set the SHF_ARM_PURECODE attribute. */
31070 sec
->common
.flags
|= SECTION_ARM_PURECODE
;
31075 /* Implements the TARGET_SECTION_FLAGS hook.
31077 If DECL is a function declaration and pure-code is passed as an option
31078 then add the SFH_ARM_PURECODE attribute to the section flags. NAME is the
31079 section's name and RELOC indicates whether the declarations initializer may
31080 contain runtime relocations. */
31082 static unsigned int
31083 arm_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
31085 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
31087 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
&& target_pure_code
)
31088 flags
|= SECTION_ARM_PURECODE
;
31093 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31096 arm_expand_divmod_libfunc (rtx libfunc
, machine_mode mode
,
31098 rtx
*quot_p
, rtx
*rem_p
)
31100 if (mode
== SImode
)
31101 gcc_assert (!TARGET_IDIV
);
31103 scalar_int_mode libval_mode
31104 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode
));
31106 rtx libval
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
31108 op0
, GET_MODE (op0
),
31109 op1
, GET_MODE (op1
));
31111 rtx quotient
= simplify_gen_subreg (mode
, libval
, libval_mode
, 0);
31112 rtx remainder
= simplify_gen_subreg (mode
, libval
, libval_mode
,
31113 GET_MODE_SIZE (mode
));
31115 gcc_assert (quotient
);
31116 gcc_assert (remainder
);
31118 *quot_p
= quotient
;
31119 *rem_p
= remainder
;
31122 /* This function checks for the availability of the coprocessor builtin passed
31123 in BUILTIN for the current target. Returns true if it is available and
31124 false otherwise. If a BUILTIN is passed for which this function has not
31125 been implemented it will cause an exception. */
31128 arm_coproc_builtin_available (enum unspecv builtin
)
31130 /* None of these builtins are available in Thumb mode if the target only
31131 supports Thumb-1. */
31149 case VUNSPEC_LDC2L
:
31151 case VUNSPEC_STC2L
:
31154 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31161 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31163 if (arm_arch6
|| arm_arch5te
)
31166 case VUNSPEC_MCRR2
:
31167 case VUNSPEC_MRRC2
:
31172 gcc_unreachable ();
31177 /* This function returns true if OP is a valid memory operand for the ldc and
31178 stc coprocessor instructions and false otherwise. */
31181 arm_coproc_ldc_stc_legitimate_address (rtx op
)
31183 HOST_WIDE_INT range
;
31184 /* Has to be a memory operand. */
31190 /* We accept registers. */
31194 switch GET_CODE (op
)
31198 /* Or registers with an offset. */
31199 if (!REG_P (XEXP (op
, 0)))
31204 /* The offset must be an immediate though. */
31205 if (!CONST_INT_P (op
))
31208 range
= INTVAL (op
);
31210 /* Within the range of [-1020,1020]. */
31211 if (!IN_RANGE (range
, -1020, 1020))
31214 /* And a multiple of 4. */
31215 return (range
% 4) == 0;
31221 return REG_P (XEXP (op
, 0));
31223 gcc_unreachable ();
#if CHECKING_P
namespace selftest
{

/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  We primarily check for
   inconsistencies in the option extensions at present (extensions
   that duplicate others but aren't marked as aliases).  Furthermore,
   for correct canonicalization later options must never be a subset
   of an earlier option.  Any extension should also only specify other
   feature bits and never an architecture bit.  The architecture is inferred
   from the declaration of the extension.  */
static void
arm_test_cpu_arch_data (void)
{
  const arch_option *arch;
  const cpu_option *cpu;
  auto_sbitmap target_isa (isa_num_bits);
  auto_sbitmap isa1 (isa_num_bits);
  auto_sbitmap isa2 (isa_num_bits);

  for (arch = all_architectures; arch->common.name != NULL; ++arch)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (arch->common.extensions == NULL)
	continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
	{
	  if (ext1->alias)
	    continue;

	  arm_initialize_isa (isa1, ext1->isa_bits);
	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
	    {
	      if (ext2->alias || ext1->remove != ext2->remove)
		continue;

	      arm_initialize_isa (isa2, ext2->isa_bits);
	      /* If the option is a subset of the parent option, it doesn't
		 add anything and so isn't useful.  */
	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

	      /* If the extension specifies any architectural bits then
		 disallow it.  Extensions should only specify feature bits.  */
	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
	    }
	}
    }

  for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (cpu->common.extensions == NULL)
	continue;

      /* FIX: this previously read ARCH->common.isa_bits; after the loop
	 above ARCH points at the all_architectures terminator entry, so
	 the architecture-bit check below compared against the wrong ISA
	 set.  Use this CPU's own ISA bits.  */
      arm_initialize_isa (target_isa, cpu->common.isa_bits);

      for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
	{
	  if (ext1->alias)
	    continue;

	  arm_initialize_isa (isa1, ext1->isa_bits);
	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
	    {
	      if (ext2->alias || ext1->remove != ext2->remove)
		continue;

	      arm_initialize_isa (isa2, ext2->isa_bits);
	      /* If the option is a subset of the parent option, it doesn't
		 add anything and so isn't useful.  */
	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

	      /* If the extension specifies any architectural bits then
		 disallow it.  Extensions should only specify feature bits.  */
	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
	    }
	}
    }
}

/* Run all ARM-specific selftests.  */
static void
arm_run_selftests (void)
{
  arm_test_cpu_arch_data ();
}
} /* Namespace selftest.  */

#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
#endif /* CHECKING_P */
/* Instantiate the target hook vector from the TARGET_* macro overrides
   defined earlier in this file; defaults come from target-def.h.  */
struct gcc_target targetm = TARGET_INITIALIZER;

/* Garbage-collector roots table generated by gengtype for this file.  */
#include "gt-arm.h"