/* Output routines for GCC for ARM.
   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "target-globals.h"
#include "tm-constrs.h"
#include "optabs-libfuncs.h"

/* This file should be included last.  */
#include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static int arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_compute_frame_layout (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_core_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *);
static void arm_output_function_prologue (FILE *);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
				  tree, bool);
static rtx arm_function_arg (cumulative_args_t, machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, machine_mode,
				      const_tree, bool);
static pad_direction arm_function_arg_padding (machine_mode, const_tree);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif
static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);
static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
#endif
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_save (struct cl_target_option *, struct gcc_options *);
static void arm_option_restore (struct gcc_options *,
				struct cl_target_option *);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_sched_can_speculate_insn (rtx_insn *);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static bool arm_scalar_mode_supported_p (scalar_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (scalar_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);

static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);
static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   int misalign,
				   enum vect_cost_model_location where);
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
				     const_tree);
static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
						int reloc);
static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
static opt_scalar_float_mode arm_floatn_mode (int, bool);
static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
static bool arm_modes_tieable_p (machine_mode, machine_mode);
static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false,
    arm_handle_cmse_nonsecure_entry, false },
  { "cmse_nonsecure_call", 0, 0, true, false, false,
    arm_handle_cmse_nonsecure_call, true },
  { NULL, 0, 0, false, false, false, NULL, false }
};
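
/* Illustrative uses of the attributes above from C source (an editorial
   sketch, not part of the original file):

     void far_away (void) __attribute__ ((long_call));
     void serviced (void) __attribute__ ((isr ("IRQ")));
     int vfp_args (float) __attribute__ ((pcs ("aapcs-vfp")));

   See the "ARM Function Attributes" section of the GCC manual for the
   accepted arguments.  */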
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE arm_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#endif /* ARM_UNWIND_INFO */
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
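
/* Checking the arithmetic above: the anchor range -4088 .. 4095 covers
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023, so the block size
   is indeed divisible by eight.  */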
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
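
/* ARMv8-A deprecates IT blocks that contain more than a single
   instruction, which is why -mrestrict-it (arm_restrict_it) limits an
   IT block to one insn rather than the architectural maximum of four.  */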
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision

/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P arm_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = TARGET_CPU_arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;
/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned int tune_flags = 0;
/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
/* Active target architecture and tuning.  */

struct arm_build_target arm_active_target;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
int arm_arch5te = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 6KZ extensions.  */
int arm_arch6kz = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;
/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;
/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip supports the ARMv8.1 extensions.  */
int arm_arch8_1 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
int arm_arch8_2 = 0;
/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
/* Nonzero if chip supports Thumb 1.  */
int arm_arch_thumb1;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;
/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we should use Neon to handle 64-bits operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* Initialization code.  */

struct cpu_tune
{
  enum processor_type scheduler;
  unsigned int tune_flags;
  const struct tune_params *tune;
};
#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  { num_slots, l1_size, l1_line_size }
/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_unalign_store_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
const struct cpu_cost_table cortexa9_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (2),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (2),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (3),	/* simple.  */
      COSTS_N_INSNS (3),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (3),	/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (4),	/* extend.  */
      COSTS_N_INSNS (4),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (2),	/* load.  */
    COSTS_N_INSNS (2),	/* load_sign_extend.  */
    COSTS_N_INSNS (2),	/* ldrd.  */
    COSTS_N_INSNS (2),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (5),	/* loadf.  */
    COSTS_N_INSNS (5),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (2),	/* store.  */
    COSTS_N_INSNS (2),	/* strd.  */
    COSTS_N_INSNS (2),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* storef.  */
    COSTS_N_INSNS (1),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (14),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (1),	/* fpconst.  */
      COSTS_N_INSNS (1),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (24),	/* div.  */
      COSTS_N_INSNS (5),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (1),	/* fpconst.  */
      COSTS_N_INSNS (1),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};
const struct cpu_cost_table cortexa8_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    0,			/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    0,			/* log_shift_reg.  */
    0,			/* extend_arith.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),	/* simple.  */
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (1),	/* extend_add.  */
      COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (1),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* loadf.  */
    COSTS_N_INSNS (1),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (1),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* storef.  */
    COSTS_N_INSNS (1),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (36),	/* div.  */
      COSTS_N_INSNS (11),	/* mult.  */
      COSTS_N_INSNS (20),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (9),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (6),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (8),	/* toint.  */
      COSTS_N_INSNS (8),	/* fromint.  */
      COSTS_N_INSNS (8)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (64),	/* div.  */
      COSTS_N_INSNS (16),	/* mult.  */
      COSTS_N_INSNS (25),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (9),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (6),	/* compare.  */
      COSTS_N_INSNS (6),	/* widen.  */
      COSTS_N_INSNS (6),	/* narrow.  */
      COSTS_N_INSNS (8),	/* toint.  */
      COSTS_N_INSNS (8),	/* fromint.  */
      COSTS_N_INSNS (8)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};
const struct cpu_cost_table cortexa5_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (1),	/* extend_add.  */
      COSTS_N_INSNS (7)		/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (6),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (4),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (15),	/* div.  */
      COSTS_N_INSNS (3),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (30),	/* div.  */
      COSTS_N_INSNS (6),	/* mult.  */
      COSTS_N_INSNS (10),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};
const struct cpu_cost_table cortexa7_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (1),	/* extend_add.  */
      COSTS_N_INSNS (7)		/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (2),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (15),	/* div.  */
      COSTS_N_INSNS (3),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (30),	/* div.  */
      COSTS_N_INSNS (6),	/* mult.  */
      COSTS_N_INSNS (10),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};
const struct cpu_cost_table cortexa12_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (2),	/* simple.  */
      COSTS_N_INSNS (3),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (3),	/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      COSTS_N_INSNS (18)	/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (3),	/* extend.  */
      COSTS_N_INSNS (3),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (3),	/* load.  */
    COSTS_N_INSNS (3),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (3),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (3),	/* loadf.  */
    COSTS_N_INSNS (3),	/* loadd.  */
    0,			/* load_unaligned.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    0,			/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (17),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (2),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (31),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (2),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};
const struct cpu_cost_table cortexa15_extra_costs =
{
  /* ALU */
  {
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (2),	/* simple.  */
      COSTS_N_INSNS (3),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (2),	/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      COSTS_N_INSNS (18)	/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (3),	/* extend.  */
      COSTS_N_INSNS (3),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (3),	/* load.  */
    COSTS_N_INSNS (3),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (4),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (4),	/* loadf.  */
    COSTS_N_INSNS (4),	/* loadd.  */
    0,			/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    0,			/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (17),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (5),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (31),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (2),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};
const struct cpu_cost_table v7m_extra_costs =
{
  /* ALU */
  {
    0,			/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* non_exec.  */
    false		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),	/* simple.  */
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (3),	/* extend_add.  */
      COSTS_N_INSNS (8)		/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (3),	/* extend_add.  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (2),	/* load.  */
    0,			/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (2),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    1,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (3),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (2),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (2),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    1,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (3),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (7),	/* div.  */
      COSTS_N_INSNS (2),	/* mult.  */
      COSTS_N_INSNS (5),	/* mult_addsub.  */
      COSTS_N_INSNS (3),	/* fma.  */
      COSTS_N_INSNS (1),	/* addsub.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (15),	/* div.  */
      COSTS_N_INSNS (5),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};
const struct tune_params arm_slowmul_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_fastmul_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  3,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
1832 const struct tune_params arm_xscale_tune
=
1834 &generic_extra_costs
, /* Insn extra costs. */
1835 xscale_sched_adjust_cost
,
1836 arm_default_branch_cost
,
1837 &arm_default_vec_cost
,
1838 2, /* Constant limit. */
1839 3, /* Max cond insns. */
1840 8, /* Memset max inline. */
1841 1, /* Issue rate. */
1842 ARM_PREFETCH_NOT_BENEFICIAL
,
1843 tune_params::PREF_CONST_POOL_TRUE
,
1844 tune_params::PREF_LDRD_FALSE
,
1845 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1846 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1847 tune_params::DISPARAGE_FLAGS_NEITHER
,
1848 tune_params::PREF_NEON_64_FALSE
,
1849 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1850 tune_params::FUSE_NOTHING
,
1851 tune_params::SCHED_AUTOPREF_OFF
1854 const struct tune_params arm_9e_tune
=
1856 &generic_extra_costs
, /* Insn extra costs. */
1857 NULL
, /* Sched adj cost. */
1858 arm_default_branch_cost
,
1859 &arm_default_vec_cost
,
1860 1, /* Constant limit. */
1861 5, /* Max cond insns. */
1862 8, /* Memset max inline. */
1863 1, /* Issue rate. */
1864 ARM_PREFETCH_NOT_BENEFICIAL
,
1865 tune_params::PREF_CONST_POOL_TRUE
,
1866 tune_params::PREF_LDRD_FALSE
,
1867 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1868 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1869 tune_params::DISPARAGE_FLAGS_NEITHER
,
1870 tune_params::PREF_NEON_64_FALSE
,
1871 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1872 tune_params::FUSE_NOTHING
,
1873 tune_params::SCHED_AUTOPREF_OFF
1876 const struct tune_params arm_marvell_pj4_tune
=
1878 &generic_extra_costs
, /* Insn extra costs. */
1879 NULL
, /* Sched adj cost. */
1880 arm_default_branch_cost
,
1881 &arm_default_vec_cost
,
1882 1, /* Constant limit. */
1883 5, /* Max cond insns. */
1884 8, /* Memset max inline. */
1885 2, /* Issue rate. */
1886 ARM_PREFETCH_NOT_BENEFICIAL
,
1887 tune_params::PREF_CONST_POOL_TRUE
,
1888 tune_params::PREF_LDRD_FALSE
,
1889 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1890 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1891 tune_params::DISPARAGE_FLAGS_NEITHER
,
1892 tune_params::PREF_NEON_64_FALSE
,
1893 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1894 tune_params::FUSE_NOTHING
,
1895 tune_params::SCHED_AUTOPREF_OFF
1898 const struct tune_params arm_v6t2_tune
=
1900 &generic_extra_costs
, /* Insn extra costs. */
1901 NULL
, /* Sched adj cost. */
1902 arm_default_branch_cost
,
1903 &arm_default_vec_cost
,
1904 1, /* Constant limit. */
1905 5, /* Max cond insns. */
1906 8, /* Memset max inline. */
1907 1, /* Issue rate. */
1908 ARM_PREFETCH_NOT_BENEFICIAL
,
1909 tune_params::PREF_CONST_POOL_FALSE
,
1910 tune_params::PREF_LDRD_FALSE
,
1911 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1912 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1913 tune_params::DISPARAGE_FLAGS_NEITHER
,
1914 tune_params::PREF_NEON_64_FALSE
,
1915 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1916 tune_params::FUSE_NOTHING
,
1917 tune_params::SCHED_AUTOPREF_OFF
/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_cortex_a35_tune =
{
  &cortexa53_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_exynosm1_tune =
{
  &exynosm1_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  32,					/* Memset max inline.  */
  4,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a73_tune =
{
  &cortexa57_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  &v7m_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
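/* As a concrete illustration of the trade-off weighed above (a sketch, not
   actual compiler output; the cycle counts are the assumptions stated in the
   comment and the constant is arbitrary):

	movw	r0, #0x5678	@ immediate pair: two insns, one cycle each
	movt	r0, #0x1234
   versus
	ldr	r0, .LC0	@ literal pool: one insn, two cycles, but can
				@ pipeline with neighbouring loads/stores
   .LC0:
	.word	0x12345678

   This is why arm_v7m_tune above selects PREF_CONST_POOL_TRUE.  */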
/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  &v7m_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
   cortex-m1.small-multiply.  */

const struct tune_params arm_v6m_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fa726te_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Auto-generated CPU, FPU and architecture tables.  */
#include "arm-cpu-data.h"

/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */
char arm_arch_name[] = "__ARM_ARCH_PROFILE__";

/* Supported TLS relocations.  */
enum tls_reloc
{
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
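/* An illustrative consequence (behaviour of the callers of this function,
   stated here as an assumption rather than derived from this file): when
   optimizing for size the limit is 1, so a constant such as 0x12345678 that
   would need a two-insn movw/movt or mov/orr sequence is instead placed in
   the literal pool and fetched with a single ldr.  */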
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
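/* A worked example of the clear-lowest-set-bit loop above (illustrative):
   bit_count (0x2a) computes 0x2a & 0x29 = 0x28, then 0x28 & 0x27 = 0x20,
   then 0x20 & 0x1f = 0, i.e. three iterations for the three set bits.  */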
/* Return the number of bits set in BMAP.  */
static unsigned
bitmap_popcount (const sbitmap bmap)
{
  unsigned int count = 0;
  unsigned int n = 0;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
    count++;
  return count;
}

typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
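/* For example (an illustration of the naming scheme, using one assumed
   optab/mode pairing taken from the loops further down):

     arm_set_fixed_optab_libfunc (add_optab, E_SQmode, "add", "sq", 3);

   registers the libcall name "__gnu_addsq3", whereas a NUM_SUFFIX of 0
   would register plain "__gnu_addsq".  */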
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
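/* Illustrative names produced by the rule above (mode spellings are the
   ones used by the tables further down): a fract conversion between two
   signed fract modes, say QQmode to HQmode, satisfies all three conditions
   and becomes "__gnu_fractqqhq2"; a conversion from QQmode (fract) to
   HAmode (accum) differs in fract-ness and so stays "__gnu_fractqqha".  */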
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      set_conv_libfunc (trunc_optab, HFmode, DFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_d2h_ieee"
			 : "__gnu_d2h_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ E_QQmode, "qq" },
	{ E_UQQmode, "uqq" },
	{ E_HQmode, "hq" },
	{ E_UHQmode, "uhq" },
	{ E_SQmode, "sq" },
	{ E_USQmode, "usq" },
	{ E_DQmode, "dq" },
	{ E_UDQmode, "udq" },
	{ E_TQmode, "tq" },
	{ E_UTQmode, "utq" },
	{ E_HAmode, "ha" },
	{ E_UHAmode, "uha" },
	{ E_SAmode, "sa" },
	{ E_USAmode, "usa" },
	{ E_DAmode, "da" },
	{ E_UDAmode, "uda" },
	{ E_TAmode, "ta" },
	{ E_UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ E_QQmode, "qq" },
	{ E_UQQmode, "uqq" },
	{ E_HQmode, "hq" },
	{ E_UHQmode, "uhq" },
	{ E_SQmode, "sq" },
	{ E_USQmode, "usq" },
	{ E_DQmode, "dq" },
	{ E_UDQmode, "udq" },
	{ E_TQmode, "tq" },
	{ E_UTQmode, "utq" },
	{ E_HAmode, "ha" },
	{ E_UHAmode, "uha" },
	{ E_SAmode, "sa" },
	{ E_USAmode, "usa" },
	{ E_DAmode, "da" },
	{ E_UDAmode, "uda" },
	{ E_TAmode, "ta" },
	{ E_UTAmode, "uta" },
	{ E_QImode, "qi" },
	{ E_HImode, "hi" },
	{ E_SImode, "si" },
	{ E_DImode, "di" },
	{ E_TImode, "ti" },
	{ E_SFmode, "sf" },
	{ E_DFmode, "df" }
      };
    unsigned int i, j;

    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }

    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
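/* One consequence of fixing the tag name (illustrative): on AAPCS targets a
   C++ function such as void f (va_list) mangles the structure tag into its
   symbol name, something like _Z1f9__va_list, so renaming the tag would
   silently break link compatibility with existing objects.  */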
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}

/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT
      && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling "
	     "for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when "
	     "compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  /* We only support -mpure-code and -mslow-flash-data on M-profile targets
     with MOVT.  */
  if ((target_pure_code || target_slow_flash_data)
      && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
    {
      const char *flag = (target_pure_code ? "-mpure-code" :
					     "-mslow-flash-data");
      error ("%s only supports non-pic code on M-profile targets with the "
	     "MOVT instruction", flag);
    }
}

/* Recompute the global settings depending on target attribute options.  */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }
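  /* A quick check of the figures above (illustrative): Thumb-1 word loads
     take a 5-bit immediate scaled by 4, giving byte offsets 0..124, hence
     the 7-bit (0..127) anchor range; and for Thumb-2,
     248 + 1 + 4095 = 4344 = 8 * 543, so anchor blocks keep their natural
     eight-byte spacing.  */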
  /* Increase the number of conditional instructions with -Os.  */
  max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;

  /* For THUMB2, we limit the conditional sequence to one IT block.  */
  if (TARGET_THUMB2)
    max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
}

/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode.  */
static GTY(()) bool thumb_flipper;

/* Options after initial target override.  */
static GTY(()) tree init_optimize;

static void
arm_override_options_after_change_1 (struct gcc_options *opts)
{
  if (opts->x_align_functions <= 0)
    opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
      && opts->x_optimize_size ? 2 : 4;
}

/* Implement targetm.override_options_after_change.  */

static void
arm_override_options_after_change (void)
{
  arm_configure_build_target (&arm_active_target,
			      TREE_TARGET_OPTION (target_option_default_node),
			      &global_options_set, false);

  arm_override_options_after_change_1 (&global_options);
}

/* Implement TARGET_OPTION_SAVE.  */
static void
arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
{
  ptr->x_arm_arch_string = opts->x_arm_arch_string;
  ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
  ptr->x_arm_tune_string = opts->x_arm_tune_string;
}

/* Implement TARGET_OPTION_RESTORE.  */
static void
arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
{
  opts->x_arm_arch_string = ptr->x_arm_arch_string;
  opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
  opts->x_arm_tune_string = ptr->x_arm_tune_string;
  arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
			      false);
}

/* Reset options between modes that the user has specified.  */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  arm_override_options_after_change_1 (opts);

  if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      /* The default is to enable interworking, so this warning message would
	 be confusing to users who have just compiled with, eg, -march=armv3.  */
      /* warning (0, "ignoring -minterwork because target CPU does not support THUMB");  */
      opts->x_target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB_P (opts->x_target_flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  /* We need to remember the initial values so that combinations of options
     like -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  /* ARM execution state and M profile don't have [restrict] IT.  */
  if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
    opts->x_arm_restrict_it = 0;

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
       i.e. Thumb2 and ARM state only.
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors
     - ARMv8-M Baseline processors.  */

  if (! opts_set->x_unaligned_access)
    {
      opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
				  && arm_arch6 && (arm_arch_notm || arm_arch7));
    }
  else if (opts->x_unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      opts->x_unaligned_access = 0;
    }

  /* Don't warn since it's on by default in -O2.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);

#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
  SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
#endif
}
static sbitmap isa_all_fpubits;
static sbitmap isa_quirkbits;

/* Configure a build target TARGET from the user-specified options OPTS and
   OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
   architecture have been specified, but the two are not identical.  */
void
arm_configure_build_target (struct arm_build_target *target,
			    struct cl_target_option *opts,
			    struct gcc_options *opts_set,
			    bool warn_compatible)
{
  const cpu_option *arm_selected_tune = NULL;
  const arch_option *arm_selected_arch = NULL;
  const cpu_option *arm_selected_cpu = NULL;
  const arm_fpu_desc *arm_selected_fpu = NULL;
  const char *tune_opts = NULL;
  const char *arch_opts = NULL;
  const char *cpu_opts = NULL;

  bitmap_clear (target->isa);
  target->core_name = NULL;
  target->arch_name = NULL;

  if (opts_set->x_arm_arch_string)
    {
      arm_selected_arch = arm_parse_arch_option_name (all_architectures,
						      "-march",
						      opts->x_arm_arch_string);
      arch_opts = strchr (opts->x_arm_arch_string, '+');
    }

  if (opts_set->x_arm_cpu_string)
    {
      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
						    opts->x_arm_cpu_string);
      cpu_opts = strchr (opts->x_arm_cpu_string, '+');
      arm_selected_tune = arm_selected_cpu;
      /* If taking the tuning from -mcpu, we don't need to rescan the
	 options for tuning.  */
    }

  if (opts_set->x_arm_tune_string)
    {
      arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
						     opts->x_arm_tune_string);
      tune_opts = strchr (opts->x_arm_tune_string, '+');
    }

  if (arm_selected_arch)
    {
      arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_arch->common,
				 arch_opts);

      if (arm_selected_cpu)
	{
	  auto_sbitmap cpu_isa (isa_num_bits);
	  auto_sbitmap isa_delta (isa_num_bits);

	  arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
	  arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
				     cpu_opts);
	  bitmap_xor (isa_delta, cpu_isa, target->isa);
	  /* Ignore any bits that are quirk bits.  */
	  bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
	  /* Ignore (for now) any bits that might be set by -mfpu.  */
	  bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);

	  if (!bitmap_empty_p (isa_delta))
	    {
	      if (warn_compatible)
		warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
			 arm_selected_cpu->common.name,
			 arm_selected_arch->common.name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	      target->arch_name = arm_selected_arch->common.name;
	    }
	  else
	    {
	      /* Architecture and CPU are essentially the same.
		 Prefer the CPU setting.  */
	      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
	      target->core_name = arm_selected_cpu->common.name;
	      /* Copy the CPU's capabilities, so that we inherit the
		 appropriate extensions and quirks.  */
	      bitmap_copy (target->isa, cpu_isa);
	    }
	}
      else
	{
	  /* Pick a CPU based on the architecture.  */
	  arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	  target->arch_name = arm_selected_arch->common.name;
	  /* Note: target->core_name is left unset in this path.  */
	}
    }
  else if (arm_selected_cpu)
    {
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }
  /* If the user did not specify a processor or architecture, choose
     one for them.  */
  else
    {
      const cpu_option *sel;
      auto_sbitmap sought_isa (isa_num_bits);
      bitmap_clear (sought_isa);
      auto_sbitmap default_isa (isa_num_bits);

      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
						    TARGET_CPU_DEFAULT);
      cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
      gcc_assert (arm_selected_cpu->common.name);

      /* RWE: All of the selection logic below (to the end of this
	 'if' clause) looks somewhat suspect.  It appears to be mostly
	 there to support forcing thumb support when the default CPU
	 does not have thumb (somewhat dubious in terms of what the
	 user might be expecting).  I think it should be removed once
	 support for the pre-thumb era cores is removed.  */
      sel = arm_selected_cpu;
      arm_initialize_isa (default_isa, sel->common.isa_bits);
      arm_parse_option_features (default_isa, &arm_selected_cpu->common,
				 cpu_opts);

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  bitmap_set_bit (sought_isa, isa_bit_thumb);
	  bitmap_set_bit (sought_isa, isa_bit_mode32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we forcibly remove MODE26 from
	     the isa features here (if it was set), so that the
	     search below will always be able to find a compatible
	     processor.  */
	  bitmap_clear_bit (default_isa, isa_bit_mode26);
	}

      /* If there are such requirements and the default CPU does not
	 satisfy them, we need to run over the complete list of
	 cores looking for one that is satisfactory.  */
      if (!bitmap_empty_p (sought_isa)
	  && !bitmap_subset_p (sought_isa, default_isa))
	{
	  auto_sbitmap candidate_isa (isa_num_bits);
	  /* We're only interested in a CPU with at least the
	     capabilities of the default CPU and the required
	     additional features.  */
	  bitmap_ior (default_isa, default_isa, sought_isa);

	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->common.name != NULL; sel++)
	    {
	      arm_initialize_isa (candidate_isa, sel->common.isa_bits);
	      /* An exact match?  */
	      if (bitmap_equal_p (default_isa, candidate_isa))
		break;
	    }

	  if (sel->common.name == NULL)
	    {
	      unsigned current_bit_count = isa_num_bits;
	      const cpu_option *best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a CPU that has exactly the
		 characteristics of the default CPU and the given
		 command line options we scan the array again looking
		 for a best match.  The best match must have at least
		 the capabilities of the perfect match.  */
	      for (sel = all_cores; sel->common.name != NULL; sel++)
		{
		  arm_initialize_isa (candidate_isa, sel->common.isa_bits);

		  if (bitmap_subset_p (default_isa, candidate_isa))
		    {
		      unsigned count;

		      bitmap_and_compl (candidate_isa, candidate_isa,
					default_isa);
		      count = bitmap_popcount (candidate_isa);

		      if (count < current_bit_count)
			{
			  best_fit = sel;
			  current_bit_count = count;
			}
		    }
		}

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}

      /* Now we know the CPU, we can finally initialize the target
	 structure.  */
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }

  gcc_assert (arm_selected_cpu);
  gcc_assert (arm_selected_arch);

  if (opts->x_arm_fpu_index != TARGET_FPU_auto)
    {
      arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
      auto_sbitmap fpu_bits (isa_num_bits);

      arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
      bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
      bitmap_ior (target->isa, target->isa, fpu_bits);
    }

  if (!arm_selected_tune)
    arm_selected_tune = arm_selected_cpu;
  else /* Validate the features passed to -mtune.  */
    arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);

  const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];

  /* Finish initializing the target structure.  */
  target->arch_pp_name = arm_selected_arch->arch;
  target->base_arch = arm_selected_arch->base_arch;
  target->profile = arm_selected_arch->profile;

  target->tune_flags = tune_data->tune_flags;
  target->tune = tune_data->tune;
  target->tune_core = tune_data->scheduler;
}
3299 /* Fix up any incompatible options that the user has specified. */
3301 arm_option_override (void)
3303 static const enum isa_feature fpu_bitlist
[]
3304 = { ISA_ALL_FPU_INTERNAL
, isa_nobit
};
3305 static const enum isa_feature quirk_bitlist
[] = { ISA_ALL_QUIRKS
, isa_nobit
};
3306 cl_target_option opts
;
3308 isa_quirkbits
= sbitmap_alloc (isa_num_bits
);
3309 arm_initialize_isa (isa_quirkbits
, quirk_bitlist
);
3311 isa_all_fpubits
= sbitmap_alloc (isa_num_bits
);
3312 arm_initialize_isa (isa_all_fpubits
, fpu_bitlist
);
3314 arm_active_target
.isa
= sbitmap_alloc (isa_num_bits
);
3316 if (!global_options_set
.x_arm_fpu_index
)
3321 ok
= opt_enum_arg_to_value (OPT_mfpu_
, FPUTYPE_AUTO
, &fpu_index
,
3324 arm_fpu_index
= (enum fpu_type
) fpu_index
;
3327 cl_target_option_save (&opts
, &global_options
);
3328 arm_configure_build_target (&arm_active_target
, &opts
, &global_options_set
,
3331 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3332 SUBTARGET_OVERRIDE_OPTIONS
;
3335 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_active_target
.arch_pp_name
);
3336 arm_base_arch
= arm_active_target
.base_arch
;
3338 arm_tune
= arm_active_target
.tune_core
;
3339 tune_flags
= arm_active_target
.tune_flags
;
3340 current_tune
= arm_active_target
.tune
;
3342 /* TBD: Dwarf info for apcs frame is not handled yet. */
3343 if (TARGET_APCS_FRAME
)
3344 flag_shrink_wrap
= false;
3346 /* BPABI targets use linker tricks to allow interworking on cores
3347 without thumb support. */
3348 if (TARGET_INTERWORK
3350 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3352 warning (0, "target CPU does not support interworking" );
3353 target_flags
&= ~MASK_INTERWORK
;
3356 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3358 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3359 target_flags
|= MASK_APCS_FRAME
;
3362 if (TARGET_POKE_FUNCTION_NAME
)
3363 target_flags
|= MASK_APCS_FRAME
;
3365 if (TARGET_APCS_REENT
&& flag_pic
)
3366 error ("-fpic and -mapcs-reent are incompatible");
3368 if (TARGET_APCS_REENT
)
3369 warning (0, "APCS reentrant code not supported. Ignored");
3371 /* Initialize boolean versions of the architectural flags, for use
3372 in the arm.md file. */
3373 arm_arch3m
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv3m
);
3374 arm_arch4
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv4
);
3375 arm_arch4t
= arm_arch4
&& bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3376 arm_arch5
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv5
);
3377 arm_arch5e
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv5e
);
3378 arm_arch5te
= arm_arch5e
3379 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3380 arm_arch6
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv6
);
3381 arm_arch6k
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv6k
);
3382 arm_arch_notm
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
);
3383 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3384 arm_arch7
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv7
);
3385 arm_arch7em
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv7em
);
3386 arm_arch8
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8
);
3387 arm_arch8_1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_1
);
3388 arm_arch8_2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_2
);
3389 arm_arch_thumb1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3390 arm_arch_thumb2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb2
);
3391 arm_arch_xscale
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_xscale
);
3392 arm_arch_iwmmxt
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt
);
3393 arm_arch_iwmmxt2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt2
);
3394 arm_arch_thumb_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_tdiv
);
3395 arm_arch_arm_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_adiv
);
3396 arm_arch_crc
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_crc32
);
3397 arm_arch_cmse
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_cmse
);
3398 arm_fp16_inst
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_fp16
);
3399 arm_arch_lpae
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_lpae
);
3402 if (arm_fp16_format
== ARM_FP16_FORMAT_ALTERNATIVE
)
3403 error ("selected fp16 options are incompatible");
3404 arm_fp16_format
= ARM_FP16_FORMAT_IEEE
;
3408 /* Set up some tuning parameters. */
3409 arm_ld_sched
= (tune_flags
& TF_LDSCHED
) != 0;
3410 arm_tune_strongarm
= (tune_flags
& TF_STRONG
) != 0;
3411 arm_tune_wbuf
= (tune_flags
& TF_WBUF
) != 0;
3412 arm_tune_xscale
= (tune_flags
& TF_XSCALE
) != 0;
3413 arm_tune_cortex_a9
= (arm_tune
== TARGET_CPU_cortexa9
) != 0;
3414 arm_m_profile_small_mul
= (tune_flags
& TF_SMALLMUL
) != 0;
3416 /* And finally, set up some quirks. */
3417 arm_arch_no_volatile_ce
3418 = bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_no_volatile_ce
);
3419 arm_arch6kz
= arm_arch6k
&& bitmap_bit_p (arm_active_target
.isa
,
3420 isa_bit_quirk_armv6kz
);
3422 /* V5 code we generate is completely interworking capable, so we turn off
3423 TARGET_INTERWORK here to avoid many tests later on. */
3425 /* XXX However, we must pass the right pre-processor defines to CPP
3426 or GLD can get confused. This is a hack. */
3427 if (TARGET_INTERWORK
)
3428 arm_cpp_interwork
= 1;
3431 target_flags
&= ~MASK_INTERWORK
;
3433 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3434 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3436 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3437 error ("iwmmxt abi requires an iwmmxt capable cpu");
3439 /* If soft-float is specified then don't use FPU. */
3440 if (TARGET_SOFT_FLOAT
)
3441 arm_fpu_attr
= FPU_NONE
;
3443 arm_fpu_attr
= FPU_VFP
;
3445 if (TARGET_AAPCS_BASED
)
3447 if (TARGET_CALLER_INTERWORKING
)
3448 error ("AAPCS does not support -mcaller-super-interworking");
3450 if (TARGET_CALLEE_INTERWORKING
)
3451 error ("AAPCS does not support -mcallee-super-interworking");
3454 /* __fp16 support currently assumes the core has ldrh. */
3455 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3456 sorry ("__fp16 and no ldrh");
  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (TARGET_HARD_FLOAT_ABI)
	{
	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
	    error ("-mfloat-abi=hard: selected processor lacks an FPU");
	}
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD)
	sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;
  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }
  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      warning (0, "option %<-mstructure-size-boundary%> is deprecated");

      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }
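
  /* Illustrative example (not from the original source): with
     -mstructure-size-boundary=32 a structure such as

       struct packet { char tag; };

     is padded so sizeof (struct packet) == 4, whereas the AAPCS default
     of 8 leaves sizeof == 1.  Objects built with different settings are
     therefore not binary compatible, which is why the option warns.  */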
  if (TARGET_VXWORKS_RTP)
    {
      if (!global_options_set.x_arm_pic_data_is_text_relative)
	arm_pic_data_is_text_relative = 0;
    }
  else if (flag_pic
	   && !arm_pic_data_is_text_relative
	   && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
    /* When text & data segments don't have a fixed displacement, the
       intended use is with a single, read only, pic base register.
       Unless the user explicitly requested not to do that, set
       it.  */
    target_flags |= MASK_SINGLE_PIC_BASE;
  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;
  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }
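
  /* For illustration (not part of the original code): a kernel-style
     build might pin the PIC base with -fpic -msingle-pic-base
     -mpic-register=r9.  decode_reg_name maps the string "r9" to regno 9,
     and the checks above reject obviously unusable choices such as sp,
     pc, the frame pointer or a call-clobbered register.  */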
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }
  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;
  /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;
  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			   current_tune->prefetch.num_slots,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			   current_tune->prefetch.l1_cache_line_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			   current_tune->prefetch.l1_cache_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  /* Use Neon to perform 64-bits operations rather than core
     registers.  */
  prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
  if (use_neon_for_64bits == 1)
    prefer_neon_for_64bits = true;
  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int param_sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      param_sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      param_sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      param_sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
			 param_sched_autopref_queue_depth,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Currently, for slow flash data, we just disable literal pools.  We also
     disable it for pure-code.  */
  if (target_slow_flash_data || target_pure_code)
    arm_disable_literal_pool = true;
  if (use_cmse && !arm_arch_cmse)
    error ("target CPU does not support ARMv8-M Security Extensions");

  /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
     and ARMv8-M Baseline and Mainline do not allow such configuration.  */
  if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
    error ("ARMv8-M Security Extensions incompatible with selected FPU");
  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;
  /* Need to remember initial options before they are overridden.  */
  init_optimize = build_optimization_node (&global_options);

  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal ();

  /* Create the default target_options structure.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Init initial mode for testing.  */
  thumb_flipper = TARGET_THUMB;
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
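
/* Example of how this table is reached from user code (illustrative,
   not part of the original source):

     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));

   The string argument is looked up in isr_attribute_args by
   arm_isr_value below, yielding ARM_FT_FIQ for this declaration.  */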
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char * arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  if (lookup_attribute ("cmse_nonsecure_entry", attr))
    type |= ARM_FT_CMSE_ENTRY;

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr 		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

static void
arm_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.syntax unified\n");

  if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
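
/* Resulting trampoline layout, for reference (illustrative summary, the
   offsets follow directly from the adjust_address calls above):

     32-bit (ARM/Thumb-2):          16-bit Thumb:
       0..7   template insns          0..11  template insns
       8..11  static chain value      12..15 static chain value
       12..15 target address          16..19 target address

   __clear_cache is then run over the whole block so the newly written
   words are visible to instruction fetch.  */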
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
	 assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
	  && arm_compute_static_chain_stack_bytes() != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ...  */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
     several instructions if anything needs to be popped.  */
  if (saved_int_regs && IS_CMSE_ENTRY (func_type))
    return 0;

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  /* Note this function can be called before or after reload.  */
  if (!reload_completed)
    arm_compute_frame_layout ();

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }
  else if (TARGET_HAVE_MOVT)
    {
      /* Thumb-1 Targets with MOVT.  */
      if (i > 0xffff)
	return FALSE;
      else
	return TRUE;
    }

  return FALSE;
}
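
/* Worked examples for the function above (illustrative, not part of the
   original source): 0x0000ff00 is valid in ARM mode, since it is 0xff
   rotated by an even amount.  0x000001fe (0xff << 1) is NOT a valid
   ARM-mode immediate because only even rotations exist, but it is
   accepted for Thumb-2, where an 8-bit value may be shifted to any
   position.  0x00ab00ab matches the Thumb-2 replicated pattern
   0x00XY00XY and is also accepted.  */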
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* Fall through.  */
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
	     && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are diadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));

	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
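
/* Illustrative note (not from the original source): a SET of a constant
   such as 0x0000ffff that no single mov/mvn can encode ends up in
   arm_gen_constant, which might emit, e.g.

     mov  rD, #0xff00
     orr  rD, rD, #0xff

   whereas with TARGET_USE_MOVT a single movw suffices; the cost test
   above picks whichever strategy needs fewer insns.  */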
/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((HOST_WIDE_INT_1U << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
			      struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
     wrapping around to the top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  loc = i;
	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				    | ((i < end) ? (0xff >> (32 - end))
						 : 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two. Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}

      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

         We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 make it worse.

         TODO: 16-bit constants that don't clear all the bits, but still win.
         TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
	  b2 = (remainder & 0x00ff0000) >> 16;
	  b3 = (remainder & 0x0000ff00) >> 8;
	  b4 = remainder & 0xff;

	  if (loc > 24)
	    {
	      /* The 8-bit immediate already found clears b1 (and maybe b2),
		 but must leave b3 and b4 alone.  */

	      /* First try to find a 32-bit replicated constant that clears
		 almost everything.  We can assume that we can't do it in one,
		 or else we wouldn't be here.  */
	      unsigned int tmp = b1 & b2 & b3 & b4;
	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
				  + (tmp << 24);
	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
					    + (tmp == b3) + (tmp == b4);
	      if (tmp
		  && (matching_bytes >= 3
		      || (matching_bytes == 2
			  && const_ok_for_op (remainder & ~tmp2, code))))
		{
		  /* At least 3 of the bytes match, and the fourth has at
		     least as many bits set, or two of the bytes match
		     and it will only require one more insn to finish.  */
		  result = tmp2;
		  i = tmp != b1 ? 32
		      : tmp != b2 ? 24
		      : tmp != b3 ? 16
		      : 8;
		}

	      /* Second, try to find a 16-bit replicated constant that can
		 leave three of the bytes clear.  If b2 or b4 is already
		 zero, then we can.  If the 8-bit from above would not
		 clear b2 anyway, then we still win.  */
	      else if (b1 == b3 && (!b2 || !b4
				    || (remainder & 0x00ff0000 & ~result)))
		{
		  result = remainder & 0xff00ff00;
		  i = 24;
		}
	    }
	  else if (loc > 16)
	    {
	      /* The 8-bit immediate already found clears b2 (and maybe b3)
		 and we don't get here unless b1 is already clear, but it will
		 leave b4 unchanged.  */

	      /* If we can clear b2 and b4 at once, then we win, since the
		 8-bits couldn't possibly reach that far.  */
	      if (b2 == b4)
		{
		  result = remainder & 0x00ff00ff;
		  i = 16;
		}
	    }
	}

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
		  int subtargets, int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_MINUS (mode, GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}

      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }

  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good, way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}

      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}

      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET
		       (target,
			gen_rtx_IOR (mode,
				     gen_rtx_ASHIFT (mode, source,
						     GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0 */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (sub, GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (target,
						   gen_rtx_fmt_ee (code, mode,
								   source, sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /*  Convert.
	  x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
	                     and the remainder 0s for e.g. 0xfff00000)
	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

	  This can be done in 2 instructions by using shifts with mov or mvn.
	  e.g. for
	  x = x | 0xfff00000;
	  we generate.
	  mvn	r0, r0, asl #12
	  mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	  x = y | constant (which has set_zero_bit_copies number of trailing ones).
	   to
	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	  For eg. r0 = r0 | 0xfff
	       mvn	r0, r0, lsr #12
	       mvn	r0, r0, asl #12
      */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction. This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).
      */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;

    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, SImode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }

  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
	{
	  rtx new_src, temp1_rtx;

	  temp1 = immediates->i[i];

	  if (code == SET || code == MINUS)
	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
	  else if ((final_invert || i < (insns - 1)) && subtargets)
	    new_src = gen_reg_rtx (mode);
	  else
	    new_src = target;

	  if (can_invert)
	    temp1 = ~temp1;
	  else if (can_negate)
	    temp1 = -temp1;

	  temp1 = trunc_int_for_mode (temp1, mode);
	  temp1_rtx = GEN_INT (temp1);

	  if (code == SET)
	    ;
	  else if (code == MINUS)
	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
	  else
	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
	  source = new_src;

	  if (code == SET)
	    {
	      can_negate = can_invert;
	      can_invert = 0;
	      code = PLUS;
	    }
	  else if (code == MINUS)
	    code = PLUS;
	}
    }

  if (final_invert)
    {
      if (generate)
	emit_constant_insn (cond, gen_rtx_SET (target,
					       gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (CONST_INT_P (*op1))
	    {
	      i = INTVAL (*op1);
	      switch (*code)
		{
		case GT:
		case LE:
		  if (i != maxval
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GT ? GE : LT;
		      return;
		    }
		  break;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0)
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GTU ? GEU : LTU;
		      return;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }

	  /* If that did not work, reverse the condition.  */
	  if (!op0_preserve_value)
	    {
	      std::swap (*op0, *op1);
	      *code = (int)swap_condition ((enum rtx_code)*code);
	    }
	}
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GT ? GE : LT;
	  return;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GE ? GT : LE;
	  return;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GTU ? GEU : LTU;
	  return;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GEU ? GTU : LEU;
	  return;
	}
      break;

    default:
      gcc_unreachable ();
    }
}
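
/* Worked example (illustrative, not part of the original source): for an
   SImode test  (x > 4095),  4095 is not a valid ARM immediate but 4096
   (1 << 12) is, so the code above rewrites the test as  (x >= 4096):
   *op1 becomes 4096 and GT becomes GE, letting a single cmp do the job.  */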
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value(const_tree type, const_tree func,
		   bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return arm_libcall_value_1 (mode);
}
/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table<libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, SFmode));

      /* Values from double-precision helper functions are returned in core
	 registers if the selected core only supports single-precision
	 arithmetic, even if we are using the hard-float ABI.  The same is
	 true for single-precision helpers, but we will never be using the
	 hard-float ABI on a CPU which doesn't support single-precision
	 operations in hardware.  */
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
							SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
							DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER(1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}
/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (i.e. not including vectors and
	 complex) are always returned in a register (or registers).
	 We don't care about which register here, so we can short-cut
	 some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
	  && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;

      /* Check any available co-processors to see if they accept the
	 type as a register candidate (VFP, for example, can return
	 some aggregates in consecutive registers).  These aren't
	 available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;

      /* Vector values should be returned using ARM registers, not
	 memory (unless they're over 16 bytes, which will break since
	 we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
	 if the type is 'integer like' and every addressable element
	 has an offset of zero.  For practical purposes this means
	 that the structure can have at most one non bit-field element
	 and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
	 have been created by C++.  */
      for (field = TYPE_FIELDS (type);
	   field && TREE_CODE (field) != FIELD_DECL;
	   field = DECL_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
	return true;

      /* ... Aggregates that are not themselves valid for returning in
	 a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
	 since they are not addressable.  */
      for (field = DECL_CHAIN (field);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
	}

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
	 integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
	}

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
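
/* Illustrative consequences under AAPCS (not part of the original code):

     struct s1 { int a; };      size 4  -> returned in r0
     struct s2 { int a[4]; };   size 16 -> returned in memory, unless a
                                           co-processor (e.g. VFP) claims it

   Scalar ints and floats always come back in a register.  */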
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };
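
/* Example use of the attribute recognized here (illustrative, not part
   of the original source):

     double dot (const double *a, const double *b, int n)
       __attribute__ ((pcs ("aapcs-vfp")));

   marks a function as using VFP argument/return registers even when the
   translation unit's default is the base AAPCS variant.  */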
static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
	  if (i && i->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
                    const_tree fntype ATTRIBUTE_UNUSED,
                    rtx libcall ATTRIBUTE_UNUSED,
                    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */

static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != HFmode)
        return -1;

      if (*modep == VOIDmode)
        *modep = mode;

      if (*modep == mode)
        return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
        return -1;

      if (*modep == VOIDmode)
        *modep = mode;

      if (*modep == mode)
        return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
         and 128-bit vector types, whether or not those modes are
         supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
        {
        case 8:
          mode = V2SImode;
          break;
        case 16:
          mode = V4SImode;
          break;
        default:
          return -1;
        }

      if (*modep == VOIDmode)
        *modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
         equivalent for the purposes of being homogeneous aggregates
         if they are the same size.  */
      if (*modep == mode)
        return 1;

      break;

    case ARRAY_TYPE:
      {
        int count;
        tree index = TYPE_DOMAIN (type);

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
        if (count == -1
            || !index
            || !TYPE_MAX_VALUE (index)
            || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
            || !TYPE_MIN_VALUE (index)
            || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
            || count < 0)
          return -1;

        count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
                  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

        /* There must be no padding.  */
        if (wi::to_wide (TYPE_SIZE (type))
            != count * GET_MODE_BITSIZE (*modep))
          return -1;

        return count;
      }

    case RECORD_TYPE:
      {
        int count = 0;
        int sub_count;
        tree field;

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
          {
            if (TREE_CODE (field) != FIELD_DECL)
              continue;

            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
            if (sub_count < 0)
              return -1;
            count += sub_count;
          }

        /* There must be no padding.  */
        if (wi::to_wide (TYPE_SIZE (type))
            != count * GET_MODE_BITSIZE (*modep))
          return -1;

        return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
        /* These aren't very interesting except in a degenerate case.  */
        int count = 0;
        int sub_count;
        tree field;

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
          {
            if (TREE_CODE (field) != FIELD_DECL)
              continue;

            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
            if (sub_count < 0)
              return -1;
            count = count > sub_count ? count : sub_count;
          }

        /* There must be no padding.  */
        if (wi::to_wide (TYPE_SIZE (type))
            != count * GET_MODE_BITSIZE (*modep))
          return -1;

        return count;
      }

    default:
      break;
    }

  return -1;
}
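/* Illustration (hypothetical user code, not part of this file):

     struct hfa { float x, y, z; };

   recurses to three REAL_TYPE leaves of SFmode, so the walk returns 3
   with *modep set to SFmode: a homogeneous floating-point aggregate.
   Mixing float and double members, or adding an int member, makes a
   leaf mismatch *MODEP and the result is -1.  */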
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
        {
          sorry ("Thumb-1 hard-float VFP ABI");
          /* sorry() is not immediately fatal, so only display this once.  */
          seen_thumb1_vfp = true;
        }

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_HARD_FLOAT &&
          (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
                                       machine_mode mode, const_tree type,
                                       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
        *count = ag_count;
      else
        return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
           || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
           || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;
  return true;
}
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
                               machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
                                                &ag_mode, &count);
}
static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
                             const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
                                                &pcum->aapcs_vfp_rmode,
                                                &pcum->aapcs_vfp_rcount);
}
/* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
   for the behaviour of this function.  */

static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
                    const_tree type ATTRIBUTE_UNUSED)
{
  int rmode_size
    = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
  int shift = rmode_size / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
        pcum->aapcs_vfp_reg_alloc = mask << regno;
        if (mode == BLKmode
            || (mode == TImode && ! TARGET_NEON)
            || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
          {
            int i;
            int rcount = pcum->aapcs_vfp_rcount;
            int rshift = shift;
            machine_mode rmode = pcum->aapcs_vfp_rmode;
            rtx par;
            if (!TARGET_NEON)
              {
                /* Avoid using unsupported vector modes.  */
                if (rmode == V2SImode)
                  rmode = DImode;
                else if (rmode == V4SImode)
                  {
                    rmode = DImode;
                    rcount *= 2;
                    rshift /= 2;
                  }
              }
            par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
            for (i = 0; i < rcount; i++)
              {
                rtx tmp = gen_rtx_REG (rmode,
                                       FIRST_VFP_REGNUM + regno + i * rshift);
                tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                         GEN_INT (i * GET_MODE_SIZE (rmode)));
                XVECEXP (par, 0, i) = tmp;
              }

            pcum->aapcs_reg = par;
          }
        else
          pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
        return true;
      }
  return false;
}
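/* Illustration (worked example, not part of this file): for an
   argument that is a homogeneous aggregate of two DFmode members,
   rmode_size is 8, so shift == 2 and mask == 0xf.  The scan above
   therefore looks for four consecutive free S registers starting at
   an even register number, e.g. s0-s3 (that is, d0-d1).  */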
/* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
   comment there for the behaviour of this function.  */

static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
                               machine_mode mode,
                               const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode
      || (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
          && !TARGET_NEON))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
                                             &ag_mode, &count);

      if (!TARGET_NEON)
        {
          if (ag_mode == V2SImode)
            ag_mode = DImode;
          else if (ag_mode == V4SImode)
            {
              ag_mode = DImode;
              count *= 2;
            }
        }
      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
        {
          rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
          tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
          XVECEXP (par, 0, i) = tmp;
        }

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
                   machine_mode mode ATTRIBUTE_UNUSED,
                   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
  return;
}
#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
     be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a call.  This
     routine must not fail and will only be called if is_return_candidate
     returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
                          const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}
static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
        {
          fndecl = fntype;
          fntype = TREE_TYPE (fntype);
        }

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
                                                        TYPE_MODE (type),
                                                        type))
          return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
                           const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
        {
          fndecl = fntype;
          fntype = TREE_TYPE (fntype);
        }

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
                                                        type))
          return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
                                                             mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
        }
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}
static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
                  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
         preparation phase, these are handled elsewhere in the
         compiler.  */

      if (slot >= 0)
        {
          /* A Co-processor register candidate goes either in its own
             class of registers or on the stack.  */
          if (!pcum->aapcs_cprc_failed[slot])
            {
              /* C1.cp - Try to allocate the argument to co-processor
                 registers.  */
              if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
                return;

              /* C2.cp - Put the argument on the stack and note that we
                 can't assign any more candidates in this slot.  We also
                 need to note that we have allocated stack space, so that
                 we won't later try to split a non-cprc candidate between
                 core registers and the stack.  */
              pcum->aapcs_cprc_failed[slot] = true;
              pcum->can_split = false;
            }

          /* We didn't get a register, so this argument goes on the
             stack.  */
          gcc_assert (pcum->can_split == false);
          return;
        }
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if (ncrn & 1)
    {
      int res = arm_needs_doubleword_align (mode, type);
      /* Only warn during RTL expansion of call stmts, otherwise we would
         warn e.g. during gimplification even on functions that will be
         always inlined, and we'd warn multiple times.  Don't warn when
         called in expand_function_start either, as we warn instead in
         arm_function_arg_boundary in that case.  */
      if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
        inform (input_location, "parameter passing for argument of type "
                "%qT changed in GCC 7.1", type);
      else if (res > 0)
        ncrn++;
    }

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
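/* Illustration (worked example, not part of this file): given

     struct s { int x[5]; };
     void f (int a, struct s b);

   A occupies r0, so NCRN is 1 when B is laid out.  B needs five
   words: C4 fails (1 + 5 > 4), but C5 applies, so B is split with
   r1-r3 holding the first three words and aapcs_partial == 12 bytes
   going on the stack.  */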
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
                          rtx libname,
                          tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
        pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
        {
          int i;

          for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
            {
              pcum->aapcs_cprc_failed[i] = false;
              aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
            }
        }
      return;
    }

  /* Legacy ABIs */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
           fn_arg;
           fn_arg = TREE_CHAIN (fn_arg))
        pcum->named_count += 1;

      if (! pcum->named_count)
        pcum->named_count = INT_MAX;
    }
}
/* Return 1 if double word alignment is required for argument passing.
   Return -1 if double word alignment used to be required for argument
   passing before PR77728 ABI fix, but is not required anymore.
   Return 0 if double word alignment is not required and wasn't required
   before either.  */
static int
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  if (!type)
    return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;

  /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;

  /* Array types: Use member alignment of element type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;

  int ret = 0;
  /* Record/aggregate types: Use greatest member alignment of any member.  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (DECL_ALIGN (field) > PARM_BOUNDARY)
      {
        if (TREE_CODE (field) == FIELD_DECL)
          return 1;
        else
          /* Before PR77728 fix, we were incorrectly considering also
             other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
             Make sure we can warn about that with -Wpsabi.  */
          ret = -1;
      }

  return ret;
}
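/* Illustration (hypothetical user code, not part of this file):

     struct a { long long x; };   (returns 1: a 64-bit aligned member)
     struct b { int x, y; };      (returns 0)

   and a C++ aggregate whose only over-aligned member is a static data
   member (a VAR_DECL rather than a FIELD_DECL) returns -1, which
   triggers the -Wpsabi note about the GCC 7.1 ABI change instead of
   forcing an even register pair.  */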
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
                  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (mode == VOIDmode)
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
        return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
        {
          pcum->can_split = false;
          return NULL_RTX;
        }
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
    {
      int res = arm_needs_doubleword_align (mode, type);
      if (res < 0 && warn_psabi)
        inform (input_location, "parameter passing for argument of type "
                "%qT changed in GCC 7.1", type);
      else if (res > 0)
        pcum->nregs++;
    }

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (mode, pcum->nregs);
}
static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  if (!ARM_DOUBLEWORD_ALIGN)
    return PARM_BOUNDARY;

  int res = arm_needs_doubleword_align (mode, type);
  if (res < 0 && warn_psabi)
    inform (input_location, "parameter passing for argument of type %qT "
            "changed in GCC 7.1", type);

  return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
}
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
                       tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
/* Update the data in PCUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
                          const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);

      if (pcum->aapcs_cprc_slot >= 0)
        {
          aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
                                                              type);
          pcum->aapcs_cprc_slot = -1;
        }

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (mode)
          && pcum->named_count > pcum->nargs
          && TARGET_IWMMXT_ABI)
        pcum->iwmmxt_nregs += 1;
      else
        pcum->nregs += ARM_NUM_REGS2 (mode, type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
                       machine_mode mode ATTRIBUTE_UNUSED,
                       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
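/* Illustration (hypothetical user code, not part of this file): with
   the GNU extension that permits variably sized aggregates,

     void g (int n)
     {
       struct { char buf[n]; } s;
       consume (s);
     }

   S's TYPE_SIZE is not an INTEGER_CST, so the call passes S by
   reference behind the scenes.  */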
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,          /* No #pragma [no_]long_calls is in effect.  */
  LONG,         /* #pragma long_calls is in effect.  */
  SHORT         /* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
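/* Illustration (hypothetical user code, not part of this file):

     #pragma long_calls
     void far_away (void);        (type gets the long_call attribute)
     #pragma long_calls_off

   arm_set_default_type_attributes below attaches the attribute to
   function types declared while the pragma is in effect.  */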
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
                          bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
        {
          warning (OPT_Wattributes, "%qE attribute only applies to functions",
                   name);
          *no_add_attrs = true;
        }
      /* FIXME: the argument if any is checked for type attributes;
         should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
          || TREE_CODE (*node) == METHOD_TYPE)
        {
          if (arm_isr_value (args) == ARM_FT_UNKNOWN)
            {
              warning (OPT_Wattributes, "%qE attribute ignored",
                       name);
              *no_add_attrs = true;
            }
        }
      else if (TREE_CODE (*node) == POINTER_TYPE
               && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
                   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
               && arm_isr_value (args) != ARM_FT_UNKNOWN)
        {
          *node = build_variant_type_copy (*node);
          TREE_TYPE (*node) = build_type_attribute_variant
            (TREE_TYPE (*node),
             tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
          *no_add_attrs = true;
        }
      else
        {
          /* Possibly pass this attribute on from the type to a decl.  */
          if (flags & ((int) ATTR_FLAG_DECL_NEXT
                       | (int) ATTR_FLAG_FUNCTION_NEXT
                       | (int) ATTR_FLAG_ARRAY_NEXT))
            {
              *no_add_attrs = true;
              return tree_cons (name, args, NULL_TREE);
            }
          else
            {
              warning (OPT_Wattributes, "%qE attribute ignored",
                       name);
            }
        }
    }

  return NULL_TREE;
}
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
                          int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
                                tree name ATTRIBUTE_UNUSED,
                                tree args ATTRIBUTE_UNUSED,
                                int flags ATTRIBUTE_UNUSED,
                                bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* This function returns true if a function with declaration FNDECL and type
   FNTYPE uses the stack to pass arguments or return variables and false
   otherwise.  This is used for functions with the attributes
   'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
   diagnostic messages if the stack is used.  NAME is the name of the attribute
   used.  */

static bool
cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
{
  function_args_iterator args_iter;
  CUMULATIVE_ARGS args_so_far_v;
  cumulative_args_t args_so_far;
  bool first_param = true;
  tree arg_type, prev_arg_type = NULL_TREE, ret_type;

  /* Error out if any argument is passed on the stack.  */
  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
  args_so_far = pack_cumulative_args (&args_so_far_v);
  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
    {
      rtx arg_rtx;
      machine_mode arg_mode = TYPE_MODE (arg_type);

      prev_arg_type = arg_type;
      if (VOID_TYPE_P (arg_type))
        continue;

      if (!first_param)
        arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
      arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
      if (!arg_rtx
          || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
        {
          error ("%qE attribute not available to functions with arguments "
                 "passed on the stack", name);
          return true;
        }
      first_param = false;
    }

  /* Error out for variadic functions since we cannot control how many
     arguments will be passed and thus stack could be used.  stdarg_p () is not
     used for the checking to avoid browsing arguments twice.  */
  if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
    {
      error ("%qE attribute not available to functions with variable number "
             "of arguments", name);
      return true;
    }

  /* Error out if return value is passed on the stack.  */
  ret_type = TREE_TYPE (fntype);
  if (arm_return_in_memory (ret_type, fntype))
    {
      error ("%qE attribute not available to functions that return value on "
             "the stack", name);
      return true;
    }

  return false;
}
/* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function declaration tree or otherwise issue a warning.  */

static tree
arm_handle_cmse_nonsecure_entry (tree *node, tree name,
                                 tree /* args */,
                                 int /* flags */,
                                 bool *no_add_attrs)
{
  tree fndecl;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
               name);
      return NULL_TREE;
    }

  /* Ignore attribute for function types.  */
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  fndecl = *node;

  /* Warn for static linkage functions.  */
  if (!TREE_PUBLIC (fndecl))
    {
      warning (OPT_Wattributes, "%qE attribute has no effect on functions "
               "with static linkage", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
                                                      TREE_TYPE (fndecl));
  return NULL_TREE;
}
/* Called upon detection of the use of the cmse_nonsecure_call attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function type tree or otherwise issue a diagnostic.  The
   reason we check this at declaration time is to only allow the use of the
   attribute with declarations of function pointers and not function
   declarations.  This function checks NODE is of the expected type and issues
   diagnostics otherwise using NAME.  If it is not of the expected type
   *NO_ADD_ATTRS will be set to true.  */

static tree
arm_handle_cmse_nonsecure_call (tree *node, tree name,
                                tree /* args */,
                                int /* flags */,
                                bool *no_add_attrs)
{
  tree decl = NULL_TREE, fntype = NULL_TREE;
  tree type;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
               name);
      return NULL_TREE;
    }

  if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
    {
      decl = *node;
      fntype = TREE_TYPE (decl);
    }

  while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
    fntype = TREE_TYPE (fntype);

  if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
               "function pointer", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);

  if (*no_add_attrs)
    return NULL_TREE;

  /* Prevent trees being shared among function types with and without
     cmse_nonsecure_call attribute.  */
  type = TREE_TYPE (decl);

  type = build_distinct_type_copy (type);
  TREE_TYPE (decl) = type;
  fntype = type;

  while (TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      type = fntype;
      fntype = TREE_TYPE (fntype);
      fntype = build_distinct_type_copy (fntype);
      TREE_TYPE (type) = fntype;
    }

  /* Construct a type attribute and add it to the function type.  */
  tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
                          TYPE_ATTRIBUTES (fntype));
  TYPE_ATTRIBUTES (fntype) = attrs;
  return NULL_TREE;
}
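/* Illustration (hypothetical user code, not part of this file):

     typedef void __attribute__ ((cmse_nonsecure_call)) nsfunc (void);
     nsfunc *ns_callback;

   calls through NS_CALLBACK then use the non-secure transition
   sequence; writing the attribute on a plain function declaration is
   diagnosed by the check above.  */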
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
        return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
        return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  l1 = lookup_attribute ("cmse_nonsecure_call",
                         TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("cmse_nonsecure_call",
                         TYPE_ATTRIBUTES (type2)) != NULL;

  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
        attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
        attr_name = get_identifier ("short_call");
      else
        return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
        return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

        a.  has an __attribute__((long call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

        d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
     may be used both as target of the call and base register for restoring
     the VFP registers  */
  if (TARGET_APCS_FRAME && TARGET_ARM
      && TARGET_HARD_FLOAT
      && decl && arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  /* ARMv8-M non-secure entry functions need to return with bxns which is only
     generated for entry functions themselves.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    return false;

  /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
     this would complicate matters for later code generation.  */
  if (TREE_CODE (exp) == CALL_EXPR)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
        return false;
    }

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
         example that we aren't returning a value from the sibling in
         a VFP register but then need to transfer it to a core
         register.  */
      rtx a, b;
      tree decl_or_type = decl;

      /* If it is an indirect function pointer, get the function type.  */
      if (!decl)
        decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));

      a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
                              cfun->decl, false);
      if (!rtx_equal_p (a, b))
        return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* We cannot do a tailcall for an indirect call by descriptor if all the
     argument registers are used because the only register left to load the
     address is IP and it will already contain the static chain.  */
  if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      CUMULATIVE_ARGS cum;
      cumulative_args_t cum_v;

      arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
      cum_v = pack_cumulative_args (&cum);

      for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
        {
          tree type = TREE_VALUE (t);
          if (!VOID_TYPE_P (type))
            arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
        }

      if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
        return false;
    }

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */

static void
require_pic_register (void)
{
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
    {
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM
          && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
        {
          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            crtl->uses_pic_offset_table = 1;
        }
      else
        {
          rtx_insn *seq, *insn;

          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_reg_rtx (Pmode);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            {
              crtl->uses_pic_offset_table = 1;
              start_sequence ();

              if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
                  && arm_pic_register > LAST_LO_REGNUM)
                emit_move_insn (cfun->machine->pic_reg,
                                gen_rtx_REG (Pmode, arm_pic_register));
              else
                arm_load_pic_register (0UL);

              seq = get_insns ();
              end_sequence ();

              for (insn = seq; insn; insn = NEXT_INSN (insn))
                if (INSN_P (insn))
                  INSN_LOCATION (insn) = prologue_location;

              /* We can be called during expansion of PHI nodes, where
                 we can't yet emit instructions directly in the final
                 insn stream.  Queue the insns on the entry edge, they will
                 be committed after everything else is expanded.  */
              insert_insn_on_edge (seq,
                                   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
            }
        }
    }
}
rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      /* VxWorks does not impose a fixed gap between segments; the run-time
         gap can be different from the object-file gap.  We therefore can't
         use GOTOFF unless we are absolutely sure that the symbol is in the
         same segment as the GOT.  Unfortunately, the flexibility of linker
         scripts means that we can't be sure of that in general, so assume
         that GOTOFF is never valid on VxWorks.  */
      /* References to weak symbols cannot be resolved locally: they
         may be overridden by a non-weak definition at link time.  */
      rtx_insn *insn;
      if ((GET_CODE (orig) == LABEL_REF
           || (GET_CODE (orig) == SYMBOL_REF
               && SYMBOL_REF_LOCAL_P (orig)
               && (SYMBOL_REF_DECL (orig)
                   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
          && NEED_GOT_RELOC
          && arm_pic_data_is_text_relative)
        insn = arm_pic_static_addr (orig, reg);
      else
        {
          rtx pat;
          rtx mem;

          /* If this function doesn't have a pic register, create one now.  */
          require_pic_register ();

          pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);

          /* Make the MEM as close to a constant as possible.  */
          mem = SET_SRC (pat);
          gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
          MEM_READONLY_P (mem) = 1;
          MEM_NOTRAP_P (mem) = 1;

          insn = emit_insn (pat);
        }

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
         by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
        return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
          && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
        return orig;

      /* Handle the case where we have:
         const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
         CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
        {
          gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
          return orig;
        }

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                       base == reg ? 0 : reg);

      if (CONST_INT_P (offset))
        {
          /* The base register doesn't really matter, we only want to
             test the index for the appropriate mode.  */
          if (!arm_legitimate_index_p (mode, offset, SET, 0))
            {
              gcc_assert (can_create_pseudo_p ());
              offset = force_reg (Pmode, offset);
            }

          if (CONST_INT_P (offset))
            return plus_constant (Pmode, base, INTVAL (offset));
        }

      if (GET_MODE_SIZE (mode) > 4
          && (GET_MODE_CLASS (mode) == MODE_INT
              || TARGET_SOFT_FLOAT))
        {
          emit_insn (gen_addsi3 (reg, base, offset));
          return reg;
        }

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
/* Find a spare register to use during the prolog of a function.  */

static int
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
    if (!df_regs_ever_live_p (reg))
      return reg;

  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have less than 4
     registers worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): ie crtl->args.pretend_args_size is greater than 0.  */
  if (cfun->machine->uses_anonymous_args
      && crtl->args.pretend_args_size > 0)
    return LAST_ARG_REGNUM;

  /* The other case is when we have fixed arguments but less than 4 registers
     worth.  In this case r3 might be used in the body of the function, but
     it is not being used to convey an argument into the function.  In theory
     we could just check crtl->args.size to see how many bytes are
     being passed in argument registers, but it seems that it is unreliable.
     Sometimes it will have the value 0 when in fact arguments are being
     passed.  (See testcase execute/20021111-1.c for an example).  So we also
     check the args_info.nregs field as well.  The problem with this field is
     that it makes no allowances for arguments that are passed to the
     function but which are not used.  Hence we could miss an opportunity
     when a function has an unused argument in r3.  But it is better to be
     safe than to be sorry.  */
  if (! cfun->machine->uses_anonymous_args
      && crtl->args.size >= 0
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
      && (TARGET_AAPCS_BASED
          ? crtl->args.info.aapcs_ncrn < 4
          : crtl->args.info.nregs < 4))
    return LAST_ARG_REGNUM;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
        if (pushed_regs_mask & (1 << reg))
          return reg;
    }
  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
{
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
    return;

  gcc_assert (flag_pic);

  pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
         never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
         addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
                                UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
        {
          emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
      else /* TARGET_THUMB1 */
        {
          if (arm_pic_register != INVALID_REGNUM
              && REGNO (pic_reg) > LAST_LO_REGNUM)
            {
              /* We will have pushed the pic register, so we should always be
                 able to find a work register.  */
              pic_tmp = gen_rtx_REG (SImode,
                                     thumb_find_work_register (saved_regs));
              emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
              emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
              emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
            }
          else if (arm_pic_register != INVALID_REGNUM
                   && arm_pic_register > LAST_LO_REGNUM
                   && REGNO (pic_reg) <= LAST_LO_REGNUM)
            {
              emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
              emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
              emit_use (gen_rtx_REG (Pmode, arm_pic_register));
            }
          else
            emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx_insn *
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx;

  gcc_assert (flag_pic);

  /* We use an UNSPEC rather than a LABEL_REF because this label
     never appears in the code stream.  */
  labelno = GEN_INT (pic_labelno++);
  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  l1 = gen_rtx_CONST (VOIDmode, l1);

  /* On the ARM the PC register contains 'dot + 8' at the time of the
     addition, on the Thumb it is 'dot + 4'.  */
  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
                               UNSPEC_SYMBOL_OFFSET);
  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

  return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
}
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
          || regno >= FIRST_PSEUDO_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}
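/* Illustration (not part of this file): an expression such as
   (minus (symbol_ref "x") (label_ref L)) assembles to "x - .L",
   which the linker resolves PC-relatively, so no GOT entry is needed
   for it even under -fpic.  */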
/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
                                int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
         to fixup invalid register choices.  */
      if (use_ldrd
          && GET_CODE (x) == POST_MODIFY
          && REG_P (addend))
        return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
              && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && ((CONST_INT_P (xop1)
                    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
              && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return true if we can avoid creating a constant pool entry for x.  */
static bool
can_avoid_literal_pool_for_label_p (rtx x)
{
  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend a 8bit value to 32bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (x))
    return true;
  return false;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
	return 0;

      offset = INTVAL (addend);
      if (GET_MODE_SIZE (mode) <= 4)
	return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
	      && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  else if (can_avoid_literal_pool_for_label_p (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
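/* Worked example (added for illustration): for a 4-byte access the
   autoincrement check above accepts
   (post_modify (reg r0) (plus (reg r0) (const_int 200))) since |200| < 256,
   but rejects an addend of 400; for DImode with TARGET_LDRD an addend of
   400 is accepted because it is below 1024 and a multiple of 4.  */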
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
			int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);

	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
	     If vldr is selected it uses arm_coproc_mem_operand.  */
	  if (TARGET_LDRD)
	    return val > -256 && val < 256;
	  else
	    return val > -4096 && val < 4092;
	}

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
	    && (mode == HImode
		|| mode == HFmode
		|| (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
	{
	  rtx xiop0 = XEXP (index, 0);
	  rtx xiop1 = XEXP (index, 1);

	  return ((arm_address_register_rtx_p (xiop0, strict_p)
		   && power_of_two_operand (xiop1, SImode))
		  || (arm_address_register_rtx_p (xiop1, strict_p)
		      && power_of_two_operand (xiop0, SImode)));
	}
      else if (code == LSHIFTRT || code == ASHIFTRT
	       || code == ASHIFT || code == ROTATERT)
	{
	  rtx op = XEXP (index, 1);

	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
		  && CONST_INT_P (op)
		  && INTVAL (op) > 0
		  && INTVAL (op) <= 31);
	}
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
	  || mode == HFmode
	  || (outer == SIGN_EXTEND && mode == QImode))
	range = 256;
      else
	range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
	  && INTVAL (index) < range
	  && INTVAL (index) > -range);
}
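/* Worked example (illustrative): on a target with arm_arch4 set, an HImode
   index must satisfy -256 < INDEX < 256 (the ldrh/ldrsh offset field),
   while a plain SImode index may use the full 12-bit ldr/str range
   -4096 < INDEX < 4096.  */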
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    /* Thumb-2 allows only > -256 index range for its core register
	       load/stores.  Since we allow SF/DF in core registers, we have
	       to use the intersection between -256~4096 (core) and -1024~1024
	       (coprocessor).  */
	    && INTVAL (index) > -256
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
	 and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
	return (code == CONST_INT
		&& INTVAL (index) < 1024
		&& INTVAL (index) > -1024
		&& (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);
	  /* Thumb-2 ldrd only has reg+const addressing modes.
	     Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
	     If vldr is selected it uses arm_coproc_mem_operand.  */
	  if (TARGET_LDRD)
	    return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
	  else
	    return IN_RANGE (val, -255, 4095 - 4);
	}

      return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
	       && thumb2_index_mul_operand (xiop1))
	      || (arm_address_register_rtx_p (xiop1, strict_p)
		  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
	      && CONST_INT_P (op)
	      && INTVAL (op) > 0
	      && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
	  && INTVAL (index) < 4096
	  && INTVAL (index) > -256);
}
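/* For illustration: (plus (reg r0) (ashift (reg r1) (const_int 2))) passes
   the ASHIFT check above and corresponds to the Thumb-2 form
   "ldr rD, [r0, r1, lsl #2]"; shift counts greater than 3 are rejected.  */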
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
	  || regno > LAST_VIRTUAL_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || (GET_MODE_SIZE (mode) >= 4
	      && (regno == STACK_POINTER_REGNUM
		  || regno >= FIRST_PSEUDO_REGISTER
		  || x == hard_frame_pointer_rtx
		  || x == arg_pointer_rtx)));
}
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
    return 0;

  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
	  || reg_mentioned_p (arg_pointer_rtx, x)
	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
	   && reload_completed
	   && (GET_CODE (x) == LABEL_REF
	       || (GET_CODE (x) == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
	 will be replaced with STACK, and SP relative addressing only
	 permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
	  && XEXP (x, 0) != frame_pointer_rtx
	  && XEXP (x, 1) != frame_pointer_rtx
	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
	return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
		|| XEXP (x, 0) == arg_pointer_rtx)
	       && CONST_INT_P (XEXP (x, 1))
	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
	return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
	 larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
	 just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && INTVAL (XEXP (x, 1)) >= 0
	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;

      else if (REG_P (XEXP (x, 0))
	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
		       && REGNO (XEXP (x, 0))
			  <= LAST_VIRTUAL_POINTER_REGISTER))
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && GET_MODE_SIZE (mode) == 4
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}
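/* Worked example: QImode accepts offsets 0..31 (5-bit field), HImode
   accepts even offsets 0..62 (5-bit field scaled by 2), and SImode
   accepts word-aligned offsets 0..124, matching the 16-bit
   ldrb/ldrh/ldr immediate forms.  */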
/* Worker function for TARGET_LEGITIMATE_ADDRESS_P.  */
static bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */
static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
	return LO_REGS;
      else
	return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */

      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}
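/* Note (illustrative, not from the original source): gen_load_tp_hard
   expands to a read of the thread-pointer register (on typical
   configurations CP15 TPIDRURO, "mrc p15, 0, rD, c13, c0, 3"), while
   gen_load_tp_soft calls the __aeabi_read_tp helper, which returns the
   thread pointer in r0 -- hence the copy out of r0 above.  */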
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx_insn *
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
			gen_rtvec (4, x, GEN_INT (reloc), label,
				   GEN_INT (TARGET_ARM ? 8 : 4)),
			UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, 1, reg, Pmode);

  rtx_insn *insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
				       gen_rtx_CONST (VOIDmode, label),
				       GEN_INT (!TARGET_ARM)),
			    UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, ret, eqv, addend;
  rtx_insn *insns;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  /* Original scheme */
	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, x);
	}
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
	     share the LDM result with other LD model accesses.  */
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
				UNSPEC_TLS);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, eqv);

	  /* Load the addend.  */
	  addend = gen_rtx_UNSPEC (Pmode,
				   gen_rtvec (2, x,
					      GEN_INT (TLS_LDO32)),
				   UNSPEC_TLS);
	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
	  dest = gen_rtx_PLUS (Pmode, dest, addend);
	}
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
				       GEN_INT (TARGET_ARM ? 8 : 4)),
			    UNSPEC_TLS);
      reg = load_tls_operand (sum, reg);

      if (TARGET_ARM)
	emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
	emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
      else
	{
	  emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
	  emit_move_insn (reg, gen_const_mem (SImode, reg));
	}

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
			    UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
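/* Illustrative sketch (not in the original): for TLS_MODEL_LOCAL_EXEC the
   RTL built above typically assembles, with a hardware thread register, to
       mrc   p15, 0, r0, c13, c0, 3    @ thread pointer
       ldr   r1, =x(tpoff)             @ TLS_LE32 offset of x
       add   r0, r0, r1
   with the UNSPEC_TLS wrapper carrying the relocation kind.  */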
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (x, 0), 1);
	  x = XEXP (XEXP (x, 0), 0);
	}

      if (GET_CODE (x) != SYMBOL_REF)
	return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
	{
	  x = gen_rtx_PLUS (SImode, x, addend);
	  orig_x = x;
	}
      else
	return x;
    }

  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
	return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
	  && !symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
	  && CONST_INT_P (xop1))
	{
	  HOST_WIDE_INT n, low_n;
	  rtx base_reg, val;
	  n = INTVAL (xop1);

	  /* VFP addressing modes actually allow greater offsets, but for
	     now we just stick with the lowest common denominator.  */
	  if (mode == DImode || mode == DFmode)
	    {
	      low_n = n & 0x0f;
	      n &= ~0x0f;
	      if (low_n > 4)
		{
		  n += 16;
		  low_n -= 16;
		}
	    }
	  else
	    {
	      low_n = ((mode) == TImode ? 0
		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
	      n -= low_n;
	    }

	  base_reg = gen_reg_rtx (SImode);
	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
	  emit_move_insn (base_reg, val);
	  x = plus_constant (Pmode, base_reg, low_n);
	}
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the minipool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
	 use an 8-bit index.  So let's use a 12-bit index for SImode only and
	 hope that arm_gen_constant will enable ldrb to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
	{
	  /* It'll most probably be more efficient to generate the base
	     with more bits set and use a negative index instead.  */
	  base |= mask;
	  index -= mask;
	}
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
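/* Worked example (illustrative): with MODE == SImode (BITS == 12) the
   absolute address 0x00012345 is split into base = 0x00012000 and
   index = 0x345, so the access becomes "ldr rD, [rB, #0x345]" and rB can
   be shared by neighbouring accesses into the same region.  */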
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
	  || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
	 then offsetting that.  Don't do this when optimizing for space
	 since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
	{
	  HOST_WIDE_INT delta;

	  if (offset >= 256)
	    delta = offset - (256 - GET_MODE_SIZE (mode));
	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
	    delta = 31 * GET_MODE_SIZE (mode);
	  else
	    delta = offset & (~31 * GET_MODE_SIZE (mode));

	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
				NULL_RTX);
	  x = plus_constant (Pmode, xop0, delta);
	}
      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference, forcing these into a register normally takes two
	   instructions.  */
	x = force_operand (x, NULL_RTX);
      else
	{
	  /* For the remaining cases, force the constant into a register.  */
	  xop1 = force_reg (SImode, xop1);
	  x = gen_rtx_PLUS (SImode, xop0, xop1);
	}
    }
  else if (GET_CODE (x) == PLUS
	   && s_register_operand (XEXP (x, 1), SImode)
	   && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
	{
	  /* ARM currently does not provide relocations to encode TLS variables
	     into AArch32 instructions, only data, so there is no way to
	     currently implement these if a literal pool is disabled.  */
	  if (arm_disable_literal_pool)
	    sorry ("accessing thread-local storage is not currently supported "
		   "with -mpure-code or -mslow-flash-data");

	  return true;
	}

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
     RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
     for ARMv8-M Baseline or later the result is valid.  */
  if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
    return true;

  return (CONST_INT_P (x)
	  || CONST_DOUBLE_P (x)
	  || CONSTANT_ADDRESS_P (x)
	  || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
	  || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }
  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (arm_arch6m && arm_m_profile_small_mul)
	return COSTS_N_INSNS (32);

      if (CONST_INT_P (XEXP (x, 1)))
	{
	  int cycles = 0;
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

	  while (i)
	    {
	      i >>= 2;
	      cycles++;
	    }
	  return COSTS_N_INSNS (2) + cycles;
	}
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
	      + 4 * ((MEM_P (SET_SRC (x)))
		     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256
	      /* 16-bit constant.  */
	      || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
	    return 0;
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
	return total;

      if (arm_arch6)
	return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
	 we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
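/* Worked example (illustrative): for a multiply by the constant 0x53, the
   loop in the MULT case shifts i right two bits per iteration
   (0x53 -> 0x14 -> 0x5 -> 0x1 -> 0), i.e. four iterations, giving
   COSTS_N_INSNS (2) + 4.  */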
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine-grained tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words, cost;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* Fall through.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Thumb1 mul instruction can't operate on const.  We must load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  /* For the targets which have a very small and high-latency multiply
	     unit, we prefer to synthesize the mult with up to 5 instructions,
	     giving a good balance between size and performance.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    return COSTS_N_INSNS (5);
	  else
	    return COSTS_N_INSNS (1) + const_size;
	}
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      cost = COSTS_N_INSNS (words);
      if (satisfies_constraint_J (SET_SRC (x))
	  || satisfies_constraint_K (SET_SRC (x))
	     /* Too big an immediate for a 2-byte mov, using MOVT.  */
	  || (CONST_INT_P (SET_SRC (x))
	      && UINTVAL (SET_SRC (x)) >= 256
	      && TARGET_HAVE_MOVT
	      && satisfies_constraint_j (SET_SRC (x)))
	     /* thumb1_movdi_insn.  */
	  || ((words > 1) && MEM_P (SET_SRC (x))))
	cost += COSTS_N_INSNS (1);
      return cost;

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256)
	    return COSTS_N_INSNS (1);
	  /* movw is 4byte long.  */
	  if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
	      + COSTS_N_INSNS (1)
		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
	{
	case QImode:
	  return (1 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case HImode:
	  return (4 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case SImode:
	  return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	default:
	  return 99;
	}

    default:
      return 99;
    }
}
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
	   || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
	*shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
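/* For illustration: given (mult (reg r1) (const_int 4)), shifter_op_p
   returns (reg r1) and leaves *SHIFT_REG untouched, since multiplying by
   4 is the shifter-operand form "r1, lsl #2".  */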
static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  rtx_code code = GET_CODE (x);
  gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
	 use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
		  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
		  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      if (speed_p)
	*cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;

      return true;
    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return true;
}
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
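/* Worked example: LIBCALL_COST (2) expands to COSTS_N_INSNS (2 + 18)
   = COSTS_N_INSNS (20) when optimizing for speed, but only
   COSTS_N_INSNS (4) at -Os, keeping libcalls attractive for size.  */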
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0);
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
			const struct cpu_cost_table *extra_cost,
			int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (TARGET_THUMB1)
    {
      if (speed_p)
	*cost = thumb1_rtx_costs (x, code, outer_code);
      else
	*cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case SET:
      *cost = 0;
      /* SET RTXs don't have a mode so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));
      if (REG_P (SET_SRC (x))
	  && REG_P (SET_DEST (x)))
	{
	  /* Assume that most copies can be done with a single insn,
	     unless we don't have HW FP, in which case everything
	     larger than word mode will require two insns.  */
	  *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
				   && GET_MODE_SIZE (mode) > 4)
				  || mode == DImode)
				 ? 2 : 1);
	  /* Conditional register moves can be encoded
	     in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
	    *cost >>= 1;

	  return true;
	}

      if (CONST_INT_P (SET_SRC (x)))
	{
	  /* Handle CONST_INT here, since the value doesn't have a mode
	     and we would otherwise be unable to work out the true cost.  */
	  *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
			    0, speed_p);
	  outer_code = SET;
	  /* Slightly lower the cost of setting a core reg to a constant.
	     This helps break up chains and allows for better scheduling.  */
	  if (REG_P (SET_DEST (x))
	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
	    *cost -= 1;
	  x = SET_SRC (x);
	  /* Immediate moves with an immediate in the range [0, 255] can be
	     encoded in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
	    *cost >>= 1;
	  goto const_int_cost;
	}

      return false;
    case MEM:
      /* A memory access costs 1 insn if the mode is small, or the address is
	 a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
	*cost = COSTS_N_INSNS (1);
      else if (flag_pic
	       && GET_CODE (XEXP (x, 0)) == PLUS
	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
	/* This will be split into two instructions.
	   See arm.md:calculate_pic_address.  */
	*cost = COSTS_N_INSNS (2);
      else
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      /* For speed optimizations, add the costs of the address and
	 accessing memory.  */
      if (speed_p)
#ifdef NOT_YET
	*cost += (extra_cost->ldst.load
		  + arm_address_cost (XEXP (x, 0), mode,
				      ADDR_SPACE_GENERIC, speed_p));
#else
	*cost += extra_cost->ldst.load;
#endif
      return true;
    case PARALLEL:
    {
      /* Calculations of LDM costs are complex.  We assume an initial cost
	 (ldm_1st) which will load the number of registers mentioned in
	 ldm_regs_per_insn_1st registers; then each additional
	 ldm_regs_per_insn_subsequent registers cost one more insn.  The
	 formula for N regs is thus:

	 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
				   + ldm_regs_per_insn_subsequent - 1)
				  / ldm_regs_per_insn_subsequent).

	 Additional costs may also be added for addressing.  A similar
	 formula is used for STM.  */

      bool is_ldm = load_multiple_operation (x, SImode);
      bool is_stm = store_multiple_operation (x, SImode);

      if (is_ldm || is_stm)
	{
	  if (speed_p)
	    {
	      HOST_WIDE_INT nregs = XVECLEN (x, 0);
	      HOST_WIDE_INT regs_per_insn_1st = is_ldm
			      ? extra_cost->ldst.ldm_regs_per_insn_1st
			      : extra_cost->ldst.stm_regs_per_insn_1st;
	      HOST_WIDE_INT regs_per_insn_sub = is_ldm
			   ? extra_cost->ldst.ldm_regs_per_insn_subsequent
			   : extra_cost->ldst.stm_regs_per_insn_subsequent;

	      *cost += regs_per_insn_1st
		       + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
					 + regs_per_insn_sub - 1)
					/ regs_per_insn_sub);
	    }

	  return true;
	}

      return false;
    }
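/* Worked example (assuming a tuning where ldm_regs_per_insn_1st == 1 and
   ldm_regs_per_insn_subsequent == 2): a five-register LDM costs
   ldm_1st + COSTS_N_INSNS ((max (5 - 1, 0) + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (2).  */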
    case DIV:
    case UDIV:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*cost += COSTS_N_INSNS (speed_p
				? extra_cost->fp[mode != SFmode].div : 0);
      else if (mode == SImode && TARGET_IDIV)
	*cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
      else
	*cost = LIBCALL_COST (2);

      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
	 possible udiv is preferred.  */
      *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */
    case MOD:
      /* MOD by a power of 2 can be expanded as:
	 rsbs    r1, r0, #0
	 and     r0, r0, #(n - 1)
	 and     r1, r1, #(n - 1)
	 rsbpl   r0, r1, #0.  */
      if (CONST_INT_P (XEXP (x, 1))
	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
	  && mode == SImode)
	{
	  *cost += COSTS_N_INSNS (3);

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical
		     + extra_cost->alu.arith;
	  return true;
	}

      /* Fall-through.  */
    case UMOD:
      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
	 possible udiv is preferred.  */
      *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */
    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (1)
		    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += extra_cost->alu.shift_reg;
	  return true;
	}
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (2)
		    + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	  return true;
	}
      else if (mode == SImode)
	{
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  /* Slightly disparage register shifts at -Os, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *cost += (speed_p ? extra_cost->alu.shift_reg : 1
		      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_INT
	       && GET_MODE_SIZE (mode) < 4)
	{
	  if (code == ASHIFT)
	    {
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* Slightly disparage register shifts at -Os, but not by
		 much.  */
	      if (!CONST_INT_P (XEXP (x, 1)))
		*cost += (speed_p ? extra_cost->alu.shift_reg : 1
			  + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	    }
	  else if (code == LSHIFTRT || code == ASHIFTRT)
	    {
	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
		{
		  /* Can use SBFX/UBFX.  */
		  if (speed_p)
		    *cost += extra_cost->alu.bfx;
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		}
	      else
		{
		  *cost += COSTS_N_INSNS (1);
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  if (speed_p)
		    {
		      if (CONST_INT_P (XEXP (x, 1)))
			*cost += 2 * extra_cost->alu.shift;
		      else
			*cost += (extra_cost->alu.shift
				  + extra_cost->alu.shift_reg);
		    }
		  else
		    /* Slightly disparage register shifts.  */
		    *cost += !CONST_INT_P (XEXP (x, 1));
		}
	    }
	  else /* Rotates.  */
	    {
	      *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      if (speed_p)
		{
		  if (CONST_INT_P (XEXP (x, 1)))
		    *cost += (2 * extra_cost->alu.shift
			      + extra_cost->alu.log_shift);
		  else
		    *cost += (extra_cost->alu.shift
			      + extra_cost->alu.shift_reg
			      + extra_cost->alu.log_shift_reg);
		}
	    }
	  return true;
	}

      *cost = LIBCALL_COST (2);
      return false;
    case BSWAP:
      if (arm_arch6)
	{
	  if (mode == SImode)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.rev;

	      return false;
	    }
	}
      else
	{
	  /* No rev instruction available.  Look at arm_legacy_rev
	     and thumb_legacy_rev for the form of RTL used then.  */
	  if (TARGET_THUMB)
	    {
	      *cost += COSTS_N_INSNS (9);

	      if (speed_p)
		{
		  *cost += 6 * extra_cost->alu.shift;
		  *cost += 3 * extra_cost->alu.logical;
		}
	    }
	  else
	    {
	      *cost += COSTS_N_INSNS (4);

	      if (speed_p)
		{
		  *cost += 2 * extra_cost->alu.shift;
		  *cost += extra_cost->alu.arith_shift;
		  *cost += 2 * extra_cost->alu.logical;
		}
	    }
	  return true;
	}
      return false;
    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      || GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      rtx mul_op0, mul_op1, sub_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      if (GET_CODE (XEXP (x, 0)) == MULT)
		{
		  mul_op0 = XEXP (XEXP (x, 0), 0);
		  mul_op1 = XEXP (XEXP (x, 0), 1);
		  sub_op = XEXP (x, 1);
		}
	      else
		{
		  mul_op0 = XEXP (XEXP (x, 1), 0);
		  mul_op1 = XEXP (XEXP (x, 1), 1);
		  sub_op = XEXP (x, 0);
		}

	      /* The first operand of the multiply may be optionally
		 negated.  */
	      if (GET_CODE (mul_op0) == NEG)
		mul_op0 = XEXP (mul_op0, 0);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (sub_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_by_reg = NULL;
	  rtx shift_op;
	  rtx non_shift_op;

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
	  if (shift_op == NULL)
	    {
	      shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
	      non_shift_op = XEXP (x, 0);
	    }
	  else
	    non_shift_op = XEXP (x, 1);

	  if (shift_op != NULL)
	    {
	      if (shift_by_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
	      *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
	      return true;
	    }

	  if (arm_arch_thumb2
	      && GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      /* MLS.  */
	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 0)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  /* We check both sides of the MINUS for shifter operands since,
	     unlike PLUS, it's not commutative.  */

	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)

	  /* Slightly disparage, as we might need to widen the result.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }

	  return false;
	}

      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	    {
	      rtx op1 = XEXP (x, 1);

	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;

	      if (GET_CODE (op1) == ZERO_EXTEND)
		*cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
				   0, speed_p);
	      else
		*cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				 0, speed_p);
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));
	      *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    GET_CODE (XEXP (x, 1)), 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;
    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op0, mul_op1, add_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      mul_op0 = XEXP (XEXP (x, 0), 0);
	      mul_op1 = XEXP (XEXP (x, 0), 1);
	      add_op = XEXP (x, 1);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (add_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Narrow modes can be synthesized in SImode, but the range
	 of useful sub-operations is limited.  Check for shift operations
	 on one of the operands.  Only left shifts can be used in the
	 narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      /* Slightly penalize a narrow operation as the result may
		 need widening.  */
	      *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
	      return true;
	    }

	  /* Slightly penalize a narrow operation as the result may
	     need widening.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_op, shift_reg;

	  if (TARGET_INT_SIMD
	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	    {
	      /* UXTA[BH] or SXTA[BH].  */
	      if (speed_p)
		*cost += extra_cost->alu.extend_arith;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
	      return true;
	    }

	  shift_reg = NULL;
	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op = XEXP (x, 0);

	      if (TARGET_DSP_MULTIPLY
		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
				      == 16))))))
		{
		  /* SMLA[BT][BT].  */
		  if (speed_p)
		    *cost += extra_cost->mult[0].extend_add;
		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
				      SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
					SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
			+ rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (mode == DImode)
	{
	  if (arm_arch3m
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend_add;
	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
				    ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));

	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
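/* Illustrative example (not from the source): the SMLA[BT][BT] pattern
   matched above corresponds to RTL such as
     (plus (mult (sign_extend:SI (reg:HI r1))
		 (sign_extend:SI (reg:HI r2)))
	   (reg:SI r0))
   which maps onto the single instruction "smlabb r0, r1, r2, r0".  */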
    case IOR:
      if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
	{
	  if (speed_p)
	    *cost += extra_cost->alu.rev;

	  return true;
	}
      /* Fall through.  */
    case AND: case XOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
	  rtx op0 = XEXP (x, 0);
	  rtx shift_op, shift_reg;

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);

	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.logical;
	      *cost += rtx_cost (op0, mode, code, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  *cost += (rtx_cost (op0, mode, code, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}

      if (mode == DImode)
	{
	  rtx op0 = XEXP (x, 0);
	  enum rtx_code subcode = GET_CODE (op0);

	  *cost += COSTS_N_INSNS (1);

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  if (GET_CODE (op0) == ZERO_EXTEND)
	    {
	      if (speed_p)
		*cost += 2 * extra_cost->alu.logical;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }
	  else if (GET_CODE (op0) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical;

	  return true;
	}
      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;
    case MULT:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  rtx op0 = XEXP (x, 0);

	  if (GET_CODE (op0) == NEG && !flag_rounding_math)
	    op0 = XEXP (op0, 0);

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].mult;

	  *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      if (mode == SImode)
	{
	  if (TARGET_DSP_MULTIPLY
	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			      && (INTVAL (XEXP (XEXP (x, 1), 1))
				  == 16))))))
	    {
	      /* SMUL[TB][TB].  */
	      if (speed_p)
		*cost += extra_cost->mult[0].extend;
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
				 SIGN_EXTEND, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
				 SIGN_EXTEND, 1, speed_p);
	      return true;
	    }
	  if (speed_p)
	    *cost += extra_cost->mult[0].simple;
	  return false;
	}

      if (mode == DImode)
	{
	  if (arm_arch3m
	      && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    ZERO_EXTEND, 0, speed_p));
	      return true;
	    }

	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      /* VNMUL.  */
	      *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == ABS)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* Assume the non-flag-changing variant.  */
	      if (speed_p)
		*cost += (extra_cost->alu.log_shift
			  + extra_cost->alu.arith_shift);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
	      return true;
	    }

	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* No extra cost for MOV imm and MVN imm.  */
	      /* If the comparison op is using the flags, there's no further
		 cost, otherwise we need to add the cost of the comparison.  */
	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
		{
		  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
		  *cost += (COSTS_N_INSNS (1)
			    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
					0, speed_p)
			    + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
					1, speed_p));
		  if (speed_p)
		    *cost += extra_cost->alu.arith;
		}
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  /* Slightly disparage, as we might need an extend operation.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case NOT:
      if (mode == SImode)
	{
	  rtx shift_op;
	  rtx shift_reg = NULL;

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

	  if (shift_op)
	    {
	      if (shift_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;
	      *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  return false;
	}
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  return false;
	}

      /* Vector mode?  */

      *cost += LIBCALL_COST (1);
      return false;
    case IF_THEN_ELSE:
      {
	if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	  {
	    *cost += COSTS_N_INSNS (3);
	    return true;
	  }
	int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
	int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);

	*cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
	/* Assume that if one arm of the if_then_else is a register,
	   that it will be tied with the result and eliminate the
	   conditional insn.  */
	if (REG_P (XEXP (x, 1)))
	  *cost += op2cost;
	else if (REG_P (XEXP (x, 2)))
	  *cost += op1cost;
	else
	  {
	    if (speed_p)
	      {
		if (extra_cost->alu.non_exec_costs_exec)
		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
		else
		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
	      }
	    else
	      *cost += op1cost + op2cost;
	  }
      }
      return true;
    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
	*cost = 0;
      else
	{
	  machine_mode op0mode;
	  /* We'll mostly assume that the cost of a compare is the cost of the
	     LHS.  However, there are some notable exceptions.  */

	  /* Floating point compares are never done as side-effects.  */
	  op0mode = GET_MODE (XEXP (x, 0));
	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
	    {
	      if (speed_p)
		*cost += extra_cost->fp[op0mode != SFmode].compare;

	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
	    {
	      *cost = LIBCALL_COST (2);
	      return false;
	    }

	  /* DImode compares normally take two insns.  */
	  if (op0mode == DImode)
	    {
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;
	      return false;
	    }

	  if (op0mode == SImode)
	    {
	      rtx shift_op;
	      rtx shift_reg = NULL;

	      if (XEXP (x, 1) == const0_rtx
		  && !(REG_P (XEXP (x, 0))
		       || (GET_CODE (XEXP (x, 0)) == SUBREG
			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
		{
		  *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);

		  /* Multiply operations that set the flags are often
		     significantly more expensive.  */
		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;

		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
							    0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;
		  return true;
		}

	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	      if (shift_op != NULL)
		{
		  if (shift_reg != NULL)
		    {
		      *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
					 1, speed_p);
		      if (speed_p)
			*cost += extra_cost->alu.arith_shift_reg;
		    }
		  else if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
		  *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
		  return true;
		}
	      return false;
	    }

	  /* Vector mode?  */

	  *cost = LIBCALL_COST (2);
	  return false;
	}
      return true;
    case EQ:
    case NE:
    case LT:
    case LE:
    case GT:
    case GE:
    case LTU:
    case LEU:
    case GEU:
    case GTU:
    case ORDERED:
    case UNORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if (outer_code == SET)
	{
	  /* Is it a store-flag operation?  */
	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	      && XEXP (x, 1) == const0_rtx)
	    {
	      /* Thumb also needs an IT insn.  */
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
	      return true;
	    }
	  if (XEXP (x, 1) == const0_rtx)
	    {
	      switch (code)
		{
		case LT:
		  /* LSR Rd, Rn, #31.  */
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		case EQ:
		case NE:
		  *cost += COSTS_N_INSNS (1);
		  break;

		case LE:
		  /* RSBS T1, Rn, Rn, LSR #31
		     ADC  Rd, Rn, T1.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  break;

		case GT:
		  /* RSB  Rd, Rn, Rn, ASR #1
		     LSR  Rd, Rd, #31.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += (extra_cost->alu.arith_shift
			      + extra_cost->alu.shift);
		  break;

		case GE:
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		default:
		  /* Remaining cases are either meaningless or would take
		     three insns anyway.  */
		  *cost = COSTS_N_INSNS (3);
		  break;
		}
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      return true;
	    }
	  else
	    {
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	}
      /* Not directly inside a set.  If it involves the condition code
	 register it must be the condition for a branch, cond_exec or
	 I_T_E operation.  Since the comparison is performed elsewhere
	 this is just the control part which has no additional
	 cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	       && XEXP (x, 1) == const0_rtx)
	{
	  *cost = 0;
	  return true;
	}
      return false;
    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
	  return false;
	}
      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case SIGN_EXTEND:
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);

	  if (!speed_p)
	    return true;

	  if (GET_MODE (XEXP (x, 0)) == SImode)
	    *cost += extra_cost->ldst.load;
	  else
	    *cost += extra_cost->ldst.load_sign_extend;

	  if (mode == DImode)
	    *cost += extra_cost->alu.shift;

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have SXTB/SXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  */
	  *cost += COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.shift;
	}

      return true;
    case ZERO_EXTEND:
      if ((arm_arch4
	   || GET_MODE (XEXP (x, 0)) == SImode
	   || GET_MODE (XEXP (x, 0)) == QImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
	{
	  /* UXTB can be a shorter instruction in Thumb2, but it might
	     be slower than the AND Rd, Rn, #255 alternative.  When
	     optimizing for speed it should never be slower to use
	     AND, and we don't really model 16-bit vs 32-bit insns
	     here.  */
	  if (speed_p)
	    *cost += extra_cost->alu.logical;
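	  /* For example (illustrative only): on Thumb-2, zero-extending
	     r1 into r0 can be either

	       uxtb r0, r1            @ 16-bit encoding
	       and  r0, r1, #255     @ 32-bit encoding

	     and it is the AND form that the alu.logical cost above
	     models.  */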
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have UXTB/UXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  It's marginally preferable to use
	     shifts rather than two BIC instructions as the second
	     shift may merge with a subsequent insn as a shifter
	     op.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	*cost += COSTS_N_INSNS (1);	/* No speed penalty.  */

      return true;
    case CONST_INT:
      /* CONST_INT has no mode, so we cannot tell for sure how many
	 insns are really going to be needed.  The best we can do is
	 look at the value passed.  If it fits in SImode, then assume
	 that's the mode it will be used for.  Otherwise assume it
	 will be used in DImode.  */
      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
	mode = SImode;
      else
	mode = DImode;

      /* Avoid blowing up in arm_gen_constant ().  */
      if (!(outer_code == PLUS
	    || outer_code == AND
	    || outer_code == IOR
	    || outer_code == XOR
	    || outer_code == MINUS))
	outer_code = SET;

      if (mode == SImode)
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
						    INTVAL (x), NULL, NULL,
						    0, 0));
	}
      else
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant
				  (outer_code, SImode, NULL,
				   trunc_int_for_mode (INTVAL (x), SImode),
				   NULL, NULL, 0, 0)
				  + arm_gen_constant (outer_code, SImode, NULL,
						      INTVAL (x) >> 32, NULL,
						      NULL, 0, 0));
	}
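      /* Worked example (illustrative only): for the 64-bit constant
	 0x1234567855667788 the cost is the sum of synthesizing the two
	 SImode halves, trunc_int_for_mode (0x1234567855667788, SImode)
	 == 0x55667788 and 0x1234567855667788 >> 32 == 0x12345678, each
	 costed through arm_gen_constant as above.  */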
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (speed_p)
	{
	  if (arm_arch_thumb2 && !flag_pic)
	    *cost += COSTS_N_INSNS (1);
	  else
	    *cost += extra_cost->ldst.load;
	}
      else
	*cost += COSTS_N_INSNS (1);

      if (flag_pic)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	}

      return true;

    case CONST_FIXED:
      *cost = COSTS_N_INSNS (4);
      return true;
    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (vfp3_const_double_rtx (x))
	    {
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].fpconst;
	      return true;
	    }

	  if (speed_p)
	    {
	      if (mode == DFmode)
		*cost += extra_cost->ldst.loadd;
	      else
		*cost += extra_cost->ldst.loadf;
	    }
	  else
	    *cost += COSTS_N_INSNS (1 + (mode == DFmode));

	  return true;
	}
      *cost = COSTS_N_INSNS (4);
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*cost = COSTS_N_INSNS (1);
      else
	*cost = COSTS_N_INSNS (4);
      return true;
    case HIGH:
    case LO_SUM:
      /* When optimizing for size, we prefer constant pool entries to
	 MOVW/MOVT pairs, so bump the cost of these slightly.  */
      if (!speed_p)
	*cost += 1;
      return true;

    case CLZ:
      if (speed_p)
	*cost += extra_cost->alu.clz;
      return true;

    case SMIN:
      if (XEXP (x, 1) == const0_rtx)
	{
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  return true;
	}
      /* Fall through.  */
    case SMAX:
    case UMIN:
    case UMAX:
      *cost += COSTS_N_INSNS (1);
      return false;
    case TRUNCATE:
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
		      == ZERO_EXTEND))))
	{
	  if (speed_p)
	    *cost += extra_cost->mult[1].extend;
	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
			      ZERO_EXTEND, 0, speed_p)
		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
				ZERO_EXTEND, 0, speed_p));
	  return true;
	}
      *cost = LIBCALL_COST (1);
      return false;
    case UNSPEC_VOLATILE:
    case UNSPEC:
      return arm_unspec_cost (x, outer_code, speed_p, cost);

    case PC:
      /* Reading the PC is like reading any other register.  Writing it
	 is more expensive, but we take that into account elsewhere.  */
      *cost = 0;
      return true;

    case ZERO_EXTRACT:
      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
    case SIGN_EXTRACT:
      if (arm_arch6
	  && mode == SImode
	  && CONST_INT_P (XEXP (x, 1))
	  && CONST_INT_P (XEXP (x, 2)))
	{
	  if (speed_p)
	    *cost += extra_cost->alu.bfx;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  return true;
	}
      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost += COSTS_N_INSNS (1);
      if (speed_p)
	*cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
      return true;
    case FLOAT_EXTEND:
      if (TARGET_HARD_FLOAT)
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].widen;
	  if (!TARGET_VFP5
	      && GET_MODE (XEXP (x, 0)) == HFmode)
	    {
	      /* Pre v8, widening HF->DF is a two-step process, first
		 widening to SFmode.  */
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[0].widen;
	    }
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  return true;
	}

      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].narrow;
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  return true;
	  /* Vector modes?  */
	}
      *cost = LIBCALL_COST (1);
      return false;
    case FMA:
      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  rtx op2 = XEXP (x, 2);

	  /* vfms or vfnma.  */
	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);

	  /* vfnms or vfnma.  */
	  if (GET_CODE (op2) == NEG)
	    op2 = XEXP (op2, 0);

	  *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
	  *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
	  *cost += rtx_cost (op2, mode, FMA, 2, speed_p);

	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].fma;

	  return true;
	}

      *cost = LIBCALL_COST (3);
      return false;
    case FIX:
    case UNSIGNED_FIX:
      if (TARGET_HARD_FLOAT)
	{
	  /* The *combine_vcvtf2i reduces a vmul+vcvt into
	     a vcvt fixed-point conversion.  */
	  if (code == FIX && mode == SImode
	      && GET_CODE (XEXP (x, 0)) == FIX
	      && GET_MODE (XEXP (x, 0)) == SFmode
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
		 > 0)
	    {
	      if (speed_p)
		*cost += extra_cost->fp[0].toint;

	      *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				 code, 0, speed_p);
	      return true;
	    }

	  if (GET_MODE_CLASS (mode) == MODE_INT)
	    {
	      mode = GET_MODE (XEXP (x, 0));
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].toint;
	      /* Strip off the 'cost' of rounding towards zero.  */
	      if (GET_CODE (XEXP (x, 0)) == FIX)
		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
				   0, speed_p);
	      else
		*cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* ??? Increase the cost to deal with transferring from
		 FP -> CORE registers?  */
	      return true;
	    }
	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
		   && TARGET_VFP5)
	    {
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].roundint;
	      return false;
	    }
	  /* Vector costs?  */
	}
      *cost = LIBCALL_COST (1);
      return false;
    case FLOAT:
    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
	{
	  /* ??? Increase the cost to deal with transferring from CORE
	     -> FP registers?  */
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].fromint;
	  return false;
	}
      *cost = LIBCALL_COST (1);
      return false;
    case ASM_OPERANDS:
      {
	/* Just a guess.  Guess number of instructions in the asm
	   plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
	   though (see PR60663).  */
	int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
	int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);

	*cost = COSTS_N_INSNS (asm_length + num_operands);
	return true;
      }

    default:
      if (mode != VOIDmode)
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
	*cost = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}

#undef HANDLE_NARROW_SHIFT_ARITH
/* RTX costs entry point.  */

static bool
arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  bool result;
  int code = GET_CODE (x);

  gcc_assert (current_tune->insn_extra_cost);

  result = arm_rtx_costs_internal (x, (enum rtx_code) code,
				   (enum rtx_code) outer_code,
				   current_tune->insn_extra_cost,
				   total, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
	       *total, result ? "final" : "partial");
    }

  return result;
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */

static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (CONST_INT_P (XEXP (x, 1)))
	return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
	return 3;

      return 4;
    }

  return 6;
}

static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
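/* For instance (illustrative only, using the ARM-state weights above):

     [r1], #4          -> 0   (POST_INC: most preferred)
     [r1, #8]          -> 2   (register plus constant)
     [r1, r2, lsl #2]  -> 3   (register plus shifted register)
     r1                -> 6   (bare register)
     symbol            -> 10  (MEM/LABEL/SYMBOL: least preferred)  */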
/* Adjust cost hook for XScale.  */

static bool
xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			  int *cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (dep_type == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT_IMM
	      || attr_type == TYPE_ALUS_SHIFT_IMM
	      || attr_type == TYPE_LOGIC_SHIFT_IMM
	      || attr_type == TYPE_LOGICS_SHIFT_IMM
	      || attr_type == TYPE_ALU_SHIFT_REG
	      || attr_type == TYPE_ALUS_SHIFT_REG
	      || attr_type == TYPE_LOGIC_SHIFT_REG
	      || attr_type == TYPE_LOGICS_SHIFT_REG
	      || attr_type == TYPE_MOV_SHIFT
	      || attr_type == TYPE_MVN_SHIFT
	      || attr_type == TYPE_MOV_SHIFT_REG
	      || attr_type == TYPE_MVN_SHIFT_REG))
	{
	  rtx shifted_operand;
	  int opno;

	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];

	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have to increase
	     the cost of this dependency.  */
	  extract_insn (dep);
	  preprocess_constraints (dep);
	  for (opno = 0; opno < recog_data.n_operands; opno++)
	    {
	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)
		continue;

	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
					   shifted_operand))
		{
		  *cost = 2;
		  return false;
		}
	    }
	}
    }
  return true;
}
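/* An example of the dependency handled above (illustrative only):

     add r2, r3, r4        @ DEP: an ALU instruction writing r2
     mov r1, r2, lsl #2    @ INSN: r2 feeds the shifter

   On XScale the shifter needs its operand one cycle early, so this pair
   incurs an extra stall, which is why the cost is bumped to 2 above.  */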
/* Adjust cost hook for Cortex A9.  */

static bool
cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			     int *cost)
{
  switch (dep_type)
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
	  && recog_memoized (dep) >= 0)
	{
	  if (GET_CODE (PATTERN (insn)) == SET)
	    {
	      if (GET_MODE_CLASS
		  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
		  || GET_MODE_CLASS
		  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
		{
		  enum attr_type attr_type_insn = get_attr_type (insn);
		  enum attr_type attr_type_dep = get_attr_type (dep);

		  /* By default all dependencies of the form
		     s0 = s0 <op> s1
		     s0 = s0 <op> s2
		     have an extra latency of 1 cycle because
		     of the input and output dependency in this
		     case.  However this gets modeled as a true
		     dependency and hence all these checks.  */
		  if (REG_P (SET_DEST (PATTERN (insn)))
		      && reg_set_p (SET_DEST (PATTERN (insn)), dep))
		    {
		      /* FMACS is a special case where the dependent
			 instruction can be issued 3 cycles before
			 the normal latency in case of an output
			 dependency.  */
		      if ((attr_type_insn == TYPE_FMACS
			   || attr_type_insn == TYPE_FMACD)
			  && (attr_type_dep == TYPE_FMACS
			      || attr_type_dep == TYPE_FMACD))
			{
			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) - 3;
			  else
			    *cost = insn_default_latency (dep);
			  return false;
			}
		      else
			{
			  if (dep_type == REG_DEP_OUTPUT)
			    *cost = insn_default_latency (dep) + 1;
			  else
			    *cost = insn_default_latency (dep);
			}
		      return false;
		    }
		}
	    }
	}
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* Adjust cost hook for FA726TE.  */

static bool
fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			   int *cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn
     followed by a predicated insn) has a penalty of 3 cycles.  */
  if (dep_type == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
	  && get_attr_type (insn) != TYPE_BRANCH)
	{
	  *cost = 3;
	  return false;
	}

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
	  || get_attr_conds (insn) == CONDS_USE)
	{
	  *cost = 0;
	  return false;
	}
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   A move between VFP_REGS and GENERAL_REGS is a single insn, but it is
   typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
	return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
	return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
	return 20;
      else
	return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
	return 4;
      else
	return 2;
    }
}

/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
		      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
	return 8;
      else
	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
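/* A minimal sketch of the Thumb-1 arithmetic above (illustrative only;
   toy_thumb1_memory_move_cost is hypothetical and exists nowhere in GCC):

     static int
     toy_thumb1_memory_move_cost (int size, int lo_regs_p)
     {
       if (size < 4)
	 return 8;
       return 2 * size * (lo_regs_p ? 1 : 2);
     }

   e.g. a DImode value (8 bytes) costs 16 in LO_REGS and 32 elsewhere.  */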
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */

static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return current_tune->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return current_tune->vec_costs->scalar_load_cost;

    case scalar_store:
      return current_tune->vec_costs->scalar_store_cost;

    case vector_stmt:
      return current_tune->vec_costs->vec_stmt_cost;

    case vector_load:
      return current_tune->vec_costs->vec_align_load_cost;

    case vector_store:
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		   struct _stmt_vec_info *stmt_info, int misalign,
		   enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
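/* Worked example (illustrative only): a vector statement with stmt_cost 1
   and count 2 that sits in the inner loop of a nested loop being
   vectorized contributes 2 * 50 * 1 == 100 to cost[vect_body], whereas
   the same statement outside the inner loop contributes just 2.  */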
/* Return true if and only if this insn can dual-issue only as older.  */

static bool
cortexa7_older_only (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADCS_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_FFARITHS:
    case TYPE_FFARITHD:
    case TYPE_F_STORES:
      return true;
    default:
      return false;
    }
}

/* Return true if and only if this insn can dual-issue as younger.  */

static bool
cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
	fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  switch (get_attr_type (insn))
    {
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
      return true;
    default:
      return false;
    }
}
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */

static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
			int *n_readyp, int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 5)
    fprintf (file,
	     ";; sched_reorder for cycle %d with %d insns in ready list\n",
	     clock, *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
	{
	  first_older_only = i;
	  if (verbose > 5)
	    fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
	  break;
	}
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
	first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 5)
	fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
	     INSN_UID (ready[first_older_only]),
	     INSN_UID (ready[first_younger]));
  rtx_insn *first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];

  ready[i] = first_older_only_insn;
  return;
}
/* Implement TARGET_SCHED_REORDER.  */

static int
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
		   int clock)
{
  switch (arm_tune)
    {
    case TARGET_CPU_cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */

static int
arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
		 unsigned int)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && dep_type == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
	return cost;
    }

  /* XXX Is this strictly true?  */
  if (dep_type == REG_DEP_ANTI
      || dep_type == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (dep_type == 0
      && CALL_P (insn))
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */

      if ((GET_CODE (src_mem) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
	return 1;
    }

  return cost;
}
int
arm_max_conditional_execute (void)
{
  return max_insns_skipped;
}

static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
  else
    return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
	 : arm_default_branch_cost (speed_p, predictable_p);
}

static int
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!fp_consts_inited)
    init_fp_table ();

  r = CONST_DOUBLE_REAL_VALUE (x);
  if (REAL_VALUE_MINUS_ZERO (*r))
    return 0;

  if (real_equal (r, &value_fp0))
    return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     valid = -1^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

     - A (most-significant) is the sign bit.
     - BCD are the exponent (encoded as r XOR 3).
     - EFGH are the mantissa (encoded as n - 16).  */
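/* A worked instance of the mapping above (a sketch for illustration only;
   this helper is hypothetical and unused -- the real encoder is
   vfp3_const_double_index below).  For 1.0 we have 1.0 = 16 * 2^-4, so
   s = 0, n = 16, r = 4, giving ABCDEFGH = 0:(4 XOR 3):(16 - 16)
   = 0b01110000 = 0x70.  */
static unsigned char ATTRIBUTE_UNUSED
vfp3_toy_encode_quarter_precision (int s, int n, int r)
{
  /* Assumes 16 <= n <= 31 and 0 <= r <= 7, per the comment above.  */
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}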
/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  bool fail;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  r = *CONST_DOUBLE_REAL_VALUE (x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
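/* For illustration (a hypothetical sketch, not used by the recognizer
   below): after splatting a constant into per-element bytes, variant 0 in
   the table corresponds to every 4-byte group looking like "ab 00 00 00"
   (little-endian), e.g.

     static int
     toy_variant0_p (const unsigned char *bytes, unsigned int idx)
     {
       for (unsigned int i = 0; i < idx; i += 4)
	 if (!(bytes[i] == bytes[0] && bytes[i + 1] == 0
	       && bytes[i + 2] == 0 && bytes[i + 3] == 0))
	   return 0;
       return 1;
     }

   which is exactly the test performed by CHECK (4, 32, 0, ...) in
   neon_valid_immediate below.  */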
static int
neon_valid_immediate (rtx op, machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    if (!(TEST))				\
      matches = 0;				\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    n_elts = CONST_VECTOR_NUNITS (op);
  else
    {
      n_elts = 1;
      if (mode == VOIDmode)
	mode = DImode;
    }

  innersize = GET_MODE_UNIT_SIZE (mode);

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
	return -1;

      /* FP16 vectors cannot be represented.  */
      if (GET_MODE_INNER (mode) == HFmode)
	return -1;

      /* All elements in the vector must be the same.  Note that 0.0 and -0.0
	 are distinct in this context.  */
      if (!const_vec_duplicate_p (op))
	return -1;

      if (modconst)
	*modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
	*elementwidth = 0;

      if (el0 == CONST0_RTX (GET_MODE (el0)))
	return 19;
      else
	return 18;
    }

  /* The tricks done in the code below apply for little-endian vector layout.
     For big-endian vectors only allow vectors of the form { a, a, a..., a }.
     FIXME: Implement logic for big-endian vectors.  */
  if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
    return -1;

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el);

      for (unsigned int byte = 0; byte < innersize; byte++)
	{
	  bytes[idx++] = (elpart & 0xff) ^ invmask;
	  elpart >>= BITS_PER_UNIT;
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
			&& bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);

  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
      else
	{
	  unsigned HOST_WIDE_INT imm = 0;

	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  *modconst = GEN_INT (imm);
	}
    }

  return immtype;
#undef CHECK
}
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */
int
neon_immediate_valid_for_move (rtx op, machine_mode mode,
			       rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}

/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for description of INVERSE.  */
int
neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
12000 the immediate is valid, write a constant suitable for using as an operand
12001 to VSHR/VSHL to *MODCONST and the corresponding element width to
12002 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12003 because they have different limitations. */
12006 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
12007 rtx
*modconst
, int *elementwidth
,
12010 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
12011 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
12012 unsigned HOST_WIDE_INT last_elt
= 0;
12013 unsigned HOST_WIDE_INT maxshift
;
12015 /* Split vector constant out into a byte vector. */
12016 for (i
= 0; i
< n_elts
; i
++)
12018 rtx el
= CONST_VECTOR_ELT (op
, i
);
12019 unsigned HOST_WIDE_INT elpart
;
12021 if (CONST_INT_P (el
))
12022 elpart
= INTVAL (el
);
12023 else if (CONST_DOUBLE_P (el
))
12026 gcc_unreachable ();
12028 if (i
!= 0 && elpart
!= last_elt
)
12034 /* Shift less than element size. */
12035 maxshift
= innersize
* 8;
12039 /* Left shift immediate value can be from 0 to <size>-1. */
12040 if (last_elt
>= maxshift
)
12045 /* Right shift immediate value can be from 1 to <size>. */
12046 if (last_elt
== 0 || last_elt
> maxshift
)
12051 *elementwidth
= innersize
* 8;
12054 *modconst
= CONST_VECTOR_ELT (op
, 0);
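/* Example of the limits above (illustrative only): for V4HImode the
   element size is 16 bits, so a constant left-shift count must lie in
   [0, 15] while a right-shift count must lie in [1, 16].  */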
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
			     int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}

/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     machine_mode mode, int quad,
			     bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
{
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
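/* A scalar sketch of the same halving pattern (illustrative only;
   toy_pairwise_sum is hypothetical):

     static int
     toy_pairwise_sum (int *v, unsigned int parts)
     {
       for (unsigned int i = parts / 2; i >= 1; i /= 2)
	 for (unsigned int j = 0; j < i; j++)
	   v[j] = v[2 * j] + v[2 * j + 1];
       return v[0];
     }

   An 8-element vector is reduced in three steps (8 -> 4 -> 2 -> 1),
   mirroring the loop above in which each REDUC insn halves the number of
   live elements.  */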
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

static rtx
neon_vdup_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx x;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  if (!const_vec_duplicate_p (vals, &x))
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, x);
  return gen_rtx_VEC_DUPLICATE (mode, x);
}
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We can not take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      switch (mode)
	{
	case V8QImode:
	  emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
	  break;
	case V16QImode:
	  emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
	  break;
	case V4HImode:
	  emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
	  break;
	case V8HImode:
	  emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
	  break;
	case V2SImode:
	  emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
	  break;
	case V4SImode:
	  emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
	  break;
	case V2SFmode:
	  emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
	  break;
	case V4SFmode:
	  emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
	  break;
	case V2DImode:
	  emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
	  break;
	default:
	  gcc_unreachable ();
	}
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  EXP indicates the source location, which includes the
   inlining history for intrinsics.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const_tree exp, const char *desc)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    {
      if (exp)
	error ("%K%s %wd out of range %wd - %wd",
	       exp, desc, lane, low, high - 1);
      else
	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
    }
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
		  const_tree exp)
{
  bounds_check (operand, low, high, exp, "lane");
}

/* Bounds-check constants.  */

void
arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, NULL_TREE, "constant");
}
HOST_WIDE_INT
neon_element_bits (machine_mode mode)
{
  return GET_MODE_UNIT_BITSIZE (mode);
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int
arm_coproc_mem_operand (rtx op, bool wb)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
	  && (GET_CODE (ind) == PRE_INC
	      || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match: (plus (reg) (const)).  */
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (strict && ! (reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn.  */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1)))
    return true;

  /* Match: (plus (reg) (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
	 than what the instruction format permits.  We have no such constraint
	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
	  < (VALID_NEON_QREG_MODE (GET_MODE (op)) ? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
	return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */
static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X contains any TLS SYMBOL_REF.  */
int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return 1;

    /* Fall through.  */
    default:
      return 0;
    }
}
/* Must not copy any rtx that uses a pc-relative address.
   Also, disallow copying of load-exclusive instructions that
   may appear after splitting of compare-and-swap-style operations
   so as to prevent those loops from being transformed away from their
   canonical forms (see PR 69904).  */

static bool
arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
	  && (XINT (x, 1) == UNSPEC_PIC_BASE
	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
	return true;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx src = SET_SRC (set);
      if (GET_CODE (src) == ZERO_EXTEND)
	src = XEXP (src, 0);

      /* Catch the load-exclusive and load-acquire operations.  */
      if (GET_CODE (src) == UNSPEC_VOLATILE
	  && (XINT (src, 1) == VUNSPEC_LL
	      || XINT (src, 1) == VUNSPEC_LAX))
	return true;
    }

  return false;
}
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}
/* Match pair of min/max operators that can be implemented via usat/ssat.  */
bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
			int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
	*mask = log;
      if (signed_sat)
	*signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
	*mask = log + 1;
      if (signed_sat)
	*signed_sat = true;

      return true;
    }

  return false;
}
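/* Worked example (added, illustrative only): for bounds HI_BOUND = 255,
   LO_BOUND = 0 we get log = exact_log2 (256) = 8 and an unsigned match with
   *MASK = 8 (usat #8 saturates to [0, 255]).  For HI_BOUND = 127,
   LO_BOUND = -128 we have -128 == -127 - 1, so log = 7 and the signed match
   sets *MASK = 8 (ssat #8 saturates to [-128, 127]).  */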
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
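/* Illustrative example (added): MEMs addressed as [r4, #8] and [r4, #12]
   are adjacent (same base register, VAL_DIFF == 4), so a peephole may pair
   them; the equivalent pair based on the frame pointer is rejected, since
   register elimination could later push the offsets out of range.  */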
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
     where
     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for kth register being loaded,
	 REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
		     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
	 popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
	  || !REG_P (XEXP (SET_SRC (elt), 0))
	  || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
	  || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
	  || INTVAL (XEXP (SET_SRC (elt), 1)) !=
	     ((count - 1 - offset_adj) * reg_increment))
	return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is
     loaded, success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
	return false;

      if (load)
	{
	  reg = SET_DEST (elt);
	  mem = SET_SRC (elt);
	}
      else
	{
	  reg = SET_SRC (elt);
	  mem = SET_DEST (elt);
	}

      if (!REG_P (reg)
	  || GET_MODE (reg) != mode
	  || REGNO (reg) <= regno
	  || (consecutive
	      && (REGNO (reg) !=
		  (unsigned int) (first_regno + regs_per_val * (i - base))))
	  /* Don't allow SP to be loaded unless it is also the base register.
	     It guarantees that SP is reset correctly when an LDM instruction
	     is interrupted.  Otherwise, we might end up with a corrupt
	     stack.  */
	  || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
	  || !MEM_P (mem)
	  || GET_MODE (mem) != mode
	  || ((GET_CODE (XEXP (mem, 0)) != PLUS
	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
		   offset + (i - base) * reg_increment))
	      && (!REG_P (XEXP (mem, 0))
		  || offset + (i - base) * reg_increment != 0)))
	return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
	addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
	return false;

      /* For Thumb-1, address register is always modified - either by
	 write-back or by explicit load.  If the pattern does not describe
	 an update, then the address register must be in the list of loaded
	 registers.  */
      if (TARGET_THUMB1)
	return update || addr_reg_in_reglist;
    }

  return true;
}
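/* Illustrative example (added): an ldmia of {r4, r5} based on r0 is matched
   as the parallel

     [(set (reg:SI r4) (mem:SI (reg:SI r0)))
      (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))]

   satisfying rules 1 and 2 above; with CONSECUTIVE it would also satisfy
   rule 3, since REGNO (r5) == REGNO (r4) + 1.  */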
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

 	ldr	rd1, [rbase + offset]
 	ldr	rd2, [rbase + offset + 4]

     to

 	add	rd1, rbase, offset
 	ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

	NREGS		CYCLES
	  1		  3
	  2		  4
	  3		  5
	  4		  6

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

	NREGS		CYCLES
	  1		 1 - 3
	  2		 2 - 6
	  3		 3 - 9
	  4		 4 - 12

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	  {
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
	return false;
    }
  return true;
}
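/* Worked example (added): with UNSORTED_OFFSETS = {4, 12, 0, 8} and
   ORDER[0] = 2 (the index of offset 0), the loop fills in
   ORDER = {2, 0, 3, 1}, visiting offsets 0, 4, 8, 12.  If any offset in
   the chain were missing or duplicated, the function would return
   false.  */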
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  rtx base_reg_rtx = NULL;
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (GET_CODE (reg) == SUBREG
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  unsorted_regs[i] = (REG_P (operands[i])
			      ? REGNO (operands[i])
			      : REGNO (SUBREG_REG (operands[i])));

	  /* If it isn't an integer register, or if it overwrites the
	     base register but isn't the last insn in the list, then
	     we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || unsorted_regs[i] > 14
	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
	    return 0;

	  /* Don't allow SP to be loaded unless it is also the base
	     register.  It guarantees that SP is reset correctly when
	     an LDM instruction is interrupted.  Otherwise, we might
	     end up with a corrupt stack.  */
	  if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
					ldm_case == 5
					? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
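/* Illustrative note (added): the returned LDM_CASE encodes the addressing
   variant -- e.g. loads at offsets {0, 4, 8} from a common base give case 1
   (ldmia), offsets {4, 8, 12} give case 2 (ldmib, ARM state only), and
   offsets {-8, -4, 0} give case 3 (ldmda).  */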
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
			 HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (GET_CODE (reg) == SUBREG
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
				  ? operands[i] : SUBREG_REG (operands[i]));
	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  /* If it isn't an integer register, then we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      /* The effects are unpredictable if the base register is
		 both updated and stored.  */
	      || (base_writeback && unsorted_regs[i] == base_reg)
	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
	      || unsorted_regs[i] > 14)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	{
	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
	  if (reg_rtxs)
	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
	}

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			 HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */
static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			  HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */

static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
				    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
				     write_back ? 4 * count : 0);
}
rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
		       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
			      offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
			rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
			      offsetp);
}
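/* Usage sketch (added; illustrative only, the BASEREG_RTX and BASEMEM names
   below are hypothetical): to load r0..r3 from four consecutive words at
   the address held in a base register, updating the base past the block:

     int regs[4] = { 0, 1, 2, 3 };
     HOST_WIDE_INT offset = 0;
     emit_insn (arm_gen_load_multiple (regs, 4, basereg_rtx, TRUE,
				       basemem, &offset));

   After the call OFFSET has advanced by 16, one SImode word (4 bytes) per
   register, since WRITE_BACK was requested.  */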
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */

bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
				     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
	if (regs[i] > regs[j])
	  std::swap (regs[i], regs[j]);

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
      gcc_assert (ldm_case == 1 || ldm_case == 5);
      write_back = TRUE;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
	base_reg_rtx = newbase;
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
				      write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */

bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
				      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }

  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */

bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
				      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
	if (regs[i] == regs[j])
	  {
	    rtx t = peep2_find_free_register (0, nops * 2,
					      TARGET_THUMB1 ? "l" : "r",
					      SImode, &allocated);
	    if (t == NULL_RTX)
	      return false;
	    reg_rtxs[i] = t;
	    regs[i] = REGNO (t);
	  }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
	if (regs[j] > regs[reg_order[i - 1]]
	    && (this_order == reg_order[i - 1]
		|| regs[j] < regs[this_order]))
	  this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
	return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }

  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */

static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
				   HOST_WIDE_INT length,
				   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = i;

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
					    TRUE, srcbase, &srcoffset));
	  src_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
						 - src_autoinc));
	      mem = adjust_automodify_address (srcbase, SImode, addr,
					       srcoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
	    }
	  srcoffset += block_size_bytes;
	}

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
					     TRUE, dstbase, &dstoffset));
	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
						 - dst_autoinc));
	      mem = adjust_automodify_address (dstbase, SImode, addr,
					       dstoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
	    }
	  dstoffset += block_size_bytes;
	}

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
					&srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, src,
				srcoffset + j * UNITS_PER_WORD - src_autoinc);
	  mem = adjust_automodify_address (srcbase, SImode, addr,
					   srcoffset + j * UNITS_PER_WORD);
	  if (src_aligned)
	    emit_move_insn (regs[j], mem);
	  else
	    emit_insn (gen_unaligned_loadsi (regs[j], mem));
	}
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
					 &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, dst,
				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, SImode, addr,
					   dstoffset + j * UNITS_PER_WORD);
	  if (dst_aligned)
	    emit_move_insn (mem, regs[j]);
	  else
	    emit_insn (gen_unaligned_storesi (mem, regs[j]));
	}
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
	 byte, depending on interleave factor.  */
      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
	  emit_insn (gen_unaligned_storehi (mem,
		       gen_lowpart (HImode, halfword_tmp)));
	  halfword_tmp = NULL;
	  dstoffset += 2;
	}

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
	  byte_tmp = NULL;
	  dstoffset++;
	}

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
		   gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
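/* Worked example (added, illustrative): a 23-byte unaligned copy with
   INTERLEAVE_FACTOR == 2 proceeds as two 8-byte blocks (16 bytes), then one
   leftover word, then a halfword, then the final byte; with a factor above
   1 the halfword and byte stores are deferred until after their loads so
   the two loads can issue back to back.  */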
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
		      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
			       unsigned int interleave_factor,
			       HOST_WIDE_INT bytes_per_iter)
{
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
				   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
				     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
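/* Worked example (added, illustrative): for LENGTH == 100 and
   BYTES_PER_ITER == 16, LEFTOVER is 4; the loop runs six iterations copying
   96 bytes, and the straight-line helper then copies the final 4 bytes.  */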
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_movmemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
	 or dst_aligned though: allow more interleaving in those cases since the
	 resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
	arm_block_move_unaligned_loop (operands[0], operands[1], length,
				       interleave_factor, bytes_per_iter);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length,
					   interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
	 subject to loop unrolling, which makes tuning this condition a little
	 tricky.  */
      if (length > 32)
	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_movmemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_movmemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  while (in_words_to_go >= 2)
    {
      if (in_words_to_go > 4)
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
					  TRUE, srcbase, &srcoffset));
      else
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
					  src, FALSE, srcbase,
					  &srcoffset));

      if (out_words_to_go)
	{
	  if (out_words_to_go > 4)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
					       TRUE, dstbase, &dstoffset));
	  else if (out_words_to_go != 1)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
					       out_words_to_go, dst,
					       (last_bytes == 0
						? FALSE : TRUE),
					       dstbase, &dstoffset));
	  else
	    {
	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
	      emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
	      if (last_bytes != 0)
		{
		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
		  dstoffset += 4;
		}
	    }
	}

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);	/* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
			      GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode,
					   plus_constant (Pmode, dst,
							  last_bytes - 1),
					   dstoffset + last_bytes - 1);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

	  if (--last_bytes)
	    {
	      tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
	      part_bytes_reg = tmp;
	    }
	}
    }
  else
    {
      if (last_bytes > 1)
	{
	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
	  last_bytes -= 2;
	  if (last_bytes)
	    {
	      rtx tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
	      part_bytes_reg = tmp;
	      dstoffset += 2;
	    }
	}

      if (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
	}
    }

  return 1;
}
/* Helper for gen_movmem_ldrd_strd.  Increase the address of memory rtx
   by mode size.  */
static rtx
next_consecutive_mem (rtx mem)
{
  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
bool
gen_movmem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_movmemqi (operands);

  /* If the either src or dst is unaligned we'll be accessing it as pairs
     of unaligned SImode accesses.  Otherwise we can generate DImode
     ldrd/strd instructions.  */
  src = adjust_address (src, src_aligned ? DImode : SImode, 0);
  dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);

  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      rtx low_reg = NULL_RTX;
      rtx hi_reg = NULL_RTX;

      if (!src_aligned || !dst_aligned)
	{
	  low_reg = gen_lowpart (SImode, reg0);
	  hi_reg = gen_highpart_mode (SImode, DImode, reg0);
	}
      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	{
	  emit_insn (gen_unaligned_loadsi (low_reg, src));
	  src = next_consecutive_mem (src);
	  emit_insn (gen_unaligned_loadsi (hi_reg, src));
	}

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	{
	  emit_insn (gen_unaligned_storesi (dst, low_reg));
	  dst = next_consecutive_mem (dst);
	  emit_insn (gen_unaligned_storesi (dst, hi_reg));
	}

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  if (len == 0)
    return true;

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
	emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
	emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
	emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
      else
	emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      if (len == 2)
	return true;
    }

  dst = adjust_address (dst, QImode, 0);
  src = adjust_address (src, QImode, 0);
  reg0 = gen_reg_rtx (QImode);
  emit_move_insn (reg0, src);
  emit_move_insn (dst, reg0);
  return true;
}
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */

machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
	  != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    std::swap (cond1, cond2);

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DEQmode;

      switch (cond2)
	{
	case EQ: return CC_DEQmode;
	case LE: return CC_DLEmode;
	case LEU: return CC_DLEUmode;
	case GE: return CC_DGEmode;
	case GEU: return CC_DGEUmode;
	default: gcc_unreachable ();
	}

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTmode;

      switch (cond2)
	{
	case LT:
	  return CC_DLTmode;
	case LE:
	  return CC_DLEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTmode;

      switch (cond2)
	{
	case GT:
	  return CC_DGTmode;
	case GE:
	  return CC_DGEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTUmode;

      switch (cond2)
	{
	case LTU:
	  return CC_DLTUmode;
	case LEU:
	  return CC_DLEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTUmode;

      switch (cond2)
	{
	case GTU:
	  return CC_DGTUmode;
	case GEU:
	  return CC_DGEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
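/* Illustrative example (added): for X = (eq r0 (const_int 0)),
   Y = (eq r1 (const_int 0)) and COND_OR == DOM_CC_X_AND_Y (testing
   r0 == 0 && r1 == 0), both conditions are EQ, so CC_DEQmode is returned
   and the pair can be emitted as a cmp/cmpeq sequence.  */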
14620 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
14622 /* All floating point compares return CCFP if it is an equality
14623 comparison, and CCFPE otherwise. */
14624 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14647 gcc_unreachable ();
  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
	  || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
	  || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
	  || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == ROTATERT
	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;
  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      switch (op)
	{
	case EQ:
	case NE:
	  /* A DImode comparison against zero can be implemented by
	     or'ing the two halves together.  */
	  if (y == const0_rtx)
	    return CC_Zmode;

	  /* We can do an equality test in three Thumb instructions.  */
	  if (!TARGET_32BIT)
	    return CC_Zmode;

	  /* FALLTHROUGH */

	case LTU:
	case LEU:
	case GTU:
	case GEU:
	  /* DImode unsigned comparisons can be implemented by cmp +
	     cmpeq without a scratch register.  Not worth doing in
	     Thumb-2.  */
	  if (TARGET_ARM)
	    return CC_CZmode;

	  /* FALLTHROUGH */

	case LT:
	case LE:
	case GT:
	case GE:
	  /* DImode signed and unsigned comparisons can be implemented
	     by cmp + sbcs with a scratch register, but that does not
	     set the Z flag - we must reverse GT/LE/GTU/LEU.  */
	  gcc_assert (op != EQ && op != NE);
	  return CC_NCVmode;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
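/* An illustration of the selection above: comparing (plus r0 r1) with
   zero for LT yields CC_NOOVmode, since ADDS sets N and Z but not a
   usable V for this purpose, while comparing (neg r0) with a register
   for EQ or NE yields CC_Zmode, because only the Z flag is relied on.  */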
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
	 then compare against zero.  Not used for ARM mode; there
	 CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
	{
	  gcc_assert (!reload_completed);
	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
	  y = const0_rtx;
	}

      /* A scratch register is required.  */
      if (reload_completed)
	gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
	scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
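/* For instance, a DImode equality test of two non-zero values goes
   through the XOR path above: the operands are combined with xor_optab
   and the result is compared against zero in CC_Zmode, with an SImode
   scratch clobbered in the same parallel.  */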
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62554: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (operands[0]));
	  emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
				gen_rtx_SUBREG (SImode, ref, 0)));
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);
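      /* For example, offset 0x2003 splits into lo = 3, hi = 0x2000, and
	 offset -0x1001 into lo = -1, hi = -0x1000; HI is added into the
	 base below and LO remains as the immediate offset.  */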
      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT
				(SImode,
				 gen_rtx_SUBREG (SImode, operands[0], 0),
				 GEN_INT (8)),
				scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT (SImode, scratch,
						GEN_INT (8)),
				gen_rtx_SUBREG (SImode, operands[0], 0)));
}
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));

      /* PR 62254: If there is no equivalent memory location then just move
	 the value as an SImode register move.  This happens when the target
	 architecture variant does not have an HImode register move.  */
      if (base == NULL)
	{
	  gcc_assert (REG_P (outval) || SUBREG_P (outval));

	  if (REG_P (outval))
	    {
	      emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				    gen_rtx_SUBREG (SImode, outval, 0)));
	    }
	  else /* SUBREG_P (outval)  */
	    {
	      if (GET_MODE (SUBREG_REG (outval)) == SImode)
		emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
				      SUBREG_REG (outval)));
	      else
		/* FIXME: Handle other cases ?  */
		gcc_unreachable ();
	    }
	  return;
	}
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
	{
	  /* Updating base_plus might destroy outval, see if we can
	     swap the scratch and base_plus.  */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    std::swap (scratch, base_plus);
	  else
	    {
	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

	      /* Be conservative and copy OUTVAL into the scratch now,
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
	      /* XXX Might this clobber base?  I can't see how it can,
		 since scratch is known to overlap with OUTVAL, and
		 must be wider than a word.  */
	      emit_insn (gen_movhi (scratch_hi, outval));
	      outval = scratch_hi;
	    }
	}

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	    {
	      /* Updating base_plus might destroy outval, see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		std::swap (scratch, base_plus);
	      else
		{
		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now,
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		     outval.  */
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;
		}
	    }

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, scratch)));
    }
}
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}
/* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
   byte of a stack argument has useful data.  For legacy APCS ABIs we use
   the default.  For AAPCS based ABIs small aggregate types are placed
   in the lowest memory address.  */
static pad_direction
arm_function_arg_padding (machine_mode mode, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return default_function_arg_padding (mode, type);

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return PAD_DOWNWARD;

  return PAD_UPWARD;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
bool
arm_pad_reg_upward (machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */
      if (type)
	{
	  if ((AGGREGATE_TYPE_P (type)
	       || TREE_CODE (type) == COMPLEX_TYPE
	       || FIXED_POINT_TYPE_P (type))
	      && int_size_in_bytes (type) <= 4)
	    return true;
	}
      else
	{
	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	      && GET_MODE_SIZE (mode) <= 4)
	    return true;
	}
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;

  if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
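/* With the limits above, an ARM-state offset of 255 or -255 is
   accepted and 256 rejected, while a Thumb-2 offset of 1020 is
   accepted but 1018 is rejected as not a multiple of 4 and 1024 as
   out of range.  */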
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  Pattern
   guarantees that both memory accesses use the same base register,
   the offsets are constants within the range, and the gap between the
   offsets is 4.  If reload is complete then check that the registers are
   legal.  WBACK indicates whether the address is updated.  LOAD indicates
   whether the memory access is a load or a store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
		       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
	  || (t == SP_REGNUM)
	  || (t == PC_REGNUM)
	  || (t2 == SP_REGNUM)
	  || (t2 == PC_REGNUM)
	  || (!load && (n == PC_REGNUM))
	  || (load && (t == t2))
	  /* Triggers Cortex-M3 LDRD errata.  */
	  || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
	  || (t2 == PC_REGNUM)
	  || (t % 2 != 0)   /* First destination register is not even.  */
	  || (t2 != t + 1)
	  /* PC can be used as base register (for offset addressing only),
	     but it is deprecated.  */
	  || (n == PC_REGNUM)))
    return false;

  return true;
}
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE and
   OFFSET accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (GET_CODE (mem) == SUBREG)
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
				 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}
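/* The accepted forms are thus a plain base register, which yields
   OFFSET = 0, and a base plus a constant, e.g. an address of the form
   (plus (reg) (const_int 8)), which yields that register as BASE and
   8 as OFFSET; auto-modify addresses fail the side_effects_p check.  */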
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
			bool const_store, bool commute)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset;
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  int i, gap;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	{
	  tmp = SUBREG_REG (operands[i]);
	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
	  operands[i] = tmp;
	}
    }

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
	mov r0, 0
	str r0, [r2]
	mov r0, 1
	str r0, [r2, #4]
     can be transformed into
	mov r1, 0
	strd r1, r0, [r2]
     in Thumb mode assuming that r1 is free.
     For ARM mode do the same but only if the starting register
     can be made to be even.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))
    {
      if (TARGET_THUMB2)
	{
	  CLEAR_HARD_REG_SET (regset);
	  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* Use the new register in the first load to ensure that
	     if the original input register is not dead after peephole,
	     then it will have the correct constant value.  */
	  operands[0] = tmp;
	}
      else if (TARGET_ARM)
	{
	  int regno = REGNO (operands[0]);
	  if (!peep2_reg_dead_p (4, operands[0]))
	    {
	      /* When the input register is even and is not dead after the
		 pattern, it has to hold the second constant but we cannot
		 form a legal STRD in ARM mode with this register as the
		 second register.  */
	      if (regno % 2 == 0)
		return false;

	      /* Is regno-1 free? */
	      SET_HARD_REG_SET (regset);
	      CLEAR_HARD_REG_BIT(regset, regno - 1);
	      tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	      if (tmp == NULL_RTX)
		return false;

	      operands[0] = tmp;
	    }
	  else
	    {
	      /* Find a DImode register.  */
	      CLEAR_HARD_REG_SET (regset);
	      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	      if (tmp != NULL_RTX)
		{
		  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
		  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
		}
	      else
		{
		  /* Can we use the input register to form a DI register?  */
		  SET_HARD_REG_SET (regset);
		  CLEAR_HARD_REG_BIT(regset,
				     regno % 2 == 0 ? regno + 1 : regno - 1);
		  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
		  if (tmp == NULL_RTX)
		    return false;
		  operands[regno % 2 == 1 ? 0 : 1] = tmp;
		}
	    }

	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
	}
    }

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
    {
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);
      if (const_store)
	std::swap (operands[4], operands[5]);
    }
  else
    {
      gap = offsets[1] - offsets[0];
      offset = offsets[0];
    }

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != 4)
    return false;

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
			     false, load))
    return true;

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */
  if (TARGET_THUMB2)
    return false;

  if (load && commute)
    {
      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				 false, load))
	return true;
    }

  if (const_store)
    {
      /* If input registers are dead after this pattern, they can be
	 reordered or replaced by other registers that are free in the
	 current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
	  || !peep2_reg_dead_p (4, operands[1]))
	return false;

      /* Try to reorder the input registers.  */
      /* For example, the code
	   mov r0, 0
	   mov r1, 1
	   str r1, [r2]
	   str r0, [r2, #4]
	 can be transformed into
	   mov r1, 0
	   mov r0, 1
	   strd r0, [r2]
      */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
				 false, false))
	{
	  std::swap (operands[0], operands[1]);
	  return true;
	}

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
      if (tmp == NULL_RTX)
	return false;

      /* DREG must be an even-numbered register in DImode.
	 Split it into SI registers.  */
      operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
      operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
      gcc_assert (operands[0] != NULL_RTX);
      gcc_assert (operands[1] != NULL_RTX);
      gcc_assert (REGNO (operands[0]) % 2 == 0);
      gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

      return (operands_ok_ldrd_strd (operands[0], operands[1],
				     base, offset, false, load));
    }

  return false;
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fputc (',', f);
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */
struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *	    next;
  rtx_insn *	    insn;
  HOST_WIDE_INT	    address;
  rtx *		    loc;
  machine_mode	    mode;
  int		    fix_size;
  rtx		    value;
  Mnode *	    minipool;
  HOST_WIDE_INT	    forwards;
  HOST_WIDE_INT	    backwards;
};
/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
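/* Hence a QImode or HImode entry occupies a full word, while a DImode
   entry occupies 8 bytes and, when ARM_DOUBLEWORD_ALIGN, also triggers
   the 8-byte alignment handling in the pool insertion code below.  */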
static Mnode *minipool_vector_head;
static Mnode *minipool_vector_tail;
static rtx_code_label *minipool_vector_label;
static int minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *minipool_fix_head;
Mfix *minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *minipool_barrier;
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif

static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~HOST_WIDE_INT_1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */
static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
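/* For example, a label aligned to 8 bytes is preceded by at most 6
   bytes of padding in Thumb code (minimum insn size 2) and at most 4
   in ARM code (minimum insn size 4).  */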
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     can not be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
16039 node added or NULL if the constant will not fit in this pool.
16041 Note that the code for insertion for a backwards reference can be
16042 somewhat confusing because the calculated offsets for each fix do
16043 not take into account the size of the pool (which is still under
16046 add_minipool_backward_ref (Mfix
*fix
)
16048 /* If set, min_mp is the last pool_entry that has a lower constraint
16049 than the one we are trying to add. */
16050 Mnode
*min_mp
= NULL
;
16051 /* This can be negative, since it is only a constraint. */
16052 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16055 /* If we can't reach the current pool from this insn, or if we can't
16056 insert this entry at the end of the pool without pushing other
16057 fixes out of range, then we don't try. This ensures that we
16058 can't fail later on. */
16059 if (min_address
>= minipool_barrier
->address
16060 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16061 >= minipool_barrier
->address
))
16064 /* Scan the pool to see if a constant with the same value has
16065 already been added. While we are doing this, also note the
16066 location where we must insert the constant if it doesn't already
16068 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16070 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16071 && fix
->mode
== mp
->mode
16072 && (!LABEL_P (fix
->value
)
16073 || (CODE_LABEL_NUMBER (fix
->value
)
16074 == CODE_LABEL_NUMBER (mp
->value
)))
16075 && rtx_equal_p (fix
->value
, mp
->value
)
16076 /* Check that there is enough slack to move this entry to the
16077 end of the table (this is conservative). */
16078 && (mp
->max_address
16079 > (minipool_barrier
->address
16080 + minipool_vector_tail
->offset
16081 + minipool_vector_tail
->fix_size
)))
16084 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16087 if (min_mp
!= NULL
)
16088 mp
->min_address
+= fix
->fix_size
;
16091 /* Note the insertion point if necessary. */
16092 if (mp
->min_address
< min_address
)
16094 /* For now, we do not allow the insertion of 8-byte alignment
16095 requiring nodes anywhere but at the start of the pool. */
16096 if (ARM_DOUBLEWORD_ALIGN
16097 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16102 else if (mp
->max_address
16103 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16105 /* Inserting before this entry would push the fix beyond
16106 its maximum address (which can happen if we have
16107 re-located a forwards fix); force the new fix to come
16109 if (ARM_DOUBLEWORD_ALIGN
16110 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16115 min_address
= mp
->min_address
+ fix
->fix_size
;
16118 /* Do not insert a non-8-byte aligned quantity before 8-byte
16119 aligned quantities. */
16120 else if (ARM_DOUBLEWORD_ALIGN
16121 && fix
->fix_size
< 8
16122 && mp
->fix_size
>= 8)
16125 min_address
= mp
->min_address
+ fix
->fix_size
;
16130 /* We need to create a new entry. */
16132 mp
->fix_size
= fix
->fix_size
;
16133 mp
->mode
= fix
->mode
;
16134 mp
->value
= fix
->value
;
16136 mp
->max_address
= minipool_barrier
->address
+ 65536;
16138 mp
->min_address
= min_address
;
16140 if (min_mp
== NULL
)
16143 mp
->next
= minipool_vector_head
;
16145 if (mp
->next
== NULL
)
16147 minipool_vector_tail
= mp
;
16148 minipool_vector_label
= gen_label_rtx ();
16151 mp
->next
->prev
= mp
;
16153 minipool_vector_head
= mp
;
16157 mp
->next
= min_mp
->next
;
16161 if (mp
->next
!= NULL
)
16162 mp
->next
->prev
= mp
;
16164 minipool_vector_tail
= mp
;
16167 /* Save the new entry. */
16175 /* Scan over the following entries and adjust their offsets. */
16176 while (mp
->next
!= NULL
)
16178 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16179 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16182 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16184 mp
->next
->offset
= mp
->offset
;
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
/* Output the literal table.  */
static void
dump_minipool (rtx_insn *scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file,
		       ";;  Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  rtx val = copy_rtx (mp->value);

	  switch (GET_MODE_SIZE (mp->mode))
	    {
#ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (val), scan);
	      break;
#endif
#ifdef HAVE_consttable_16
	    case 16:
	      scan = emit_insn_after (gen_consttable_16 (val), scan);
	      break;
#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx_insn *insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx_insn *next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx_jump_table_data *tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
	 code that pushes minipool fixes.  */
      if (LABEL_P (from))
	count += get_label_padding (from);
      else
	count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Make sure we do not split a call and its corresponding
     CALL_ARG_LOCATION note.  */
  if (CALL_P (selected))
    {
      rtx_insn *next = NEXT_INSN (selected);
      if (next && NOTE_P (next)
	  && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
	selected = next;
    }

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
		   machine_mode mode, rtx value)
{
  gcc_assert (!arm_disable_literal_pool);
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   it.  */
int
arm_max_const_double_inline_cost ()
{
  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}
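/* For example, the constant 0x0000000100000001 costs 2 by this
   measure: each 32-bit half is the valid immediate 1, needing a
   single insn.  */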
/* Cost of loading a SImode constant.  */
static inline int
arm_const_inline_cost (enum rtx_code code, rtx val)
{
  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
			   NULL_RTX, NULL_RTX, 1, 0);
}
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
16614 If DO_PUSHES is false we do not actually push any of the fixups
16617 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
16621 extract_constrain_insn (insn
);
16623 if (recog_data
.n_alternatives
== 0)
16626 /* Fill in recog_op_alt with information about the constraints of
16628 preprocess_constraints (insn
);
16630 const operand_alternative
*op_alt
= which_op_alt ();
16631 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16633 /* Things we need to fix can only occur in inputs. */
16634 if (recog_data
.operand_type
[opno
] != OP_IN
)
16637 /* If this alternative is a memory reference, then any mention
16638 of constants in this alternative is really to fool reload
16639 into allowing us to accept one there. We need to fix them up
16640 now so that we output the right code. */
16641 if (op_alt
[opno
].memory_ok
)
16643 rtx op
= recog_data
.operand
[opno
];
16645 if (CONSTANT_P (op
))
16648 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
16649 recog_data
.operand_mode
[opno
], op
);
16651 else if (MEM_P (op
)
16652 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
16653 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
16657 rtx cop
= avoid_constant_pool_reference (op
);
16659 /* Casting the address of something to a mode narrower
16660 than a word can cause avoid_constant_pool_reference()
16661 to return the pool reference itself. That's no good to
16662 us here. Lets just hope that we can use the
16663 constant pool value directly. */
16665 cop
= get_pool_constant (XEXP (op
, 0));
16667 push_minipool_fix (insn
, address
,
16668 recog_data
.operand_loc
[opno
],
16669 recog_data
.operand_mode
[opno
], cop
);
/* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
   and unions in the context of ARMv8-M Security Extensions.  It is used as a
   helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
   functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to either one
   or four masks, depending on whether it is being computed for a
   'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
   respectively.  The tree for the type of the argument or a field within an
   argument is passed in ARG_TYPE, the current register this argument or field
   starts in is kept in the pointer REGNO and updated accordingly, the bit this
   argument or field starts at is passed in STARTING_BIT and the last used bit
   is kept in LAST_USED_BIT which is also updated accordingly.  */
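/* As an illustration, for a struct { char c; short s; } starting in
   r0, bits 8-15 lie between the end of C and the start of S; those
   bits are set in PADDING_BITS_TO_CLEAR[0] and r0 is marked in the
   returned mask as holding part of the argument.  */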
static unsigned HOST_WIDE_INT
comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
			       uint32_t * padding_bits_to_clear,
			       unsigned starting_bit, int * last_used_bit)

{
  unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;

  if (TREE_CODE (arg_type) == RECORD_TYPE)
    {
      unsigned current_bit = starting_bit;
      tree field;
      long int offset, size;

      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  /* The offset within a structure is always an offset from
	     the start of that structure.  Make sure we take that into the
	     calculation of the register based offset that we use here.  */
	  offset = starting_bit;
	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
	  offset %= 32;

	  /* This is the actual size of the field, for bitfields this is the
	     bitfield width and not the container size.  */
	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);

	  if (*last_used_bit != offset)
	    {
	      if (offset < *last_used_bit)
		{
		  /* This field's offset is before the 'last_used_bit', that
		     means this field goes on the next register.  So we need to
		     pad the rest of the current register and increase the
		     register number.  */
		  uint32_t mask;
		  mask  = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
		  mask++;

		  padding_bits_to_clear[*regno] |= mask;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      else
		{
		  /* Otherwise we pad the bits between the last field's end and
		     the start of the new field.  */
		  uint32_t mask;

		  mask = ((uint32_t)-1) >> (32 - offset);
		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
		  padding_bits_to_clear[*regno] |= mask;
		}
	      current_bit = offset;
	    }

	  /* Calculate further padding bits for inner structs/unions too.  */
	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
	    {
	      *last_used_bit = current_bit;
	      not_to_clear_reg_mask
		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
						  padding_bits_to_clear, offset,
						  last_used_bit);
	    }
	  else
	    {
	      /* Update 'current_bit' with this field's size.  If the
		 'current_bit' lies in a subsequent register, update 'regno' and
		 reset 'current_bit' to point to the current bit in that new
		 register.  */
	      current_bit += size;
	      while (current_bit >= 32)
		{
		  current_bit -= 32;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      *last_used_bit = current_bit;
	    }

	  field = TREE_CHAIN (field);
	}
      not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
    }
  else if (TREE_CODE (arg_type) == UNION_TYPE)
    {
      tree field, field_t;
      int i, regno_t, field_size;
      int max_reg = -1;
      int max_bit = -1;
      uint32_t mask;
      uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
	= {-1, -1, -1, -1};

      /* To compute the padding bits in a union we only consider bits as
	 padding bits if they are always either a padding bit or fall outside a
	 fields size for all fields in the union.  */
      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
	    = {0U, 0U, 0U, 0U};
	  int last_used_bit_t = *last_used_bit;
	  regno_t = *regno;
	  field_t = TREE_TYPE (field);

	  /* If the field's type is either a record or a union make sure to
	     compute their padding bits too.  */
	  if (RECORD_OR_UNION_TYPE_P (field_t))
	    not_to_clear_reg_mask
	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
						&padding_bits_to_clear_t[0],
						starting_bit, &last_used_bit_t);
	  else
	    {
	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
	      regno_t = (field_size / 32) + *regno;
	      last_used_bit_t = (starting_bit + field_size) % 32;
	    }

	  for (i = *regno; i < regno_t; i++)
	    {
	      /* For all but the last register used by this field only keep the
		 padding bits that were padding bits in this field.  */
	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
	    }

	  /* For the last register, keep all padding bits that were padding
	     bits in this field and any padding bits that are still valid
	     as padding bits but fall outside of this field's size.  */
	  mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
	  padding_bits_to_clear_res[regno_t]
	    &= padding_bits_to_clear_t[regno_t] | mask;

	  /* Update the maximum size of the fields in terms of registers used
	     ('max_reg') and the 'last_used_bit' in said register.  */
	  if (max_reg < regno_t)
	    {
	      max_reg = regno_t;
	      max_bit = last_used_bit_t;
	    }
	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
	    max_bit = last_used_bit_t;

	  field = TREE_CHAIN (field);
	}

      /* Update the current padding_bits_to_clear using the intersection of the
	 padding bits of all the fields.  */
      for (i = *regno; i < max_reg; i++)
	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];

      /* Do not keep trailing padding bits, we do not know yet whether this
	 is the end of the argument.  */
      mask = ((uint32_t) 1 << max_bit) - 1;
      padding_bits_to_clear[max_reg]
	|= padding_bits_to_clear_res[max_reg] & mask;

      *regno = max_reg;
      *last_used_bit = max_bit;
    }
  else
    /* This function should only be used for structs and unions.  */
    gcc_unreachable ();

  return not_to_clear_reg_mask;
}
/* In the context of ARMv8-M Security Extensions, this function is used for both
   'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
   registers are used when returning or passing arguments, which is then
   returned as a mask.  It will also compute a mask to indicate padding/unused
   bits for each of these registers, and passes this through the
   PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
   ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
   the starting register used to pass this argument or return value is passed
   in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
   for struct and union types.  */
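
/* Illustrative example (not from the original source): for a 'double'
   argument passed in {r0, r1} under the soft-float ABI, ARG_RTX has DFmode
   spanning two registers, so the returned mask has bits 0 and 1 set and no
   padding bits are recorded; r0 and r1 then survive the pre-call
   clearing.  */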
static unsigned HOST_WIDE_INT
compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
			   uint32_t * padding_bits_to_clear)
{
  int last_used_bit = 0;
  unsigned HOST_WIDE_INT not_to_clear_mask;

  if (RECORD_OR_UNION_TYPE_P (arg_type))
    {
      not_to_clear_mask
	= comp_not_to_clear_mask_str_un (arg_type, &regno,
					 padding_bits_to_clear, 0,
					 &last_used_bit);

      /* If the 'last_used_bit' is not zero, that means we are still using a
	 part of the last 'regno'.  In such cases we must clear the trailing
	 bits.  Otherwise we are not using regno and we should mark it as to
	 clear.  */
      if (last_used_bit != 0)
	padding_bits_to_clear[regno]
	  |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
      else
	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
    }
  else
    {
      not_to_clear_mask = 0;
      /* We are not dealing with structs nor unions.  So these arguments may be
	 passed in floating point registers too.  In some cases a BLKmode is
	 used when returning or passing arguments in multiple VFP registers.  */
      if (GET_MODE (arg_rtx) == BLKmode)
	{
	  int i, arg_regs;
	  rtx reg;

	  /* This should really only occur when dealing with the hard-float
	     ABI.  */
	  gcc_assert (TARGET_HARD_FLOAT_ABI);

	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
	    {
	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
	      gcc_assert (REG_P (reg));

	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);

	      /* If we are dealing with DF mode, make sure we don't
		 clear either of the registers it addresses.  */
	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
	      if (arg_regs > 1)
		{
		  unsigned HOST_WIDE_INT mask;
		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
		  not_to_clear_mask |= mask;
		}
	    }
	}
      else
	{
	  /* Otherwise we can rely on the MODE to determine how many registers
	     are being used by this argument.  */
	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	  if (arg_regs > 1)
	    {
	      unsigned HOST_WIDE_INT
	      mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	      not_to_clear_mask |= mask;
	    }
	}
    }

  return not_to_clear_mask;
}
/* Clears caller saved registers not used to pass arguments before a
   cmse_nonsecure_call.  Saving, clearing and restoring of callee saved
   registers is done in the __gnu_cmse_nonsecure_call libcall.
   See libgcc/config/arm/cmse_nonsecure_call.S.  */
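
/* Sketch of the effect (illustrative only): for a call through a
   'cmse_nonsecure_call' function pointer held in r4 that takes one 'int'
   argument in r0, the code emitted before the call clears the LSB of r4
   with a shift pair and then clears the leaking argument registers,
   roughly:
	lsrs	r4, r4, #1
	lsls	r4, r4, #1
	movs	r1, r4
	movs	r2, r4
	movs	r3, r4
   leaving the live argument register r0 intact (d0-d7 are also cleared
   for -mfloat-abi=hard).  */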
static void
cmse_nonsecure_call_clear_caller_saved (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  uint64_t to_clear_mask, float_mask;
	  rtx_insn *seq;
	  rtx pat, call, unspec, reg, cleared_reg, tmp;
	  unsigned int regno, maxregno;
	  rtx address;
	  CUMULATIVE_ARGS args_so_far_v;
	  cumulative_args_t args_so_far;
	  tree arg_type, fntype;
	  bool using_r4, first_param = true;
	  function_args_iterator args_iter;
	  uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
	  uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];

	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  if (!CALL_P (insn))
	    continue;

	  pat = PATTERN (insn);
	  gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
	  call = XVECEXP (pat, 0, 0);

	  /* Get the real call RTX if the insn sets a value, i.e. returns.  */
	  if (GET_CODE (call) == SET)
	    call = SET_SRC (call);

	  /* Check if it is a cmse_nonsecure_call.  */
	  unspec = XEXP (call, 0);
	  if (GET_CODE (unspec) != UNSPEC
	      || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
	    continue;

	  /* Determine the caller-saved registers we need to clear.  */
	  to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
	  maxregno = NUM_ARG_REGS - 1;
	  /* Only look at the caller-saved floating point registers in case of
	     -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
	     lazy store and loads which clear both caller- and callee-saved
	     registers.  */
	  if (TARGET_HARD_FLOAT_ABI)
	    {
	      float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
	      float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
	      to_clear_mask |= float_mask;
	      maxregno = D7_VFP_REGNUM;
	    }

	  /* Make sure the register used to hold the function address is not
	     cleared.  */
	  address = RTVEC_ELT (XVEC (unspec, 0), 0);
	  gcc_assert (MEM_P (address));
	  gcc_assert (REG_P (XEXP (address, 0)));
	  to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));

	  /* Set basic block of call insn so that df rescan is performed on
	     insns inserted here.  */
	  set_block_for_insn (insn, bb);
	  df_set_flags (DF_DEFER_INSN_RESCAN);
	  start_sequence ();

	  /* Make sure the scheduler doesn't schedule other insns beyond
	     here.  */
	  emit_insn (gen_blockage ());

	  /* Walk through all arguments and clear registers appropriately.  */
	  fntype = TREE_TYPE (MEM_EXPR (address));
	  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
				    NULL_TREE);
	  args_so_far = pack_cumulative_args (&args_so_far_v);
	  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
	    {
	      rtx arg_rtx;
	      machine_mode arg_mode = TYPE_MODE (arg_type);

	      if (VOID_TYPE_P (arg_type))
		continue;

	      if (!first_param)
		arm_function_arg_advance (args_so_far, arg_mode, arg_type,
					  true);

	      arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
					  true);
	      gcc_assert (REG_P (arg_rtx));
	      to_clear_mask
		&= ~compute_not_to_clear_mask (arg_type, arg_rtx,
					       REGNO (arg_rtx),
					       padding_bits_to_clear_ptr);

	      first_param = false;
	    }

	  /* Clear padding bits where needed.  */
	  cleared_reg = XEXP (address, 0);
	  reg = gen_rtx_REG (SImode, IP_REGNUM);
	  using_r4 = false;
	  for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
	    {
	      if (padding_bits_to_clear[regno] == 0)
		continue;

	      /* If this is a Thumb-1 target copy the address of the function
		 we are calling from 'r4' into 'ip' such that we can use r4 to
		 clear the unused bits in the arguments.  */
	      if (TARGET_THUMB1 && !using_r4)
		{
		  using_r4 = true;
		  reg = cleared_reg;
		  emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM), reg);
		}

	      tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
	      emit_move_insn (reg, tmp);
	      /* Also fill the top half of the negated
		 padding_bits_to_clear.  */
	      if (((~padding_bits_to_clear[regno]) >> 16) > 0)
		{
		  tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
		  emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
								GEN_INT (16),
								GEN_INT (16)),
					  tmp));
		}

	      emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
				     gen_rtx_REG (SImode, regno),
				     reg));
	    }
	  if (using_r4)
	    emit_move_insn (cleared_reg,
			    gen_rtx_REG (SImode, IP_REGNUM));

	  /* We use right shift and left shift to clear the LSB of the address
	     we jump to instead of using bic, to avoid having to use an extra
	     register on Thumb-1.  */
	  tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (cleared_reg, tmp));
	  tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (cleared_reg, tmp));

	  /* Clear all registers that leak before doing a non-secure
	     call.  */
	  for (regno = R0_REGNUM; regno <= maxregno; regno++)
	    {
	      if (!(to_clear_mask & (1LL << regno)))
		continue;

	      /* If regno is an even vfp register and its successor is also to
		 be cleared, use vmov.  */
	      if (IS_VFP_REGNUM (regno))
		{
		  if (TARGET_VFP_DOUBLE
		      && VFP_REGNO_OK_FOR_DOUBLE (regno)
		      && to_clear_mask & (1LL << (regno + 1)))
		    emit_move_insn (gen_rtx_REG (DFmode, regno++),
				    CONST0_RTX (DFmode));
		  else
		    emit_move_insn (gen_rtx_REG (SFmode, regno),
				    CONST0_RTX (SFmode));
		}
	      else
		emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
	    }

	  seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);
	}
    }
}
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */
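
/* Illustrative example (not from the original source): on Thumb-1,
	mov	r1, r0
	cmp	r0, #0
	beq	.L1
   becomes
	subs	r1, r0, #0
	beq	.L1
   since the subtract of zero already sets the condition codes on the
   moved value, making the compare redundant.  */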
static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx cmp, op0, op1, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
	insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
	continue;

      /* Get the register with which we are comparing.  */
      cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
      op0 = XEXP (cmp, 0);
      op1 = XEXP (cmp, 1);

      /* Check that comparison is against ZERO.  */
      if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
	continue;

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
	   (!insn_clobbered
	    && prev != BB_HEAD (bb)
	    && (NOTE_P (prev)
		|| DEBUG_INSN_P (prev)
		|| ((set = single_set (prev)) != NULL
		    && get_attr_conds (prev) == CONDS_NOCOND)));
	   prev = PREV_INSN (prev))
	{
	  if (reg_set_p (op0, prev))
	    insn_clobbered = true;
	}

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)
	continue;

      if (!set)
	continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
	  || !low_register_operand (src, SImode))
	continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
	 in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
	{
	  dest = copy_rtx (dest);
	  src = copy_rtx (src);
	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
	  PATTERN (prev) = gen_rtx_SET (dest, src);
	  INSN_CODE (prev) = -1;
	  /* Set test register in INSN to dest.  */
	  XEXP (cmp, 0) = copy_rtx (dest);
	  INSN_CODE (insn) = -1;
	}
    }
}
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */
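
/* Illustrative example (not from the original source): when the condition
   codes are dead after an instruction, the 32-bit Thumb-2 encoding
	add	r0, r0, r1
   can be replaced by the flag-setting 16-bit encoding
	adds	r0, r0, r1
   halving the instruction size at no cost.  */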
static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  enum Convert_Action {SKIP, CONV, SWAP_CONV};

  FOR_EACH_BB_FN (bb, cfun)
    {
      if ((current_tune->disparage_flag_setting_t16_encodings
	   == tune_params::DISPARAGE_FLAGS_ALL)
	  && optimize_bb_for_speed_p (bb))
	continue;

      rtx_insn *insn;
      Convert_Action action = SKIP;
      Convert_Action action_for_partial_flag_setting
	= ((current_tune->disparage_flag_setting_t16_encodings
	    != tune_params::DISPARAGE_FLAGS_NEITHER)
	   && optimize_bb_for_speed_p (bb))
	  ? SKIP : CONV;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (NONJUMP_INSN_P (insn)
	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
	      && GET_CODE (PATTERN (insn)) == SET)
	    {
	      action = SKIP;
	      rtx pat = PATTERN (insn);
	      rtx dst = XEXP (pat, 0);
	      rtx src = XEXP (pat, 1);
	      rtx op0 = NULL_RTX, op1 = NULL_RTX;

	      if (UNARY_P (src) || BINARY_P (src))
		op0 = XEXP (src, 0);

	      if (BINARY_P (src))
		op1 = XEXP (src, 1);

	      if (low_register_operand (dst, SImode))
		{
		  switch (GET_CODE (src))
		    {
		    case PLUS:
		      /* Adding two registers and storing the result
			 in the first source is already a 16-bit
			 operation.  */
		      if (rtx_equal_p (dst, op0)
			  && register_operand (op1, SImode))
			break;

		      if (low_register_operand (op0, SImode))
			{
			  /* ADDS <Rd>,<Rn>,<Rm> */
			  if (low_register_operand (op1, SImode))
			    action = CONV;
			  /* ADDS <Rdn>,#<imm8> */
			  /* SUBS <Rdn>,#<imm8> */
			  else if (rtx_equal_p (dst, op0)
				   && CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -255, 255))
			    action = CONV;
			  /* ADDS <Rd>,<Rn>,#<imm3> */
			  /* SUBS <Rd>,<Rn>,#<imm3> */
			  else if (CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -7, 7))
			    action = CONV;
			}
		      /* ADCS <Rd>, <Rn>  */
		      else if (GET_CODE (XEXP (src, 0)) == PLUS
			       && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
			       && low_register_operand (XEXP (XEXP (src, 0), 1),
							SImode)
			       && COMPARISON_P (op1)
			       && cc_register (XEXP (op1, 0), VOIDmode)
			       && maybe_get_arm_condition_code (op1) == ARM_CS
			       && XEXP (op1, 1) == const0_rtx)
			action = CONV;
		      break;

		    case MINUS:
		      /* RSBS <Rd>,<Rn>,#0
			 Not handled here: see NEG below.  */
		      /* SUBS <Rd>,<Rn>,#<imm3>
			 SUBS <Rdn>,#<imm8>
			 Not handled here: see PLUS above.  */
		      /* SUBS <Rd>,<Rn>,<Rm> */
		      if (low_register_operand (op0, SImode)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      break;

		    case MULT:
		      /* MULS <Rdm>,<Rn>,<Rdm>
			 As an exception to the rule, this is only used
			 when optimizing for size since MULS is slow on all
			 known implementations.  We do not even want to use
			 MULS in cold code, if optimizing for speed, so we
			 test the global flag here.  */
		      if (!optimize_size)
			break;
		      /* Fall through.  */
		    case AND:
		    case IOR:
		    case XOR:
		      /* ANDS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      else if (rtx_equal_p (dst, op1)
			       && low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting == SKIP
				 ? SKIP : SWAP_CONV;
		      break;

		    case ASHIFTRT:
		    case ASHIFT:
		    case LSHIFTRT:
		      /* ASRS <Rdn>,<Rm> */
		      /* LSRS <Rdn>,<Rm> */
		      /* LSLS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      /* ASRS <Rd>,<Rm>,#<imm5> */
		      /* LSRS <Rd>,<Rm>,#<imm5> */
		      /* LSLS <Rd>,<Rm>,#<imm5> */
		      else if (low_register_operand (op0, SImode)
			       && CONST_INT_P (op1)
			       && IN_RANGE (INTVAL (op1), 0, 31))
			action = action_for_partial_flag_setting;
		      break;

		    case ROTATERT:
		      /* RORS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NOT:
		      /* MVNS <Rd>,<Rm> */
		      if (low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NEG:
		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS) */
		      if (low_register_operand (op0, SImode))
			action = CONV;
		      break;

		    case CONST_INT:
		      /* MOVS <Rd>,#<imm8> */
		      if (CONST_INT_P (src)
			  && IN_RANGE (INTVAL (src), 0, 255))
			action = action_for_partial_flag_setting;
		      break;

		    case REG:
		      /* MOVS and MOV<c> with registers have different
			 encodings, so are not relevant here.  */
		      break;

		    default:
		      break;
		    }
		}

	      if (action != SKIP)
		{
		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
		  rtvec vec;

		  if (action == SWAP_CONV)
		    {
		      src = copy_rtx (src);
		      XEXP (src, 0) = op1;
		      XEXP (src, 1) = op0;
		      pat = gen_rtx_SET (dst, src);
		      vec = gen_rtvec (2, pat, clobber);
		    }
		  else /* action == CONV */
		    vec = gen_rtvec (2, pat, clobber);

		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
		  INSN_CODE (insn) = -1;
		}
	    }

	  if (NONDEBUG_INSN_P (insn))
	    df_simulate_one_insn_backwards (bb, insn, &live);
	}
    }

  CLEAR_REG_SET (&live);
}
/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
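
/* For instance (illustrative only), a literal load such as
	ldr	r0, .L2
   can only reach a pool entry within roughly 4K (ARM) or 1K (Thumb) of
   the pc, so the minipool holding .L2 may have to be dumped in the middle
   of a function, with a branch inserted to jump around it.  */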
17441 HOST_WIDE_INT address
= 0;
17445 cmse_nonsecure_call_clear_caller_saved ();
17448 else if (TARGET_THUMB2
)
17451 /* Ensure all insns that must be split have been split at this point.
17452 Otherwise, the pool placement code below may compute incorrect
17453 insn lengths. Note that when optimizing, all insns have already
17454 been split at this point. */
17456 split_all_insns_noflow ();
17458 /* Make sure we do not attempt to create a literal pool even though it should
17459 no longer be necessary to create any. */
17460 if (arm_disable_literal_pool
)
17463 minipool_fix_head
= minipool_fix_tail
= NULL
;
17465 /* The first insn must always be a note, or the code below won't
17466 scan it properly. */
17467 insn
= get_insns ();
17468 gcc_assert (NOTE_P (insn
));
17471 /* Scan all the insns and record the operands that will need fixing. */
17472 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17474 if (BARRIER_P (insn
))
17475 push_minipool_barrier (insn
, address
);
17476 else if (INSN_P (insn
))
17478 rtx_jump_table_data
*table
;
17480 note_invalid_constants (insn
, address
, true);
17481 address
+= get_attr_length (insn
);
17483 /* If the insn is a vector jump, add the size of the table
17484 and skip the table. */
17485 if (tablejump_p (insn
, NULL
, &table
))
17487 address
+= get_jump_table_size (table
);
17491 else if (LABEL_P (insn
))
17492 /* Add the worst-case padding due to alignment. We don't add
17493 the _current_ padding because the minipool insertions
17494 themselves might change it. */
17495 address
+= get_label_padding (insn
);
17498 fix
= minipool_fix_head
;
17500 /* Now scan the fixups and perform the required changes. */
17505 Mfix
* last_added_fix
;
17506 Mfix
* last_barrier
= NULL
;
17509 /* Skip any further barriers before the next fix. */
17510 while (fix
&& BARRIER_P (fix
->insn
))
17513 /* No more fixes. */
17517 last_added_fix
= NULL
;
17519 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17521 if (BARRIER_P (ftmp
->insn
))
17523 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17526 last_barrier
= ftmp
;
17528 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17531 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17534 /* If we found a barrier, drop back to that; any fixes that we
17535 could have reached but come after the barrier will now go in
17536 the next mini-pool. */
17537 if (last_barrier
!= NULL
)
	  /* Reduce the refcount for those fixes that won't go into this
	     pool.  */
17541 for (fdel
= last_barrier
->next
;
17542 fdel
&& fdel
!= ftmp
;
17545 fdel
->minipool
->refcount
--;
17546 fdel
->minipool
= NULL
;
17549 ftmp
= last_barrier
;
	  /* ftmp is the first fix that we can't fit into this pool and
	     there are no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous fix
	     and this one, and arrange to jump around it.  */
17557 HOST_WIDE_INT max_address
;
17559 /* The last item on the list of fixes must be a barrier, so
17560 we can never run off the end of the list of fixes without
17561 last_barrier being set. */
17564 max_address
= minipool_vector_head
->max_address
;
17565 /* Check that there isn't another fix that is in range that
17566 we couldn't fit into this pool because the pool was
17567 already too large: we need to put the pool before such an
17568 instruction. The pool itself may come just after the
17569 fix because create_fix_barrier also allows space for a
17570 jump instruction. */
17571 if (ftmp
->address
< max_address
)
17572 max_address
= ftmp
->address
+ 1;
17574 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17577 assign_minipool_offsets (last_barrier
);
17581 if (!BARRIER_P (ftmp
->insn
)
17582 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17589 /* Scan over the fixes we have identified for this pool, fixing them
17590 up and adding the constants to the pool itself. */
17591 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17592 this_fix
= this_fix
->next
)
17593 if (!BARRIER_P (this_fix
->insn
))
17596 = plus_constant (Pmode
,
17597 gen_rtx_LABEL_REF (VOIDmode
,
17598 minipool_vector_label
),
17599 this_fix
->minipool
->offset
);
17600 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17603 dump_minipool (last_barrier
->insn
);
17607 /* From now on we must synthesize any constants that we can't handle
17608 directly. This can happen if the RTL gets split during final
17609 instruction generation. */
17610 cfun
->machine
->after_arm_reorg
= 1;
17612 /* Free the minipool memory. */
17613 obstack_free (&minipool_obstack
, minipool_startobj
);
17616 /* Routines to output assembly language. */
/* Return the string representation of the passed-in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (real_equal (r, &value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
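
/* Illustrative example (not from the original source): for operands
   describing a pop of {r4, r5, pc} with SP as the base register and an
   explicit update, this emits "pop\t{r4, r5, pc}".  When returning from
   an interrupt handler, where POP cannot be used, it falls back to an LDM
   form such as "ldmfd\tsp!, {r4, r5, pc}^".  */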
17634 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17640 const char *conditional
;
17641 int num_saves
= XVECLEN (operands
[0], 0);
17642 unsigned int regno
;
17643 unsigned int regno_base
= REGNO (operands
[1]);
17644 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
17647 offset
+= update
? 1 : 0;
17648 offset
+= return_pc
? 1 : 0;
17650 /* Is the base register in the list? */
17651 for (i
= offset
; i
< num_saves
; i
++)
17653 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17654 /* If SP is in the list, then the base register must be SP. */
17655 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17656 /* If base register is in the list, there must be no explicit update. */
17657 if (regno
== regno_base
)
17658 gcc_assert (!update
);
17661 conditional
= reverse
? "%?%D0" : "%?%d0";
17662 /* Can't use POP if returning from an interrupt. */
17663 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
17664 sprintf (pattern
, "pop%s\t{", conditional
);
17667 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17668 It's just a convention, their semantics are identical. */
17669 if (regno_base
== SP_REGNUM
)
17670 sprintf (pattern
, "ldmfd%s\t", conditional
);
17672 sprintf (pattern
, "ldmia%s\t", conditional
);
17674 sprintf (pattern
, "ldm%s\t", conditional
);
17676 strcat (pattern
, reg_names
[regno_base
]);
17678 strcat (pattern
, "!, {");
17680 strcat (pattern
, ", {");
17683 /* Output the first destination register. */
17685 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17687 /* Output the rest of the destination registers. */
17688 for (i
= offset
+ 1; i
< num_saves
; i
++)
17690 strcat (pattern
, ", ");
17692 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17695 strcat (pattern
, "}");
17697 if (interrupt_p
&& return_pc
)
17698 strcat (pattern
, "^");
17700 output_asm_insn (pattern
, &cond
);
17704 /* Output the assembly for a store multiple. */
17707 vfp_output_vstmd (rtx
* operands
)
17713 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
17714 ? XEXP (operands
[0], 0)
17715 : XEXP (XEXP (operands
[0], 0), 0);
17716 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
17719 strcpy (pattern
, "vpush%?.64\t{%P1");
17721 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
17723 p
= strlen (pattern
);
17725 gcc_assert (REG_P (operands
[1]));
17727 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17728 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17730 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17732 strcpy (&pattern
[p
], "}");
17734 output_asm_insn (pattern
, operands
);
/* Emit RTL to save a block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */
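
/* Illustrative usage (not from the original source): vfp_emit_fstmd (base, 4)
   pushes four D registers starting at 'base' and returns 32.  On pre-v6
   cores a two-pair store is widened to three pairs to dodge the ARM10
   VFPr1 erratum described below, so callers must use the returned byte
   count rather than assuming it is 8 * COUNT.  */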
17743 vfp_emit_fstmd (int base_reg
, int count
)
17750 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17751 register pairs are stored by a store multiple insn. We avoid this
17752 by pushing an extra pair. */
17753 if (count
== 2 && !arm_arch6
)
17755 if (base_reg
== LAST_VFP_REGNUM
- 3)
  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
17768 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17769 saved
+= vfp_emit_fstmd (base_reg
, 16);
17773 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17774 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17776 reg
= gen_rtx_REG (DFmode
, base_reg
);
17779 XVECEXP (par
, 0, 0)
17780 = gen_rtx_SET (gen_frame_mem
17782 gen_rtx_PRE_MODIFY (Pmode
,
17785 (Pmode
, stack_pointer_rtx
,
17788 gen_rtx_UNSPEC (BLKmode
,
17789 gen_rtvec (1, reg
),
17790 UNSPEC_PUSH_MULT
));
17792 tmp
= gen_rtx_SET (stack_pointer_rtx
,
17793 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17794 RTX_FRAME_RELATED_P (tmp
) = 1;
17795 XVECEXP (dwarf
, 0, 0) = tmp
;
17797 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
17798 RTX_FRAME_RELATED_P (tmp
) = 1;
17799 XVECEXP (dwarf
, 0, 1) = tmp
;
17801 for (i
= 1; i
< count
; i
++)
17803 reg
= gen_rtx_REG (DFmode
, base_reg
);
17805 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17807 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
17808 plus_constant (Pmode
,
17812 RTX_FRAME_RELATED_P (tmp
) = 1;
17813 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17816 par
= emit_insn (par
);
17817 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17818 RTX_FRAME_RELATED_P (par
) = 1;
/* Return TRUE if -mcmse has been passed and the function pointed to by 'addr'
   has the cmse_nonsecure_call attribute; return FALSE otherwise.  */
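
/* Illustrative usage (not from the original source): such call sites
   typically come from a function pointer type carrying the attribute,
   e.g.

     typedef void __attribute__ ((cmse_nonsecure_call)) nsfunc (void);

   calls made through an 'nsfunc *' are then recognized by this
   predicate.  */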
bool
detect_cmse_nonsecure_call (tree addr)
{
  if (!addr)
    return FALSE;

  tree fntype = TREE_TYPE (addr);
  if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
				    TYPE_ATTRIBUTES (fntype)))
    return TRUE;
  return FALSE;
}
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
	 linker.  We need to add an IP clobber to allow setting
	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
	 is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
/* Output a move from arm registers to arm registers of a long double.
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	{
	  emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					       GEN_INT (16)),
			 GEN_INT ((val >> 16) & 0x0000ffff));
	  rtx_insn *insn = get_last_insn ();
	  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
	}
      return;
    }

  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
  rtx_insn *insn = get_last_insn ();
  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
}
/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
const char *
output_move_double (rtx *operands, bool emit, int *count)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
17962 enum rtx_code code0
= GET_CODE (operands
[0]);
17963 enum rtx_code code1
= GET_CODE (operands
[1]);
17968 /* The only case when this might happen is when
17969 you are looking at the length of a DImode instruction
17970 that has an invalid constant in it. */
17971 if (code0
== REG
&& code1
!= MEM
)
17973 gcc_assert (!emit
);
17980 unsigned int reg0
= REGNO (operands
[0]);
17982 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17984 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17986 switch (GET_CODE (XEXP (operands
[1], 0)))
17993 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17994 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
17996 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18001 gcc_assert (TARGET_LDRD
);
18003 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
18010 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
18012 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
18020 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
18022 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
18027 gcc_assert (TARGET_LDRD
);
18029 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
	  /* Autoincrement addressing modes should never have overlapping
	     base and destination registers, and overlapping index registers
	     are already prohibited, so this doesn't need to worry about
	     fix_cm3_ldrd.  */
18038 otherops
[0] = operands
[0];
18039 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
18040 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
18042 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
18044 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
18046 /* Registers overlap so split out the increment. */
18049 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
18050 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
18057 /* Use a single insn if we can.
18058 FIXME: IWMMXT allows offsets larger than ldrd can
18059 handle, fix these up with a pair of ldr. */
18061 || !CONST_INT_P (otherops
[2])
18062 || (INTVAL (otherops
[2]) > -256
18063 && INTVAL (otherops
[2]) < 256))
18066 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
18072 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
18073 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18083 /* Use a single insn if we can.
18084 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18085 fix these up with a pair of ldr. */
18087 || !CONST_INT_P (otherops
[2])
18088 || (INTVAL (otherops
[2]) > -256
18089 && INTVAL (otherops
[2]) < 256))
18092 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
18098 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18099 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
	  /* We might be able to use ldrd %0, %1 here.  However the range is
	     different to ldr/adr, and it is broken on some ARMv7-M
	     implementations.  */
	  /* Use the second register of the pair to avoid problematic
	     conflicts.  */
18114 otherops
[1] = operands
[1];
18116 output_asm_insn ("adr%?\t%0, %1", otherops
);
18117 operands
[1] = otherops
[0];
18121 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18123 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
18130 /* ??? This needs checking for thumb2. */
18132 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18133 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18135 otherops
[0] = operands
[0];
18136 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18137 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18139 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18141 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18143 switch ((int) INTVAL (otherops
[2]))
18147 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
18153 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
18159 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
18163 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18164 operands
[1] = otherops
[0];
18166 && (REG_P (otherops
[2])
18168 || (CONST_INT_P (otherops
[2])
18169 && INTVAL (otherops
[2]) > -256
18170 && INTVAL (otherops
[2]) < 256)))
18172 if (reg_overlap_mentioned_p (operands
[0],
18175 /* Swap base and index registers over to
18176 avoid a conflict. */
18177 std::swap (otherops
[1], otherops
[2]);
18179 /* If both registers conflict, it will usually
18180 have been fixed by a splitter. */
18181 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18182 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18186 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18187 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18194 otherops
[0] = operands
[0];
18196 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
18201 if (CONST_INT_P (otherops
[2]))
18205 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18206 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18208 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18214 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18220 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18227 return "ldrd%?\t%0, [%1]";
18229 return "ldmia%?\t%1, %M0";
18233 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18234 /* Take care of overlapping base/data reg. */
18235 if (reg_mentioned_p (operands
[0], operands
[1]))
18239 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18240 output_asm_insn ("ldr%?\t%0, %1", operands
);
18250 output_asm_insn ("ldr%?\t%0, %1", operands
);
18251 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18261 /* Constraints should ensure this. */
18262 gcc_assert (code0
== MEM
&& code1
== REG
);
18263 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18264 || (TARGET_ARM
&& TARGET_LDRD
));
18266 switch (GET_CODE (XEXP (operands
[0], 0)))
18272 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
18274 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18279 gcc_assert (TARGET_LDRD
);
18281 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
18288 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
18290 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
18298 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
18300 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
18305 gcc_assert (TARGET_LDRD
);
18307 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
18312 otherops
[0] = operands
[1];
18313 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18314 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18316 /* IWMMXT allows offsets larger than ldrd can handle,
18317 fix these up with a pair of ldr. */
18319 && CONST_INT_P (otherops
[2])
18320 && (INTVAL(otherops
[2]) <= -256
18321 || INTVAL(otherops
[2]) >= 256))
18323 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18327 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18328 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18337 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18338 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18344 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18347 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
18352 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
18357 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18358 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18360 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18364 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
18371 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
18378 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
18383 && (REG_P (otherops
[2])
18385 || (CONST_INT_P (otherops
[2])
18386 && INTVAL (otherops
[2]) > -256
18387 && INTVAL (otherops
[2]) < 256)))
18389 otherops
[0] = operands
[1];
18390 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18392 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
18398 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18399 otherops
[1] = operands
[1];
18402 output_asm_insn ("str%?\t%1, %0", operands
);
18403 output_asm_insn ("str%?\t%H1, %0", otherops
);
18413 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18414 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18417 output_move_quad (rtx
*operands
)
18419 if (REG_P (operands
[0]))
18421 /* Load, or reg->reg move. */
18423 if (MEM_P (operands
[1]))
18425 switch (GET_CODE (XEXP (operands
[1], 0)))
18428 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18433 output_asm_insn ("adr%?\t%0, %1", operands
);
18434 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
18438 gcc_unreachable ();
18446 gcc_assert (REG_P (operands
[1]));
18448 dest
= REGNO (operands
[0]);
18449 src
= REGNO (operands
[1]);
      /* This seems pretty dumb, but hopefully GCC won't try to do it
	 very often.  */
18454 for (i
= 0; i
< 4; i
++)
18456 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18457 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18458 output_asm_insn ("mov%?\t%0, %1", ops
);
18461 for (i
= 3; i
>= 0; i
--)
18463 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18464 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18465 output_asm_insn ("mov%?\t%0, %1", ops
);
18471 gcc_assert (MEM_P (operands
[0]));
18472 gcc_assert (REG_P (operands
[1]));
18473 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18475 switch (GET_CODE (XEXP (operands
[0], 0)))
18478 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18482 gcc_unreachable ();
18489 /* Output a VFP load or store instruction. */
18492 output_move_vfp (rtx
*operands
)
18494 rtx reg
, mem
, addr
, ops
[2];
18495 int load
= REG_P (operands
[0]);
18496 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18497 int sp
= (!TARGET_VFP_FP16INST
18498 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
18499 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18504 reg
= operands
[!load
];
18505 mem
= operands
[load
];
18507 mode
= GET_MODE (reg
);
18509 gcc_assert (REG_P (reg
));
18510 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18511 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
18517 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18518 gcc_assert (MEM_P (mem
));
18520 addr
= XEXP (mem
, 0);
18522 switch (GET_CODE (addr
))
18525 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18526 ops
[0] = XEXP (addr
, 0);
18531 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18532 ops
[0] = XEXP (addr
, 0);
18537 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18543 sprintf (buff
, templ
,
18544 load
? "ld" : "st",
18545 dp
? "64" : sp
? "32" : "16",
18547 integer_p
? "\t%@ int" : "");
18548 output_asm_insn (buff
, ops
);
/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM.  GCC RTL defines element
   ordering based on in-memory order.  This can be different from
   the architectural ordering of elements within a NEON register.
   The intrinsics defined in arm_neon.h use the NEON register element
   ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers.

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */
18594 output_move_neon (rtx
*operands
)
18596 rtx reg
, mem
, addr
, ops
[2];
18597 int regno
, nregs
, load
= REG_P (operands
[0]);
18602 reg
= operands
[!load
];
18603 mem
= operands
[load
];
18605 mode
= GET_MODE (reg
);
18607 gcc_assert (REG_P (reg
));
18608 regno
= REGNO (reg
);
18609 nregs
= REG_NREGS (reg
) / 2;
18610 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18611 || NEON_REGNO_OK_FOR_QUAD (regno
));
18612 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18613 || VALID_NEON_QREG_MODE (mode
)
18614 || VALID_NEON_STRUCT_MODE (mode
));
18615 gcc_assert (MEM_P (mem
));
18617 addr
= XEXP (mem
, 0);
18619 /* Strip off const from addresses like (const (plus (...))). */
18620 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18621 addr
= XEXP (addr
, 0);
18623 switch (GET_CODE (addr
))
18626 /* We have to use vldm / vstm for too-large modes. */
18629 templ
= "v%smia%%?\t%%0!, %%h1";
18630 ops
[0] = XEXP (addr
, 0);
18634 templ
= "v%s1.64\t%%h1, %%A0";
18641 /* We have to use vldm / vstm in this case, since there is no
18642 pre-decrement form of the vld1 / vst1 instructions. */
18643 templ
= "v%smdb%%?\t%%0!, %%h1";
18644 ops
[0] = XEXP (addr
, 0);
18649 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18650 gcc_unreachable ();
18653 /* We have to use vldm / vstm for too-large modes. */
18657 templ
= "v%smia%%?\t%%m0, %%h1";
18659 templ
= "v%s1.64\t%%h1, %%A0";
18665 /* Fall through. */
18671 for (i
= 0; i
< nregs
; i
++)
18673 /* We're only using DImode here because it's a convenient size. */
18674 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18675 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18676 if (reg_overlap_mentioned_p (ops
[0], mem
))
18678 gcc_assert (overlap
== -1);
18683 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18684 output_asm_insn (buff
, ops
);
18689 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18690 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18691 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18692 output_asm_insn (buff
, ops
);
18699 gcc_unreachable ();
18702 sprintf (buff
, templ
, load
? "ld" : "st");
18703 output_asm_insn (buff
, ops
);
18708 /* Compute and return the length of neon_mov<mode>, where <mode> is
18709 one of VSTRUCT modes: EI, OI, CI or XI. */
18711 arm_attr_length_move_neon (rtx_insn
*insn
)
18713 rtx reg
, mem
, addr
;
18717 extract_insn_cached (insn
);
18719 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18721 mode
= GET_MODE (recog_data
.operand
[0]);
18732 gcc_unreachable ();
18736 load
= REG_P (recog_data
.operand
[0]);
18737 reg
= recog_data
.operand
[!load
];
18738 mem
= recog_data
.operand
[load
];
18740 gcc_assert (MEM_P (mem
));
18742 addr
= XEXP (mem
, 0);
18744 /* Strip off const from addresses like (const (plus (...))). */
18745 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18746 addr
= XEXP (addr
, 0);
18748 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18750 int insns
= REG_NREGS (reg
) / 2;
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
static int
arm_address_offset_is_imm (rtx_insn *insn)
{
  rtx mem, addr;
18765 extract_insn_cached (insn
);
18767 if (REG_P (recog_data
.operand
[0]))
18770 mem
= recog_data
.operand
[0];
18772 gcc_assert (MEM_P (mem
));
18774 addr
= XEXP (mem
, 0);
18777 || (GET_CODE (addr
) == PLUS
18778 && REG_P (XEXP (addr
, 0))
18779 && CONST_INT_P (XEXP (addr
, 1))))
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
18790 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18792 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18795 output_multi_immediate (operands
,
18796 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18799 output_multi_immediate (operands
,
18800 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
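
/* For example (illustrative only), adding the constant 0x10004, which no
   single ARM immediate can encode, is emitted as two instructions, one
   per encodable byte chunk:
	add	r0, r1, #4
	add	r0, r0, #65536  */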
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
18817 #if HOST_BITS_PER_WIDE_INT > 32
18823 /* Quick and easy output. */
18824 operands
[immed_op
] = const0_rtx
;
18825 output_asm_insn (instr1
, operands
);
18830 const char * instr
= instr1
;
18832 /* Note that n is never zero here (which would give no output). */
18833 for (i
= 0; i
< 32; i
+= 2)
18837 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18838 output_asm_insn (instr
, operands
);
18848 /* Return the name of a shifter operation. */
18849 static const char *
18850 arm_shift_nmem(enum rtx_code code
)
18855 return ARM_LSL_NAME
;
/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
static const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
18878 switch (GET_CODE (op
))
18884 return shift_first_arg
? "rsb" : "sub";
18899 return arm_shift_nmem(GET_CODE(op
));
18902 gcc_unreachable ();
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char *mnem;
18915 enum rtx_code code
= GET_CODE (op
);
18920 if (!CONST_INT_P (XEXP (op
, 1)))
18922 output_operand_lossage ("invalid shift operand");
18927 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18935 mnem
= arm_shift_nmem(code
);
18936 if (CONST_INT_P (XEXP (op
, 1)))
18938 *amountp
= INTVAL (XEXP (op
, 1));
18940 else if (REG_P (XEXP (op
, 1)))
18947 output_operand_lossage ("invalid shift operand");
18953 /* We never have to worry about the amount being other than a
18954 power of 2, since this case can never be reloaded from a reg. */
18955 if (!CONST_INT_P (XEXP (op
, 1)))
18957 output_operand_lossage ("invalid shift operand");
18961 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18963 /* Amount must be a power of two. */
18964 if (*amountp
& (*amountp
- 1))
18966 output_operand_lossage ("invalid shift operand");
18970 *amountp
= exact_log2 (*amountp
);
18971 gcc_assert (IN_RANGE (*amountp
, 0, 31));
18972 return ARM_LSL_NAME
;
18975 output_operand_lossage ("invalid shift operand");
18979 /* This is not 100% correct, but follows from the desire to merge
18980 multiplication by a power of 2 with the recognizer for a
18981 shift. >=32 is not a valid shift for "lsl", so we must try and
18982 output a shift that produces the correct arithmetical result.
18983 Using lsr #32 is identical except for the fact that the carry bit
18984 is not set correctly if we set the flags; but we never use the
18985 carry bit from such an operation, so we can ignore that. */
18986 if (code
== ROTATERT
)
18987 /* Rotate is just modulo 32. */
18989 else if (*amountp
!= (*amountp
& 31))
18991 if (code
== ASHIFT
)
18996 /* Shifts of 0 are no-ops. */
19003 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19004 because /bin/as is horribly restrictive. The judgement about
19005 whether or not each character is 'printable' (and can be output as
19006 is) or not (and must be printed with an octal escape) must be made
19007 with reference to the *host* character set -- the situation is
19008 similar to that discussed in the comments above pp_c_char in
19009 c-pretty-print.c. */
19011 #define MAX_ASCII_LEN 51
19014 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
19017 int len_so_far
= 0;
19019 fputs ("\t.ascii\t\"", stream
);
19021 for (i
= 0; i
< len
; i
++)
19025 if (len_so_far
>= MAX_ASCII_LEN
)
19027 fputs ("\"\n\t.ascii\t\"", stream
);
19033 if (c
== '\\' || c
== '\"')
19035 putc ('\\', stream
);
19043 fprintf (stream
, "\\%03o", c
);
19048 fputs ("\"\n", stream
);
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved, in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_regs[reg] \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19059 /* Compute the register save mask for registers 0 through 12
19060 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19062 static unsigned long
19063 arm_compute_save_reg0_reg12_mask (void)
19065 unsigned long func_type
= arm_current_func_type ();
19066 unsigned long save_reg_mask
= 0;
19069 if (IS_INTERRUPT (func_type
))
19071 unsigned int max_reg
;
19072 /* Interrupt functions must not corrupt any registers,
19073 even call clobbered ones. If this is a leaf function
19074 we can just examine the registers used by the RTL, but
19075 otherwise we have to assume that whatever function is
19076 called might clobber anything, and so we have to save
19077 all the call-clobbered registers as well. */
19078 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
	/* FIQ handlers have registers r8 - r12 banked, so
	   we only need to check r0 - r7; normal ISRs only
	   bank r14 and r15, so we must check up to r12.
	   r13 is the stack pointer which is always preserved,
	   so we do not need to consider it here.  */
19088 for (reg
= 0; reg
<= max_reg
; reg
++)
19089 if (df_regs_ever_live_p (reg
)
19090 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
19091 save_reg_mask
|= (1 << reg
);
19093 /* Also save the pic base register if necessary. */
19095 && !TARGET_SINGLE_PIC_BASE
19096 && arm_pic_register
!= INVALID_REGNUM
19097 && crtl
->uses_pic_offset_table
)
19098 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19100 else if (IS_VOLATILE(func_type
))
      /* For noreturn functions we historically omitted register saves
	 altogether.  However this really messes up debugging.  As a
	 compromise save just the frame pointers.  Combined with the link
	 register saved elsewhere this should be sufficient to get
	 a backtrace.  */
19107 if (frame_pointer_needed
)
19108 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19109 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
19110 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19111 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
19112 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
19116 /* In the normal case we only need to save those registers
19117 which are call saved and which are used by this function. */
19118 for (reg
= 0; reg
<= 11; reg
++)
19119 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19120 save_reg_mask
|= (1 << reg
);
19122 /* Handle the frame pointer as a special case. */
19123 if (frame_pointer_needed
)
19124 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19126 /* If we aren't loading the PIC register,
19127 don't stack it even though it may be live. */
19129 && !TARGET_SINGLE_PIC_BASE
19130 && arm_pic_register
!= INVALID_REGNUM
19131 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19132 || crtl
->uses_pic_offset_table
))
19133 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19135 /* The prologue will copy SP into R0, so save it. */
19136 if (IS_STACKALIGN (func_type
))
19137 save_reg_mask
|= 1;
19140 /* Save registers so the exception handler can modify them. */
19141 if (crtl
->calls_eh_return
)
19147 reg
= EH_RETURN_DATA_REGNO (i
);
19148 if (reg
== INVALID_REGNUM
)
19150 save_reg_mask
|= 1 << reg
;
19154 return save_reg_mask
;
19157 /* Return true if r3 is live at the start of the function. */
19160 arm_r3_live_at_start_p (void)
19162 /* Just look at cfg info, which is still close enough to correct at this
19163 point. This gives false positives for broken functions that might use
19164 uninitialized data that happens to be allocated in r3, but who cares? */
19165 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
19168 /* Compute the number of bytes used to store the static chain register on the
19169 stack, above the stack frame. We need to know this accurately to get the
19170 alignment of the rest of the stack frame correct. */
19173 arm_compute_static_chain_stack_bytes (void)
19175 /* See the defining assertion in arm_expand_prologue. */
19176 if (IS_NESTED (arm_current_func_type ())
19177 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19178 || ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
19179 || flag_stack_clash_protection
)
19180 && !df_regs_ever_live_p (LR_REGNUM
)))
19181 && arm_r3_live_at_start_p ()
19182 && crtl
->args
.pretend_args_size
== 0)
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.
   This is used by arm_compute_frame_layout, which may add extra registers.  */

static unsigned long
arm_compute_save_core_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
	  && optimize_size
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->tail_call_emit
	  && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
			   arm_compute_static_chain_stack_bytes())
	   ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
	 onto the stack is odd.  We need to ensure that the stack
	 is 64-bit aligned before we start to save iWMMXt registers,
	 and also before we start to create locals.  (A local variable
	 might be a double or long long which we will load/store using
	 an iWMMXt instruction).  Therefore we need to push another
	 ARM register, so that the stack will be 64-bit aligned.  We
	 try to avoid using the arg registers (r0 -r3) as they might be
	 used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
	if ((save_reg_mask & (1 << reg)) == 0)
	  break;

      if (reg <= 12)
	save_reg_mask |= (1 << reg);
      else
	{
	  cfun->machine->sibcall_blocked = 1;
	  save_reg_mask |= (1 << 3);
	}
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
	save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
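/* Illustrative sketch (not part of GCC, guarded out of the build): the
   iWMMXt path above pads the push list when the total number of pushed
   words is odd, so that SP stays 64-bit aligned.  This stand-alone model
   shows the parity test with a portable popcount; the mask is invented.  */
#if 0
#include <stdio.h>

static unsigned
popcount_ul (unsigned long x)
{
  unsigned n = 0;
  for (; x; x &= x - 1)		/* Clear the lowest set bit each round.  */
    n++;
  return n;
}

int
main (void)
{
  unsigned long save_mask = (1UL << 4) | (1UL << 5) | (1UL << 14); /* r4,r5,lr */
  unsigned pushed = popcount_ul (save_mask);

  if (pushed % 2 != 0)
    printf ("odd number of words (%u): push one more register to realign\n",
	    pushed);
  else
    printf ("stack stays 64-bit aligned with %u words\n", pushed);
  return 0;
}
#endif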
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_core_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg ++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  /* Handle the frame pointer as a special case.  */
  if (frame_pointer_needed)
    mask |= 1 << HARD_FRAME_POINTER_REGNUM;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
	 we will use.  If the register is live then we will
	 have to push it.  Use LAST_LO_REGNUM as our fallback
	 choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
	 not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
	reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
	mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
	 determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
	if (mask & (1 << reg))
	  break;

      if (reg > LAST_LO_REGNUM)
	{
	  /* Make sure we have a register available for stack decrement.  */
	  mask |= 1 << LAST_LO_REGNUM;
	}
    }

  return mask;
}
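/* Illustrative sketch (not part of GCC, guarded out of the build): the
   504-byte test above is "512 minus two possible alignment words".  This
   stand-alone model rounds a frame size up to a word multiple and applies
   the same threshold; the macro name and sizes are invented here.  */
#if 0
#include <stdio.h>

#define WORD_SIZE 4
#define ROUND_UP_WORD_X(n) (((n) + WORD_SIZE - 1) & ~(WORD_SIZE - 1))

int
main (void)
{
  unsigned interworking_slot = 0;
  unsigned frame = 497;		/* Rounds up to 500.  */
  unsigned outgoing_args = 8;
  unsigned total = interworking_slot + ROUND_UP_WORD_X (frame) + outgoing_args;

  printf ("total = %u: %s\n", total,
	  total >= 504 ? "reserve a low register for the stack decrement"
		       : "SP-relative addressing reaches everything");
  return 0;
}
#endif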
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
	   regno < LAST_VFP_REGNUM;
	   regno += 2)
	{
	  if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
	      && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
	    {
	      if (count > 0)
		{
		  /* Workaround ARM10 VFPr1 bug.  */
		  if (count == 2 && !arm_arch6)
		    count++;
		  saved += count * 8;
		}
	      count = 0;
	    }
	  else
	    count++;
	}
      if (count > 0)
	{
	  if (count == 2 && !arm_arch6)
	    count++;
	  saved += count * 8;
	}
    }
  return saved;
}
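/* Illustrative sketch (not part of GCC, guarded out of the build): the
   function above scans D-register pairs and sums the bytes of each
   contiguous live run, bumping a run of exactly two registers on pre-v6
   cores (the ARM10 VFPr1 workaround).  A stand-alone model with an
   invented liveness array:  */
#if 0
#include <stdio.h>

int
main (void)
{
  /* One flag per D-register pair slot; 1 = must be saved.  */
  int live[16] = { 0, 1, 1, 0, 1, 0 };	/* Two runs: length 2 and length 1.  */
  int arm_arch6 = 0;			/* Pretend we are on an older core.  */
  int saved = 0, count = 0, i;

  for (i = 0; i < 16; i++)
    if (live[i])
      count++;
    else if (count > 0)
      {
	if (count == 2 && !arm_arch6)
	  count++;			/* Workaround: never save exactly 2.  */
	saved += count * 8;
	count = 0;
      }
  if (count > 0)
    {
      if (count == 2 && !arm_arch6)
	count++;
      saved += count * 8;
    }

  printf ("VFP save area: %d bytes\n", saved);	/* 24 + 8 = 32 here.  */
  return 0;
}
#endif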
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
			   bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
	 found a tail call, then we have to trust that the called
	 function won't return.  */
      if (really_return)
	{
	  rtx ops[2];

	  /* Otherwise, trap an attempted return by aborting.  */
	  ops[0] = operand;
	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
				       : "abort");
	  assemble_external_libcall (ops[1]);
	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
	}

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
	 (e.g. interworking) then we can load the return address
	 directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
	  && !IS_CMSE_ENTRY (func_type)
	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
	return_reg = reg_names[PC_REGNUM];
      else
	return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
	{
	  /* There are three possible reasons for the IP register
	     being saved.  1) a stack frame was created, in which case
	     IP contains the old stack pointer, or 2) an ISR routine
	     corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     restore IP.  */
	  if (frame_pointer_needed)
	    {
	      live_regs_mask &= ~ (1 << IP_REGNUM);
	      live_regs_mask |=   (1 << SP_REGNUM);
	    }
	  else
	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
	}

      /* On some ARM architectures it is faster to use LDR rather than
	 LDM to load a single register.  On other architectures, the
	 cost is the same.  In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
	if (live_regs_mask == (1U << reg))
	  break;

      if (reg <= LAST_ARM_REGNUM
	  && (reg != LR_REGNUM
	      || ! really_return
	      || ! IS_INTERRUPT (func_type)))
	{
	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
	}
      else
	{
	  char *p;
	  int first = 1;

	  /* Generate the load multiple instruction to restore the
	     registers.  Note we can get here, even if
	     frame_pointer_needed is true, but only if sp already
	     points to the base of the saved core registers.  */
	  if (live_regs_mask & (1 << SP_REGNUM))
	    {
	      unsigned HOST_WIDE_INT stack_adjust;

	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);

	      if (stack_adjust && arm_arch5 && TARGET_ARM)
		sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
	      else
		{
		  /* If we can't use ldmib (SA110 bug),
		     then try to pop r3 instead.  */
		  if (stack_adjust)
		    live_regs_mask |= 1 << 3;

		  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
		}
	    }
	  /* For interrupt returns we have to use an LDM rather than
	     a POP so that we can use the exception return variant.  */
	  else if (IS_INTERRUPT (func_type))
	    sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
	  else
	    sprintf (instr, "pop%s\t{", conditional);

	  p = instr + strlen (instr);

	  for (reg = 0; reg <= SP_REGNUM; reg++)
	    if (live_regs_mask & (1 << reg))
	      {
		int l = strlen (reg_names[reg]);

		if (first)
		  first = 0;
		else
		  {
		    memcpy (p, ", ", 2);
		    p += 2;
		  }

		memcpy (p, "%|", 2);
		memcpy (p + 2, reg_names[reg], l);
		p += l + 2;
	      }

	  if (live_regs_mask & (1 << LR_REGNUM))
	    {
	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
	      /* If returning from an interrupt, restore the CPSR.  */
	      if (IS_INTERRUPT (func_type))
		strcat (p, "^");
	    }
	  else
	    strcpy (p, "}");
	}

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
	 perform the actual function return.  */
      if (really_return
	  && func_type != ARM_FT_INTERWORKED
	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
	{
	  /* The return has already been handled
	     by loading the LR into the PC.  */
	  return "";
	}
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
	{
	case ARM_FT_ISR:
	case ARM_FT_FIQ:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
	  break;

	case ARM_FT_INTERWORKED:
	  gcc_assert (arm_arch5 || arm_arch4t);
	  sprintf (instr, "bx%s\t%%|lr", conditional);
	  break;

	case ARM_FT_EXCEPTION:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
	  break;

	default:
	  if (IS_CMSE_ENTRY (func_type))
	    {
	      /* Check if we have to clear the 'GE bits' which is only used if
		 parallel add and subtraction instructions are available.  */
	      if (TARGET_INT_SIMD)
		snprintf (instr, sizeof (instr),
			  "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
	      else
		snprintf (instr, sizeof (instr),
			  "msr%s\tAPSR_nzcvq, %%|lr", conditional);

	      output_asm_insn (instr, & operand);
	      if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
		{
		  /* Clear the cumulative exception-status bits (0-4,7) and the
		     condition code bits (28-31) of the FPSCR.  We need to
		     remember to clear the first scratch register used (IP) and
		     save and restore the second (r4).  */
		  snprintf (instr, sizeof (instr), "push\t{%%|r4}");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
		  output_asm_insn (instr, & operand);
		  snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
		  output_asm_insn (instr, & operand);
		}
	      snprintf (instr, sizeof (instr), "bxns\t%%|lr");
	    }
	  /* Use bx if it's available.  */
	  else if (arm_arch5 || arm_arch4t)
	    sprintf (instr, "bx%s\t%%|lr", conditional);
	  else
	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
	  break;
	}

      output_asm_insn (instr, & operand);
    }

  return "";
}
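/* Illustrative sketch (not part of GCC, guarded out of the build): the
   epilogue code above builds the "pop {...}" operand list by hand with
   memcpy into a fixed buffer.  The same idea in stand-alone form, using
   an invented register-name table and mask:  */
#if 0
#include <stdio.h>
#include <string.h>

int
main (void)
{
  static const char *reg_names[16] =
    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
      "r8", "r9", "r10", "fp", "ip", "sp", "lr", "pc" };
  unsigned long live_regs_mask = (1UL << 4) | (1UL << 5) | (1UL << 14);
  char instr[100];
  char *p;
  int reg, first = 1;

  strcpy (instr, "pop\t{");
  p = instr + strlen (instr);

  for (reg = 0; reg <= 15; reg++)
    if (live_regs_mask & (1UL << reg))
      {
	size_t l = strlen (reg_names[reg]);

	if (first)
	  first = 0;
	else
	  {
	    memcpy (p, ", ", 2);
	    p += 2;
	  }
	memcpy (p, reg_names[reg], l);
	p += l;
      }
  strcpy (p, "}");

  puts (instr);		/* Prints: pop	{r4, r5, lr}  */
  return 0;
}
#endif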
/* Output in FILE asm statements needed to declare the NAME of the function
   defined by its DECL node.  */

void
arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
{
  size_t cmse_name_len;
  char *cmse_name = 0;
  char cmse_prefix[] = "__acle_se_";

  /* When compiling with ARMv8-M Security Extensions enabled, we should print an
     extra function label for each function with the 'cmse_nonsecure_entry'
     attribute.  This extra function label should be prepended with
     '__acle_se_', telling the linker that it needs to create secure gateway
     veneers for this function.  */
  if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
				    DECL_ATTRIBUTES (decl)))
    {
      cmse_name_len = sizeof (cmse_prefix) + strlen (name);
      cmse_name = XALLOCAVEC (char, cmse_name_len);
      snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
      targetm.asm_out.globalize_label (file, cmse_name);

      ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
      ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
    }

  ARM_DECLARE_FUNCTION_NAME (file, name, decl);
  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
  ASM_OUTPUT_LABEL (file, name);

  if (cmse_name)
    ASM_OUTPUT_LABEL (file, cmse_name);

  ARM_OUTPUT_FN_UNWIND (file, TRUE);
}
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */

void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
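/* Illustrative sketch (not part of GCC, guarded out of the build): a
   backtracer consuming the marker word emitted above.  If the word at
   pc - 12 has its top byte set, masking that byte off yields the size of
   the name area stored immediately before the marker (this reading of the
   comment above is an assumption; the value here is invented).  */
#if 0
#include <stdio.h>

int
main (void)
{
  /* Fake marker word: 0xff in the top byte, 24-byte aligned name area.  */
  unsigned long marker = 0xff000000ul + 24;

  if ((marker & 0xff000000ul) == 0xff000000ul)
    {
      unsigned long len = marker & 0x00fffffful;
      printf ("function name occupies %lu bytes before the marker\n", len);
    }
  return 0;
}
#endif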
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f)
{
  unsigned long func_type;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  if (IS_CMSE_ENTRY (func_type))
    asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
	       crtl->args.size,
	       crtl->args.pretend_args_size,
	       (HOST_WIDE_INT) get_frame_size ());

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
	       frame_pointer_needed,
	       cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
	 of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
	{
	  rtx label = cfun->machine->call_via[regno];

	  if (label != NULL)
	    {
	      switch_to_section (function_section (current_function_decl));
	      targetm.asm_out.internal_label (asm_out_file, "L",
					      CODE_LABEL_NUMBER (label));
	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	    }
	}

      /* ??? Probably not safe to set this here, since it assumes that a
	 function will be emitted as assembly immediately after we generate
	 RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
		  || (cfun->machine->return_used_this_function != 0)
		  || offsets->saved_regs == offsets->outgoing_args
		  || frame_pointer_needed);
    }
}
/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i = 0;
  int regno;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  /* If there's an odd number of registers to push.  Start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
	mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
						     stack_pointer_rtx));
      else
	mem = gen_frame_mem (Pmode,
			     gen_rtx_PRE_MODIFY
			     (Pmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     -4 * num_regs)));

      tmp = gen_rtx_SET (mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
	rtx reg1, reg2, mem1, mem2;
	rtx tmp0, tmp1, tmp2;
	int regno2;

	/* Find the register to pair with this one.  */
	for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
	     regno2++)
	  ;

	reg1 = gen_rtx_REG (SImode, regno);
	reg2 = gen_rtx_REG (SImode, regno2);

	if (i == 0)
	  {
	    rtx insn;

	    /* The first STRD also allocates the whole stack adjustment.  */
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * num_regs));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * (num_regs - 1)));
	    tmp0 = gen_rtx_SET (stack_pointer_rtx,
				plus_constant (Pmode, stack_pointer_rtx,
					       -4 * num_regs));
	    tmp1 = gen_rtx_SET (mem1, reg1);
	    tmp2 = gen_rtx_SET (mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp0) = 1;
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
	    XVECEXP (par, 0, 0) = tmp0;
	    XVECEXP (par, 0, 1) = tmp1;
	    XVECEXP (par, 0, 2) = tmp2;
	    insn = emit_insn (par);
	    RTX_FRAME_RELATED_P (insn) = 1;
	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  }
	else
	  {
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * i));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * (i + 1)));
	    tmp1 = gen_rtx_SET (mem1, reg1);
	    tmp2 = gen_rtx_SET (mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    XVECEXP (par, 0, 0) = tmp1;
	    XVECEXP (par, 0, 1) = tmp2;
	    emit_insn (par);
	  }

	/* Create unwind information.  This is an approximation.  */
	tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * i)),
			    reg1);
	tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * (i + 1))),
			    reg2);
	RTX_FRAME_RELATED_P (tmp1) = 1;
	RTX_FRAME_RELATED_P (tmp2) = 1;
	XVECEXP (dwarf, 0, i + 1) = tmp1;
	XVECEXP (dwarf, 0, i + 2) = tmp2;
	i += 2;
	regno = regno2 + 1;
      }
    else
      regno++;

  return;
}
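/* Illustrative sketch (not part of GCC, guarded out of the build): the
   pairing strategy used above, stand-alone.  With an odd register count
   the first register goes out via a single STR so that every following
   STRD lands on a 64-bit boundary.  The mask is invented.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned long mask = (1UL << 4) | (1UL << 6) | (1UL << 7); /* r4, r6, r7 */
  int num_regs = 0, regno, i = 0;

  for (regno = 0; regno <= 14; regno++)
    if (mask & (1UL << regno))
      num_regs++;

  /* Find the first register in the mask.  */
  for (regno = 0; (mask & (1UL << regno)) == 0; regno++)
    ;

  if (num_regs & 1)
    {
      printf ("str  r%d  (single store keeps later strd aligned)\n", regno);
      regno++;
      i++;
    }

  while (i < num_regs)
    if (mask & (1UL << regno))
      {
	int regno2;
	for (regno2 = regno + 1; (mask & (1UL << regno2)) == 0; regno2++)
	  ;
	printf ("strd r%d, r%d\n", regno, regno2);
	i += 2;
	regno = regno2 + 1;
      }
    else
      regno++;
  return 0;
}
#endif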
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more freedom
   for scheduling and can be turned into an STM by peephole optimizations.  */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: A more efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2 == 0)
	    && (saved_regs_mask & (1 << (j + 1))))
	  {
	    /* Current register and previous register form register pair for
	       which STRD can be generated.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (DImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset + 4));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 8;
	    j += 2;
	  }
	else
	  {
	    /* Emit a single word store.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (SImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant(Pmode,
					       stack_pointer_rtx,
					       offset));
	    tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 4;
	    j += 1;
	  }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	num_regs++;
      if (dwarf_regs_mask & (1 << i))
	num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
	   (set (mem:BLK (pre_modify:SI (reg:SI sp)
					(const_int:SI <num>)))
		(unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
	   (use (reg:SI XX))
	   (use (reg:SI YY))
	   ...
	])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
	   (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
	   (set (mem:SI (reg:SI sp)) (reg:SI r4))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
	   ...
	])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, 0)
	    = gen_rtx_SET (gen_frame_mem
			   (BLKmode,
			    gen_rtx_PRE_MODIFY (Pmode,
						stack_pointer_rtx,
						plus_constant
						(Pmode, stack_pointer_rtx,
						 -4 * num_regs))
			    ),
			   gen_rtx_UNSPEC (BLKmode,
					   gen_rtvec (1, reg),
					   UNSPEC_PUSH_MULT));

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
				 reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  break;
	}
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp
		= gen_rtx_SET (gen_frame_mem
			       (SImode,
				plus_constant (Pmode, stack_pointer_rtx,
					       4 * j)),
			       reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  j++;
	}
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
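/* Illustrative sketch (not part of GCC, guarded out of the build): the
   frame note built above records one SP decrement plus one store per
   register, at increasing offsets.  This stand-alone model prints the slot
   assignment an unwinder would read back from such a note; the mask is
   invented.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned long mask = (1UL << 4) | (1UL << 5) | (1UL << 11) | (1UL << 14);
  int num_regs = 0, i, j;

  for (i = 0; i <= 15; i++)
    if (mask & (1UL << i))
      num_regs++;

  printf ("sp <- sp - %d\n", 4 * num_regs);
  for (i = 0, j = 0; i <= 15; i++)
    if (mask & (1UL << i))
      {
	printf ("mem[sp + %2d] <- r%d\n", 4 * j, i);
	j++;
      }
  return 0;
}
#endif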
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update
						 + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
	 num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
	reg = gen_rtx_REG (SImode, i);
	if ((num_regs == 1) && emit_update && !return_in_pc)
	  {
	    /* Emit single load with writeback.  */
	    tmp = gen_frame_mem (SImode,
				 gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));
	    tmp = emit_insn (gen_rtx_SET (reg, tmp));
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
	    return;
	  }

	tmp = gen_rtx_SET (reg,
			   gen_frame_mem
			   (SImode,
			    plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
	RTX_FRAME_RELATED_P (tmp) = 1;
	XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

	/* We need to maintain a sequence for DWARF info too.  As dwarf info
	   should not have PC, skip PC.  */
	if (i != PC_REGNUM)
	  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
				 stack_pointer_rtx, stack_pointer_rtx);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
	first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
			 gen_frame_mem
			 (DFmode,
			  plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
				 base_reg, base_reg);
}
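/* Illustrative sketch (not part of GCC, guarded out of the build): the
   recursion above caps each pop_multi at 16 D-registers.  Stand-alone form
   of the same splitting logic, printing the ranges instead of emitting
   RTL:  */
#if 0
#include <stdio.h>

static void
emit_vfp_pop_range (int first_reg, int num_regs)
{
  if (num_regs > 16)
    {
      emit_vfp_pop_range (first_reg, 16);
      emit_vfp_pop_range (first_reg + 16, num_regs - 16);
      return;
    }
  printf ("vldm: d%d .. d%d (%d bytes)\n",
	  first_reg, first_reg + num_regs - 1, 8 * num_regs);
}

int
main (void)
{
  emit_vfp_pop_range (0, 26);	/* Splits into d0-d15 and d16-d25.  */
  return 0;
}
#endif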
/* Generate and emit a pattern that will be recognized as LDRD pattern.  If even
   number of registers are being popped, multiple LDRD patterns are created for
   all register pairs.  If odd number of registers are popped, last register is
   loaded by using LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
	/* Create RTX for memory load.  */
	reg = gen_rtx_REG (SImode, j);
	tmp = gen_rtx_SET (reg,
			   gen_frame_mem (SImode,
			       plus_constant (Pmode,
					      stack_pointer_rtx, 4 * i)));
	RTX_FRAME_RELATED_P (tmp) = 1;

	if (i % 2 == 0)
	  {
	    /* When saved-register index (i) is even, the RTX to be emitted is
	       yet to be created.  Hence create it first.  The LDRD pattern we
	       are generating is :
		 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
		   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
	       where target registers need not be consecutive.  */
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    dwarf = NULL_RTX;
	  }

	/* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
	   added as 0th element and if i is odd, reg_i is added as 1st element
	   of LDRD pattern shown above.  */
	XVECEXP (par, 0, (i % 2)) = tmp;
	dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	if ((i % 2) == 1)
	  {
	    /* When saved-register index (i) is odd, RTXs for both the registers
	       to be loaded are generated in above given LDRD pattern, and the
	       pattern can be emitted now.  */
	    par = emit_insn (par);
	    REG_NOTES (par) = dwarf;
	    RTX_FRAME_RELATED_P (par) = 1;
	  }

	i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
     then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    {
      arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
	 register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
			  gen_rtx_POST_INC (SImode,
					    stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
	{
	  /* If return_in_pc, j must be PC_REGNUM.  */
	  gcc_assert (j == PC_REGNUM);
	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = tmp;
	  par = emit_jump_insn (par);
	}
      else
	{
	  par = emit_insn (tmp);
	  REG_NOTES (par) = dwarf;
	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
	 pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }

  return;
}
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This provides
   more scheduling freedom, compared to writeback on every load.  However,
   if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
   peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2) == 0
	    && (saved_regs_mask & (1 << (j + 1)))
	    && (j + 1) != PC_REGNUM)
	  {
	    /* Current register and next register form register pair for which
	       LDRD can be generated.  PC is always the last register popped, and
	       we handle it separately.  */
	    if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j),
				    NULL_RTX);
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j + 1),
				    dwarf);

	    REG_NOTES (tmp) = dwarf;

	    offset += 8;
	    j += 2;
	  }
	else if (j != PC_REGNUM)
	  {
	    /* Emit a single word load.  */
	    if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
					      gen_rtx_REG (SImode, j),
					      NULL_RTX);

	    offset += 4;
	    j += 1;
	  }
	else /* j == PC_REGNUM */
	  j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
				   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
			 gen_frame_mem (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
			      gen_rtx_REG (SImode, PC_REGNUM),
			      NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				   stack_pointer_rtx, stack_pointer_rtx);
    }
}
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!crtl->is_leaf
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know if r3 will be available because
   there is an indirect tailcall happening in this
   particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
	rtx_insn *call = BB_END (e->src);
	if (!CALL_P (call))
	  call = prev_nonnote_nondebug_insn (call);
	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
	if (find_regno_fusage (call, USE, 3)
	    || is_indirect_tailcall_p (call))
	  return true;
      }

  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
			     ----
			    |    | \
			    |    |   saved arguments for
			    |    |   vararg functions
			    |    | /
			      --
   hard FP & arg pointer -> |    | \
			    |    |   stack
			    |    |   frame
			    |    | /
			      --
			    |    | \
			    |    |   call saved
			    |    |   registers
      soft frame pointer -> |    | /
			      --
			    |    | \
			    |    |   local
			    |    |   variables
     locals base pointer -> |    | /
			      --
			    |    | \
			    |    |   outgoing
			    |    |   arguments
   current stack pointer -> |    | /
			      --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_core_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */


/* Return cached stack offsets.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;

  offsets = &cfun->machine->stack_offsets;

  return offsets;
}
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static void
arm_compute_frame_layout (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* Initially this is the size of the local variables.  It will translated
     into an offset once we have determined the size of preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
	 preserve that condition at any subroutine call.  We also require the
	 soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
	{
	  /* Check for the call-saved iWMMXt registers.  */
	  for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 8;
	}

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
	  && TARGET_HARD_FLOAT)
	saved += arm_get_vfp_saved_size ();
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (crtl->is_leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
	 when there is a stack frame as the alignment will be rolled into
	 the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* Register r3 is caller-saved.  Normally it does not need to be
	     saved on entry by the prologue.  However if we choose to save
	     it for padding then we may confuse the compiler into thinking
	     a prologue sequence is required when in fact it is not.  This
	     will occur when shrink-wrapping if r3 is used as a scratch
	     register and there are no other callee-saved writes.

	     This situation can be avoided when other callee-saved registers
	     are available and r3 is not mandatory if we choose a callee-saved
	     register for padding.  */
	  bool prefer_callee_reg_p = false;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (! any_sibcall_could_use_r3 ()
	      && arm_size_return_regs () <= 12
	      && (offsets->saved_regs_mask & (1 << 3)) == 0
	      && (TARGET_THUMB2
		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
	    {
	      reg = 3;
	      if (!TARGET_THUMB2)
		prefer_callee_reg_p = true;
	    }
	  if (reg == -1
	      || prefer_callee_reg_p)
	    {
	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
		{
		  /* Avoid fixed registers; they may be changed at
		     arbitrary times so it's unsafe to restore them
		     during the epilogue.  */
		  if (!fixed_regs[i]
		      && (offsets->saved_regs_mask & (1 << i)) == 0)
		    {
		      reg = i;
		      break;
		    }
		}
	    }

	  if (reg != -1)
	    {
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
			    + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
	offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }
}
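/* Illustrative sketch (not part of GCC, guarded out of the build): the
   offsets computed above stack up from saved_args to outgoing_args, with
   doubleword rounding along the way.  A simplified stand-alone walk-through
   with invented sizes (the real code may also push a padding register when
   it bumps soft_frame; that is omitted here):  */
#if 0
#include <stdio.h>

int
main (void)
{
  int pretend_args = 0;		/* saved_args */
  int static_chain = 0;
  int saved = 5 * 4;		/* five core registers pushed */
  int interworking_slot = 0;
  int frame_size = 12;		/* already word-rounded locals */
  int outgoing_size = 8;

  int saved_args = pretend_args;
  int saved_regs = saved_args + static_chain + saved;
  int soft_frame = saved_regs + interworking_slot;
  if (soft_frame & 7)
    soft_frame += 4;		/* keep SFP doubleword aligned */
  int locals_base = soft_frame + frame_size;
  int outgoing_args = locals_base + outgoing_size;
  if (outgoing_args & 7)
    outgoing_args += 4;		/* keep SP doubleword aligned */

  printf ("saved_args=%d saved_regs=%d soft_frame=%d "
	  "locals_base=%d outgoing_args=%d\n",
	  saved_args, saved_regs, soft_frame, locals_base, outgoing_args);
  return 0;
}
#endif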
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case FRAME_POINTER_REGNUM:
	  /* This is the reverse of the soft frame pointer
	     to hard frame pointer elimination below.  */
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* This is only non-zero in the case where the static chain register
	     is stored above the frame.  */
	  return offsets->frame - offsets->saved_args - 4;

	case STACK_POINTER_REGNUM:
	  /* If nothing has been pushed on the stack at all
	     then this will return -4.  This *is* correct!  */
	  return offsets->outgoing_args - (offsets->saved_args + 4);

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer to the bottom entry
	     in the stack frame.  If there is no stack frame at all,
	     then they are identical.  */
	  return offsets->frame - offsets->soft_frame;

	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
	 In theory you could eliminate from the hard frame
	 pointer to the stack pointer, but this will never
	 happen, since if a stack frame is not needed the
	 hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */

bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
	  (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
	  (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
	  (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
	   true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	insn = gen_rtx_MEM (V2SImode, insn);
	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	RTX_FRAME_RELATED_P (insn) = 1;
	saved_size += 8;
      }

  if (TARGET_HARD_FLOAT)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	{
	  if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
	      && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
	    {
	      if (start_reg != reg)
		saved_size += vfp_emit_fstmd (start_reg,
					      (reg - start_reg) / 2);
	      start_reg = reg + 2;
	    }
	}
      if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }
  return saved_size;
}
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
	 expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx,
					hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					hard_frame_pointer_rtx,
					stack_pointer_rtx));
	}
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
21171 struct scratch_reg
{
21176 /* Return a short-lived scratch register for use as a 2nd scratch register on
21177 function entry after the registers are saved in the prologue. This register
21178 must be released by means of release_scratch_register_on_entry. IP is not
21179 considered since it is always used as the 1st scratch register if available.
21181 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21182 mask of live registers. */
21185 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
21186 unsigned long live_regs
)
21192 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
21198 for (i
= 4; i
< 11; i
++)
21199 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
21207 /* If IP is used as the 1st scratch register for a nested function,
21208 then either r3 wasn't available or is used to preserve IP. */
21209 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
21211 regno
= (regno1
== 3 ? 2 : 3);
21213 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
21218 sr
->reg
= gen_rtx_REG (SImode
, regno
);
21221 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21222 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
21223 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21224 plus_constant (Pmode
, stack_pointer_rtx
, -4));
21225 RTX_FRAME_RELATED_P (insn
) = 1;
21226 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
      rtx x = gen_rtx_SET (stack_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, 4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
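/* Illustrative sketch (not from the original source): for a scratch register
   that must be preserved, say r4, the two helpers above bracket its use with
   a push and a pop, roughly

        str     r4, [sp, #-4]!  @ get_scratch_register_on_entry
        ...                     @ r4 used as the 2nd scratch register
        ldr     r4, [sp], #4    @ release_scratch_register_on_entry

   and the REG_FRAME_RELATED_EXPR notes describe only the +/-4 adjustments
   of SP, so the unwinder sees a balanced stack without having to track the
   scratch value itself.  */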
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif

/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  REGNO1
   is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */
static void
arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
                            unsigned int regno1, unsigned long live_regs)
{
  rtx reg1 = gen_rtx_REG (Pmode, regno1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
    }

  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, rem;

      emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
      emit_stack_probe (reg1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
         it exceeds SIZE.  If only two probes are needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        {
          emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
          emit_stack_probe (reg1);
        }

      rem = size - (i - PROBE_INTERVAL);
      if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
        {
          emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
          emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
        }
      else
        emit_stack_probe (plus_constant (Pmode, reg1, -rem));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr, regno1, live_regs);

      emit_move_insn (reg1, GEN_INT (first));

      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (sr.reg, GEN_INT (rounded_size));

      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));

      /* Step 3: the loop

         do
           {
             TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
             probe at TEST_ADDR
           }
         while (TEST_ADDR != LAST_ADDR)

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));

      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        {
          HOST_WIDE_INT rem = size - rounded_size;

          if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
            {
              emit_set_insn (sr.reg,
                             plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
              emit_stack_probe (plus_constant (Pmode, sr.reg,
                                               PROBE_INTERVAL - rem));
            }
          else
            emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
        }

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
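/* Worked example (illustrative; assumes PROBE_INTERVAL == 4096): for
   FIRST == 4096 and SIZE == 1024 the first branch above emits, with r4
   standing in for REG1,

        mov     r4, #8192       @ first + PROBE_INTERVAL
        sub     r4, sp, r4      @ r4 = SP - 8192
        str     r0, [r4, #3072] @ probe at SP - (4096 + 1024)

   i.e. a single probe at FIRST + SIZE, addressed as an offset of
   PROBE_INTERVAL - SIZE from REG1 so the immediate stays in range.  */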
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub\t%0, %0, %1", xops);

  /* Probe at TEST_ADDR.  */
  output_asm_insn ("str\tr0, [%0, #0]", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Branch.  */
  fputs ("\tbne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
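/* For reference, the loop emitted above looks like this (label and register
   names illustrative):

        .LPSRL0:
        sub     r4, r4, #4096   @ TEST_ADDR -= PROBE_INTERVAL
        str     r0, [r4, #0]    @ probe at TEST_ADDR
        cmp     r4, r5          @ reached LAST_ADDR yet?
        bne     .LPSRL0  */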
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  bool clobber_ip;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
        current_function_static_stack_size = 0;
      return;
    }

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

          mov r0, sp
          bic r1, r0, #7
          mov sp, r1
          <save and restore r0 in normal prologue/epilogue>
          mov sp, r0
          bx lr

         The unwinder doesn't need to know about the stack realignment.
         Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, R0_REGNUM);
      r1 = gen_rtx_REG (SImode, R1_REGNUM);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
         has entered a different function.  That said, the unwind info is
         correct, individually, before and after this instruction because
         we've described the save of SP, which will override the default
         handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* The static chain register is the same as the IP register.  If it is
     clobbered when creating the frame, we need to save and restore it.  */
  clobber_ip = IS_NESTED (func_type)
               && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
                   || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
                        || flag_stack_clash_protection)
                       && !df_regs_ever_live_p (LR_REGNUM)
                       && arm_r3_live_at_start_p ()));

  /* Find somewhere to store IP whilst the frame is being created.
     We try the following places in order:

       1. The last argument register r3 if it is available.
       2. A slot on the stack above the frame if there are no
          arguments to push onto the stack.
       3. Register r3 again, after pushing the argument registers
          onto the stack, if this is a varargs function.
       4. The last slot on the stack created for the arguments to
          push, if this isn't a varargs function.

     Note - we only need to tell the dwarf2 backend about the SP
     adjustment in the second variant; the static chain register
     doesn't need to be unwound, as it doesn't contain a value
     inherited from the caller.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p ())
        insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
      else if (args_to_push == 0)
        {
          rtx addr, dwarf;

          gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
          saved_regs += 4;

          addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
          insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
          fp_offset = 4;

          /* Just tell the dwarf backend that we adjusted SP.  */
          dwarf = gen_rtx_SET (stack_pointer_rtx,
                               plus_constant (Pmode, stack_pointer_rtx,
                                              -fp_offset));
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
        }
      else
        {
          /* Store the args on the stack.  */
          if (cfun->machine->uses_anonymous_args)
            {
              insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
                                          (0xf0 >> (args_to_push / 4)) & 0xf);
              emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
              saved_pretend_args = 1;
            }
          else
            {
              rtx addr, dwarf;

              if (args_to_push == 4)
                addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
              else
                addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          -args_to_push));

              insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

              /* Just tell the dwarf backend that we adjusted SP.  */
              dwarf = gen_rtx_SET (stack_pointer_rtx,
                                   plus_constant (Pmode, stack_pointer_rtx,
                                                  -args_to_push));
              add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
            }

          RTX_FRAME_RELATED_P (insn) = 1;
          fp_offset = args_to_push;
          args_to_push = 0;
        }
    }

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
        {
          /* Interrupt functions must not corrupt any registers.
             Creating a frame pointer however, corrupts the IP
             register, so we must push it first.  */
          emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

          /* Do not set RTX_FRAME_RELATED_P on this insn.
             The dwarf stack unwinding code only wants to see one
             stack decrement per function, and this is not it.  If
             this instruction is labeled as being part of the frame
             creation sequence then dwarf2out_frame_debug_expr will
             die when it encounters the assignment of IP to FP
             later on, since the use of SP here establishes SP as
             the CFA register and not IP.

             Anyway this instruction is not really part of the stack
             frame creation although it is part of the prologue.  */
        }

      insn = emit_set_insn (ip_rtx,
                            plus_constant (Pmode, stack_pointer_rtx,
                                           fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
        insn = emit_multi_reg_push
          ((0xf0 >> (args_to_push / 4)) & 0xf,
           (0xf0 >> (args_to_push / 4)) & 0xf);
      else
        insn = emit_insn
          (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra push
     of IP (needed when the frame is needed and the frame layout is
     APCS), then subtracting four from LR now will mean that the
     function return can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
          && saved_regs == offsets->saved_regs - offsets->saved_args)
        {
          /* If no coprocessor registers are being pushed and we don't have
             to worry about a frame pointer then push extra registers to
             create the stack frame.  This is done in a way that does not
             alter the frame layout, so is independent of the epilogue.  */
          int n;
          int frame;

          n = 0;
          while (n < 8 && (live_regs_mask & (1 << n)) == 0)
            n++;
          frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
          if (frame && n * 4 >= frame)
            {
              n = frame / 4;
              live_regs_mask |= (1 << n) - 1;
              saved_regs += frame;
            }
        }

      if (TARGET_LDRD
          && current_tune->prefer_ldrd_strd
          && !optimize_function_for_size_p (cfun))
        {
          gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
          if (TARGET_THUMB2)
            thumb2_emit_strd_push (live_regs_mask);
          else if (TARGET_ARM
                   && !TARGET_APCS_FRAME
                   && !IS_INTERRUPT (func_type))
            arm_emit_strd_push (live_regs_mask);
          else
            {
              insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }
      else
        {
          insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
        {
          insn = GEN_INT (-(4 + args_to_push + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      else
        {
          insn = GEN_INT (saved_regs - (4 + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If this isn't an interrupt service routine and we have a frame, then do
     stack checking.  We use IP as the first scratch register, except for the
     non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
  if (!IS_INTERRUPT (func_type)
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
          || flag_stack_clash_protection))
    {
      unsigned int regno;

      if (!IS_NESTED (func_type) || clobber_ip)
        regno = IP_REGNUM;
      else if (df_regs_ever_live_p (LR_REGNUM))
        regno = LR_REGNUM;
      else
        regno = 3;

      if (crtl->is_leaf && !cfun->calls_alloca)
        {
          if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
            arm_emit_probe_stack_range (get_stack_check_protect (),
                                        size - get_stack_check_protect (),
                                        regno, live_regs_mask);
        }
      else if (size > 0)
        arm_emit_probe_stack_range (get_stack_check_protect (), size,
                                    regno, live_regs_mask);
    }

  /* Recover the static chain register.  */
  if (clobber_ip)
    {
      if (!arm_r3_live_at_start_p () || saved_pretend_args)
        insn = gen_rtx_REG (SImode, 3);
      else
        {
          insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
          insn = gen_frame_mem (SImode, insn);
        }
      emit_set_insn (ip_rtx, insn);
      emit_insn (gen_force_register_use (ip_rtx));
    }

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
         need to get tricky.  */
      rtx_insn *last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
                        - offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                    amount));
      do
        {
          last = last ? NEXT_INSN (last) : get_insns ();
          RTX_FRAME_RELATED_P (last) = 1;
        }
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
         will prevent the scheduler from moving stores to the frame
         before the stack adjustment.  */
      if (frame_pointer_needed)
        emit_insn (gen_stack_tie (stack_pointer_rtx,
                                  hard_frame_pointer_rtx));
    }

  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
        mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
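/* Worked example (illustrative): for a varargs function with
   args_to_push == 8, the anonymous-args mask computed above is
   (0xf0 >> (8 / 4)) & 0xf == 0x3c & 0xf == 0xc, i.e. {r2, r3}, so exactly
   the two argument registers holding anonymous arguments are pushed.  */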
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }
      if (current_insn_predicate != NULL)
        {
          output_operand_lossage
            ("predicated instruction in conditional sequence");
          return;
        }

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
   Letters previously used, but now deprecated/obsolete: sVWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '.':
      /* The current condition code for a condition code setting instruction.
         Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      fputc('s', stream);
      arm_print_condition (stream);
      return;

    case '!':
      /* If the instruction is conditionally executed then print
         the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2);
      if (current_insn_predicate)
        arm_print_condition (stream);
      else
        fputc('s', stream);
      break;

    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
        REAL_VALUE_TYPE r;
        r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
        fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    case 'c':
      /* An integer or symbol address without a preceding # sign.  */
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
          break;

        case SYMBOL_REF:
          output_addr_const (stream, x);
          break;

        case CONST:
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
            {
              output_addr_const (stream, x);
              break;
            }
          /* Fall through.  */

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      return;

    case 'x':
      /* An integer that we want to print in HEX.  */
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
          break;

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      return;

    case 'B':
      if (CONST_INT_P (x))
        {
          HOST_WIDE_INT val;
          val = ARM_SIGN_EXTEND (~INTVAL (x));
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
        }
      else
        {
          putc ('~', stream);
          output_addr_const (stream, x);
        }
      return;

    case 'b':
      /* Print the log2 of a CONST_INT.  */
      {
        HOST_WIDE_INT val;

        if (!CONST_INT_P (x)
            || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        else
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
      }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
        HOST_WIDE_INT val;
        const char *shift;

        shift = shift_op (x, &val);

        if (shift)
          {
            fprintf (stream, ", %s ", shift);
            if (val == -1)
              arm_print_operand (stream, XEXP (x, 1), 0);
            else
              fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
          }
      }
      return;

      /* An explanation of the 'Q', 'R' and 'H' register operands:

         In a pair of registers containing a DI or DF value the 'Q'
         operand returns the register number of the register containing
         the least significant part of the value.  The 'R' operand returns
         the register number of the register containing the most
         significant part of the value.

         The 'H' operand returns the higher of the two register numbers.
         On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
         same as the 'Q' operand, since the most significant part of the
         value is held in the lower number register.  The reverse is true
         on systems where WORDS_BIG_ENDIAN is false.

         The purpose of these operands is to distinguish between cases
         where the endian-ness of the values is important (for example
         when they are added together), and cases where the endian-ness
         is irrelevant, but the order of register operations is important.
         For example when loading a value from memory into a register
         pair, the endian-ness does not matter.  Provided that the value
         from the lower memory address is put into the lower numbered
         register, and the value from the higher address is put into the
         higher numbered register, the load will work regardless of whether
         the value being loaded is big-wordian or little-wordian.  The
         order of the two register loads can matter however, if the address
         of the memory location is actually held in one of the registers
         being overwritten by the load.

         The 'Q' and 'R' constraints are also available for 64-bit
         constants.  */
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
        {
          rtx part = gen_lowpart (SImode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
        {
          machine_mode mode = GET_MODE (x);
          rtx part;

          if (mode == VOIDmode)
            mode = DImode;
          part = gen_highpart_mode (SImode, mode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
                   REG_P (XEXP (x, 0))
                   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
                   REGNO (x),
                   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
        int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
        int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
        if (numregs == 1)
          asm_fprintf (stream, "{d%d}", regno);
        else
          asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
        return;

      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[get_arm_condition_code (x)],
             stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
         want to do that.  */
      if (x == const_true_rtx)
        {
          output_operand_lossage ("instruction never executed");
          return;
        }
      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
                                 (get_arm_condition_code (x))],
             stream);
      return;

    case 's':
    case 'V':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
          || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
          || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
        /* Bad value for wCG register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }
      else
        fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

      /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
          || INTVAL (x) < 0
          || INTVAL (x) >= 16)
        /* Bad value for wC register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }
      else
        {
          static const char * wc_reg_names [16] =
            {
              "wCID",  "wCon",  "wCSSF", "wCASF",
              "wC4",   "wC5",   "wC6",   "wC7",
              "wCGR0", "wCGR1", "wCGR2", "wCGR3",
              "wC12",  "wC13",  "wC14",  "wC15"
            };

          fputs (wc_reg_names [INTVAL (x)], stream);
        }
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
        machine_mode mode = GET_MODE (x);
        int is_quad = (code == 'q');
        int regno;

        if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (!REG_P (x)
            || !IS_VFP_REGNUM (REGNO (x)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
            || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
                 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if ((GET_MODE_SIZE (mode) != 16
             && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!NEON_REGNO_OK_FOR_QUAD (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (GET_MODE_SIZE (mode) == 16)
          fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
                                  + (code == 'f' ? 1 : 0));
        else
          fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
                                  + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
        int index = vfp3_const_double_index (x);
        gcc_assert (index != -1);
        fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
        rtx addr;
        bool postinc = FALSE;
        rtx postinc_reg = NULL;
        unsigned align, memsize, align_bits;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        if (GET_CODE (addr) == POST_INC)
          {
            postinc = 1;
            addr = XEXP (addr, 0);
          }
        if (GET_CODE (addr) == POST_MODIFY)
          {
            postinc_reg = XEXP( XEXP (addr, 1), 1);
            addr = XEXP (addr, 0);
          }
        asm_fprintf (stream, "[%r", REGNO (addr));

        /* We know the alignment of this access, so we can emit a hint in the
           instruction (for some alignments) as an aid to the memory subsystem
           of the target.  */
        align = MEM_ALIGN (x) >> 3;
        memsize = MEM_SIZE (x);

        /* Only certain alignment specifiers are supported by the hardware.  */
        if (memsize == 32 && (align % 32) == 0)
          align_bits = 256;
        else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
          align_bits = 128;
        else if (memsize >= 8 && (align % 8) == 0)
          align_bits = 64;
        else
          align_bits = 0;

        if (align_bits != 0)
          asm_fprintf (stream, ":%d", align_bits);

        asm_fprintf (stream, "]");

        if (postinc)
          fputs("!", stream);
        if (postinc_reg)
          asm_fprintf (stream, ", %r", REGNO (postinc_reg));
      }
      return;

    case 'C':
      {
        rtx addr;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        gcc_assert (REG_P (addr));
        asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element index.  */
    case 'y':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    case 'v':
      {
        int result;

        gcc_assert (CONST_DOUBLE_P (x));
        result = vfp3_const_double_for_fract_bits (x);
        if (result == 0)
          result = vfp3_const_double_for_bits (x);
        fprintf (stream, "#%d", result);
      }
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
        {
          output_operand_lossage ("missing operand");
          return;
        }

      switch (GET_CODE (x))
        {
        case REG:
          asm_fprintf (stream, "%r", REGNO (x));
          break;

        case MEM:
          output_address (GET_MODE (x), XEXP (x, 0));
          break;

        case CONST_DOUBLE:
          {
            char fpstr[20];
            real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
                             sizeof (fpstr), 0, 1);
            fprintf (stream, "#%s", fpstr);
          }
          break;

        default:
          gcc_assert (GET_CODE (x) != NEG);
          fputc ('#', stream);
          if (GET_CODE (x) == HIGH)
            {
              fputs (":lower16:", stream);
              x = XEXP (x, 0);
            }

          output_addr_const (stream, x);
          break;
        }
    }
}
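/* Illustrative example: for a DImode value held in {r0, r1} on a
   little-endian target (WORDS_BIG_ENDIAN false), %Q prints r0 (least
   significant word), %R prints r1 (most significant word) and %H prints
   r1 (the higher-numbered register).  */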
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
        asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
        {
          rtx base = XEXP (x, 0);
          rtx index = XEXP (x, 1);
          HOST_WIDE_INT offset = 0;
          if (!REG_P (base)
              || (REG_P (index) && REGNO (index) == SP_REGNUM))
            {
              /* Ensure that BASE is a register.  */
              /* (one of them must be).  */
              /* Also ensure the SP is not used as an index register.  */
              std::swap (base, index);
            }
          switch (GET_CODE (index))
            {
            case CONST_INT:
              offset = INTVAL (index);
              if (is_minus)
                offset = -offset;
              asm_fprintf (stream, "[%r, #%wd]",
                           REGNO (base), offset);
              break;

            case REG:
              asm_fprintf (stream, "[%r, %s%r]",
                           REGNO (base), is_minus ? "-" : "",
                           REGNO (index));
              break;

            case MULT:
            case ASHIFTRT:
            case LSHIFTRT:
            case ASHIFT:
            case ROTATERT:
              {
                asm_fprintf (stream, "[%r, %s%r",
                             REGNO (base), is_minus ? "-" : "",
                             REGNO (XEXP (index, 0)));
                arm_print_operand (stream, index, 'S');
                fputs ("]", stream);
                break;
              }

            default:
              gcc_unreachable ();
            }
        }
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
               || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
        {
          gcc_assert (REG_P (XEXP (x, 0)));

          if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
            asm_fprintf (stream, "[%r, #%s%d]!",
                         REGNO (XEXP (x, 0)),
                         GET_CODE (x) == PRE_DEC ? "-" : "",
                         GET_MODE_SIZE (mode));
          else
            asm_fprintf (stream, "[%r], #%s%d",
                         REGNO (XEXP (x, 0)),
                         GET_CODE (x) == POST_DEC ? "-" : "",
                         GET_MODE_SIZE (mode));
        }
      else if (GET_CODE (x) == PRE_MODIFY)
        {
          asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
            asm_fprintf (stream, "#%wd]!",
                         INTVAL (XEXP (XEXP (x, 1), 1)));
          else
            asm_fprintf (stream, "%r]!",
                         REGNO (XEXP (XEXP (x, 1), 1)));
        }
      else if (GET_CODE (x) == POST_MODIFY)
        {
          asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
            asm_fprintf (stream, "#%wd",
                         INTVAL (XEXP (XEXP (x, 1), 1)));
          else
            asm_fprintf (stream, "%r",
                         REGNO (XEXP (XEXP (x, 1), 1)));
        }
      else output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
        asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
        asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
        {
          gcc_assert (REG_P (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (x, 1)))
            asm_fprintf (stream, "[%r, #%wd]",
                         REGNO (XEXP (x, 0)),
                         INTVAL (XEXP (x, 1)));
          else
            asm_fprintf (stream, "[%r, %r]",
                         REGNO (XEXP (x, 0)),
                         REGNO (XEXP (x, 1)));
        }
      else
        output_addr_const (stream, x);
    }
}
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
          || code == '(' || code == ')' || code == '#'
          || (TARGET_32BIT && (code == '?'))
          || (TARGET_THUMB2 && (code == '!'))
          || (TARGET_THUMB && (code == '_')));
}
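/* For example, an output template such as "add%?\t%0, %1, %2" relies on the
   '?' punctuation accepted above to splice in the current condition code,
   producing e.g. "addeq r0, r1, r2" inside a conditionalised sequence
   (template shown for illustration only).  */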
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
         .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
          (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
        {
          /* See legitimize_pic_address for an explanation of the
             TARGET_VXWORKS_RTP check.  */
          /* References to weak symbols cannot be resolved locally:
             they may be overridden by a non-weak definition at link
             time.  */
          if (!arm_pic_data_is_text_relative
              || (GET_CODE (x) == SYMBOL_REF
                  && (!SYMBOL_REF_LOCAL_P (x)
                      || (SYMBOL_REF_DECL (x)
                          ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
            fputs ("(GOT)", asm_out_file);
          else
            fputs ("(GOTOFF)", asm_out_file);
        }
      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_UNIT_SIZE (mode);

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            assemble_integer
              (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
          }
      else
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            assemble_real
              (*CONST_DOUBLE_REAL_VALUE (elt),
               as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
               i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
          }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
               is_ctor ? ".init_array" : ".fini_array",
               priority);
      s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}

/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The states of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
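/* Example (illustrative): with max_insns_skipped large enough, the fsm
   turns a short forward branch such as

        cmp     r0, #0
        beq     .L1
        add     r1, r1, #1
        .L1:

   into a predicated sequence with the branch and label deleted:

        cmp     r0, #0
        addne   r1, r1, #1  */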
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */

static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
                           XEXP (comparison, 1));

  switch (mode)
    {
    case E_CC_DNEmode: code = ARM_NE; goto dominance;
    case E_CC_DEQmode: code = ARM_EQ; goto dominance;
    case E_CC_DGEmode: code = ARM_GE; goto dominance;
    case E_CC_DGTmode: code = ARM_GT; goto dominance;
    case E_CC_DLEmode: code = ARM_LE; goto dominance;
    case E_CC_DLTmode: code = ARM_LT; goto dominance;
    case E_CC_DGEUmode: code = ARM_CS; goto dominance;
    case E_CC_DGTUmode: code = ARM_HI; goto dominance;
    case E_CC_DLEUmode: code = ARM_LS; goto dominance;
    case E_CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
        return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
        return code;
      return ARM_NV;

    case E_CC_NOOVmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_PL;
        case LT: return ARM_MI;
        default: return ARM_NV;
        }

    case E_CC_Zmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        default: return ARM_NV;
        }

    case E_CC_Nmode:
      switch (comp_code)
        {
        case NE: return ARM_MI;
        case EQ: return ARM_PL;
        default: return ARM_NV;
        }

    case E_CCFPEmode:
    case E_CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
        {
        case GE: return ARM_GE;
        case GT: return ARM_GT;
        case LE: return ARM_LS;
        case LT: return ARM_MI;
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case ORDERED: return ARM_VC;
        case UNORDERED: return ARM_VS;
        case UNLT: return ARM_LT;
        case UNLE: return ARM_LE;
        case UNGT: return ARM_HI;
        case UNGE: return ARM_PL;
        /* UNEQ and LTGT do not have a representation.  */
        case UNEQ: /* Fall through.  */
        case LTGT: /* Fall through.  */
        default: return ARM_NV;
        }

    case E_CC_SWPmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_LE;
        case GT: return ARM_LT;
        case LE: return ARM_GE;
        case LT: return ARM_GT;
        case GEU: return ARM_LS;
        case GTU: return ARM_CC;
        case LEU: return ARM_CS;
        case LTU: return ARM_HI;
        default: return ARM_NV;
        }

    case E_CC_Cmode:
      switch (comp_code)
        {
        case LTU: return ARM_CS;
        case GEU: return ARM_CC;
        case NE: return ARM_CS;
        case EQ: return ARM_CC;
        default: return ARM_NV;
        }

    case E_CC_CZmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GEU: return ARM_CS;
        case GTU: return ARM_HI;
        case LEU: return ARM_LS;
        case LTU: return ARM_CC;
        default: return ARM_NV;
        }

    case E_CC_NCVmode:
      switch (comp_code)
        {
        case GE: return ARM_GE;
        case LT: return ARM_LT;
        case GEU: return ARM_CS;
        case LTU: return ARM_CC;
        default: return ARM_NV;
        }

    case E_CC_Vmode:
      switch (comp_code)
        {
        case NE: return ARM_VS;
        case EQ: return ARM_VC;
        default: return ARM_NV;
        }

    case E_CCmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_GE;
        case GT: return ARM_GT;
        case LE: return ARM_LE;
        case LT: return ARM_LT;
        case GEU: return ARM_CS;
        case GTU: return ARM_HI;
        case LEU: return ARM_LS;
        case LTU: return ARM_CC;
        default: return ARM_NV;
        }

    default: gcc_unreachable ();
    }
}
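/* Example (illustrative): in CC_SWPmode the comparison operands were swapped
   when the flags were set, so "a >= b" has been tested as "cmp b, a" and GE
   must be emitted as its swapped counterpart LE, as in the table above.  */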
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
/* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
   code registers when not targeting Thumb1.  The VFP condition register
   only exists when generating hard-float code.  */
static bool
arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  if (!TARGET_32BIT)
    return false;

  *p1 = CC_REGNUM;
  *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
  return true;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
         barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
        break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
          || GET_CODE (body) == CLOBBER)
        continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
        break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
        break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
        arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
        break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
        break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
        {
          arm_target_insn = NULL;
          arm_ccfsm_state = 0;
        }
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
        {
          start_insn = next_nonnote_insn (start_insn);
          if (BARRIER_P (start_insn))
            {
              /* XXX Isn't this always a barrier?  */
              start_insn = next_nonnote_insn (start_insn);
            }
          if (LABEL_P (start_insn)
              && CODE_LABEL_NUMBER (start_insn) == arm_target_label
              && LABEL_NUSES (start_insn) == 1)
            reverse = TRUE;
          else
            return;
        }
      else if (ANY_RETURN_P (body))
        {
          start_insn = next_nonnote_insn (start_insn);
          if (BARRIER_P (start_insn))
            start_insn = next_nonnote_insn (start_insn);
          if (LABEL_P (start_insn)
              && CODE_LABEL_NUMBER (start_insn) == arm_target_label
              && LABEL_NUSES (start_insn) == 1)
            {
              reverse = TRUE;
              seeking_return = 1;
              return_code = GET_CODE (body);
            }
          else
            return;
        }
      else
        return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
          && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;
      rtx label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
        {
          if (!seeking_return)
            label = XEXP (SET_SRC (body), 0);
        }
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
        label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
        {
          label = XEXP (XEXP (SET_SRC (body), 2), 0);
          then_not_else = FALSE;
        }
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
        {
          seeking_return = 1;
          return_code = GET_CODE (XEXP (SET_SRC (body), 1));
        }
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
        {
          seeking_return = 1;
          return_code = GET_CODE (XEXP (SET_SRC (body), 2));
          then_not_else = FALSE;
        }
      else
        gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
         insns are okay, and the label or unconditional branch to the same
         label is not too far away, succeed.  */
      for (insns_skipped = 0;
           !fail && !succeed && insns_skipped++ < max_insns_skipped;)
        {
          rtx scanbody;

          this_insn = next_nonnote_insn (this_insn);
          if (!this_insn)
            break;

          switch (GET_CODE (this_insn))
            {
            case CODE_LABEL:
              /* Succeed if it is the target label, otherwise fail since
                 control falls in from somewhere else.  */
              if (this_insn == label)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case BARRIER:
              /* Succeed if the following insn is the target label.
                 Otherwise fail.
                 If return insns are used then the last insn in a function
                 will be a barrier.  */
              this_insn = next_nonnote_insn (this_insn);
              if (this_insn && this_insn == label)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case CALL_INSN:
              /* The AAPCS says that conditional calls should not be
                 used since they make interworking inefficient (the
                 linker can't transform BL<cond> into BLX).  That's
                 only a problem if the machine has BLX.  */
              if (arm_arch5)
                {
                  fail = TRUE;
                  break;
                }

              /* Succeed if the following insn is the target label, or
                 if the following two insns are a barrier and the
                 target label.  */
              this_insn = next_nonnote_insn (this_insn);
              if (this_insn && BARRIER_P (this_insn))
                this_insn = next_nonnote_insn (this_insn);

              if (this_insn && this_insn == label
                  && insns_skipped < max_insns_skipped)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case JUMP_INSN:
              /* If this is an unconditional branch to the same label, succeed.
                 If it is to another label, do nothing.  If it is conditional,
                 fail.  */
              /* XXX Probably, the tests for SET and the PC are
                 unnecessary.  */

              scanbody = PATTERN (this_insn);
              if (GET_CODE (scanbody) == SET
                  && GET_CODE (SET_DEST (scanbody)) == PC)
                {
                  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
                      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
                    {
                      arm_ccfsm_state = 2;
                      succeed = TRUE;
                    }
                  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
                    fail = TRUE;
                }
              /* Fail if a conditional return is undesirable (e.g. on a
                 StrongARM), but still allow this if optimizing for size.  */
              else if (GET_CODE (scanbody) == return_code
                       && !use_return_insn (TRUE, NULL)
                       && !optimize_size)
                fail = TRUE;
              else if (GET_CODE (scanbody) == return_code)
                {
                  arm_ccfsm_state = 2;
                  succeed = TRUE;
                }
              else if (GET_CODE (scanbody) == PARALLEL)
                {
                  switch (get_attr_conds (this_insn))
                    {
                    case CONDS_NOCOND:
                      break;
                    default:
                      fail = TRUE;
                      break;
                    }
                }
              else
                fail = TRUE;    /* Unrecognized jump (e.g. epilogue).  */

              break;

            case INSN:
              /* Instructions using or affecting the condition codes make it
                 fail.  */
              scanbody = PATTERN (this_insn);
              if (!(GET_CODE (scanbody) == SET
                    || GET_CODE (scanbody) == PARALLEL)
                  || get_attr_conds (this_insn) != CONDS_NOCOND)
                fail = TRUE;
              break;

            default:
              break;
            }
        }
      if (succeed)
        {
          if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
            arm_target_label = CODE_LABEL_NUMBER (label);
          else
            {
              gcc_assert (seeking_return || arm_ccfsm_state == 2);

              while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
                {
                  this_insn = next_nonnote_insn (this_insn);
                  gcc_assert (!this_insn
                              || (!BARRIER_P (this_insn)
                                  && !LABEL_P (this_insn)));
                }
              if (!this_insn)
                {
                  /* Oh, dear! we ran off the end.. give up.  */
                  extract_constrain_insn_cached (insn);
                  arm_ccfsm_state = 0;
                  arm_target_insn = NULL;
                  return;
                }
              arm_target_insn = this_insn;
            }

          /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
             what it was.  */
          if (!reverse)
            arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

          if (reverse || then_not_else)
            arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
        }

      /* Restore recog_data (getting the attributes of other insns can
         destroy this array, but final.c assumes that it remains intact
         across this call.  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
        buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf(stream, "i%s\t%s\n\t", buff,
                  arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
/* Implement TARGET_HARD_REGNO_NREGS.  On the ARM core regs are
   UNITS_PER_WORD bytes wide.  */
static unsigned int
arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (TARGET_32BIT
      && regno > PC_REGNUM
      && regno != FRAME_POINTER_REGNUM
      && regno != ARG_POINTER_REGNUM
      && !IS_VFP_REGNUM (regno))
    return 1;

  return ARM_NUM_REGS (mode);
}
/* Implement TARGET_HARD_REGNO_MODE_OK.  */
static bool
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
            || (TARGET_HARD_FLOAT
                && regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return false;

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
        return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
        return VFP_REGNO_OK_FOR_DOUBLE (regno);

      if (mode == HFmode)
        return VFP_REGNO_OK_FOR_SINGLE (regno);

      /* VFP registers can hold HImode values.  */
      if (mode == HImode)
        return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
        return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
               || (VALID_NEON_QREG_MODE (mode)
                   && NEON_REGNO_OK_FOR_QUAD (regno))
               || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
               || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
               || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
               || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
               || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return false;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
        return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
        return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  Do not allow very large Neon structure
     opaque modes in general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
        return false;

      if (TARGET_THUMB2)
        return true;

      return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return false;
}
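/* For example, in ARM state with TARGET_LDRD a DImode value (two words) is
   rejected in the odd-numbered r1 but allowed in r2 by the test above,
   keeping doubleword values in the even/odd register pairs that ldrd/strd
   can address.  */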
/* Implement TARGET_MODES_TIEABLE_P.  */
static bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode1)
          || VALID_NEON_QREG_MODE (mode1)
          || VALID_NEON_STRUCT_MODE (mode1))
      && (VALID_NEON_DREG_MODE (mode2)
          || VALID_NEON_QREG_MODE (mode2)
          || VALID_NEON_STRUCT_MODE (mode2)))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */
enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
        return STACK_REG;
      if (regno == CC_REGNUM)
        return CC_REG;
      if (regno < 8)
        return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
        return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
        return VFP_LO_REGS;
      else
        return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

           double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address
       of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn)
          && GET_CODE (PATTERN (insn)) == SET
          && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
          && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
          && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
          && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0))
             == (unsigned) HARD_FRAME_POINTER_REGNUM
          && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1)))
        {
          value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));

          break;
        }
    }

  if (value == 0)
    {
      debug_rtx (addr);
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
/* Implement TARGET_PROMOTED_TYPE.  */
static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t)
      && TYPE_PRECISION (t) == 16
      && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */
static bool
arm_scalar_mode_supported_p (scalar_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Set the value of FLT_EVAL_METHOD.
   ISO/IEC TS 18661-3 defines two values that we'd like to make use of:

    0: evaluate all operations and constants, whose semantic type has at
       most the range and precision of type float, to the range and
       precision of float; evaluate all other operations and constants to
       the range and precision of the semantic type;

    N, where _FloatN is a supported interchange floating type:
       evaluate all operations and constants, whose semantic type has at
       most the range and precision of _FloatN type, to the range and
       precision of the _FloatN type; evaluate all other operations and
       constants to the range and precision of the semantic type;

   If we have the ARMv8.2-A extensions then we support _Float16 in native
   precision, so we should set this to 16.  Otherwise, we support the type,
   but want to evaluate expressions in float precision, so set this to
   0.  */

static enum flt_eval_method
arm_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
      case EXCESS_PRECISION_TYPE_FAST:
      case EXCESS_PRECISION_TYPE_STANDARD:
        /* We can calculate either in 16-bit range and precision or
           32-bit range and precision.  Make that decision based on whether
           we have native support for the ARMv8.2-A 16-bit floating-point
           instructions or not.  */
        return (TARGET_VFP_FP16INST
                ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
                : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
      case EXCESS_PRECISION_TYPE_IMPLICIT:
        return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      default:
        gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
/* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
   _Float16 if we are using anything other than ieee format for 16-bit
   floating point.  Otherwise, punt to the default implementation.  */
static opt_scalar_float_mode
arm_floatn_mode (int n, bool extended)
{
  if (!extended && n == 16)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
        return HFmode;
      return opt_scalar_float_mode ();
    }

  return default_floatn_mode (n, extended);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
        {
          operands[2 * i] = dest[i];
          operands[2 * i + 1] = src[i];
        }
    }
  else
    {
      for (i = 0; i < count; i++)
        {
          operands[2 * i] = dest[count - i - 1];
          operands[2 * i + 1] = src[count - i - 1];
        }
    }
}
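/* Example: when the destination registers are numbered below the source
   (say copying d2-d3 into d0-d1) the components are emitted in forward
   order; copying d0-d1 into d1-d2 instead takes the reversed branch above,
   so d2 is written from d1 before d1 is itself overwritten.  */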
/* Split operands into moves from op[1] + op[2] into op[0].  */
void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = REG_NREGS (operands[1]);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
                               GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (destlo, operands[1]);
      rtx y = gen_rtx_SET (desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
         is in the right place already.  */
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
    }
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
inline static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
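/* E.g. number_of_first_bit_set (0x28) == 3: the lowest set bit of
   0b101000 is bit 3.  */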
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */
static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
        tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
        tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
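/* The MASK/REAL_REGS split is what lets the Thumb-1 prologue stash high
   registers through low-register aliases: after "mov r4, r8" a push of r4
   (in MASK) can be described to the unwinder as a save of r8 (in
   REAL_REGS), so the unwind info still refers to the register whose value
   was actually saved.  */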
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
         thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
        {
          asm_fprintf (f, "%r", regno);

          if ((lo_mask & ~1) != 0)
            fprintf (f, ", ");
        }
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
          || IS_CMSE_ENTRY (arm_current_func_type ()))
        {
          /* The PC is never popped directly, instead
             it is popped into r3 and then BX is used.  */
          fprintf (f, "}\n");

          thumb_exit (f, -1);

          return;
        }
      else
        {
          if (mask & 0xFF)
            fprintf (f, ", ");

          asm_fprintf (f, "%r", PC_REGNUM);
        }
    }

  fprintf (f, "}\n");
}
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
        asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      if (IS_CMSE_ENTRY (arm_current_func_type ()))
        {
          asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
                       reg_containing_return_addr);
          asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
        }
      else
        asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
           && !TARGET_BACKTRACE
           && !is_called_in_ARM_mode (current_function_decl)
           && !crtl->calls_eh_return
           && !IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* We can deduce the registers used from the function's
         return value.  This is more reliable than examining
         df_regs_ever_live_p () because that will be set if the register is
         ever used in the function, not just if the register is used
         to hold a return value.  */
      if (crtl->return_rtx != 0)
        mode = GET_MODE (crtl->return_rtx);
      else
        mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
        {
          /* In a void function we can use any argument register.
             In a function that returns a structure on the stack
             we can use the second and third argument registers.  */
          if (mode == VOIDmode)
            regs_available_for_popping =
              (1 << ARG_REGISTER (1))
              | (1 << ARG_REGISTER (2))
              | (1 << ARG_REGISTER (3));
          else
            regs_available_for_popping =
              (1 << ARG_REGISTER (2))
              | (1 << ARG_REGISTER (3));
        }
      else if (size <= 4)
        regs_available_for_popping =
          (1 << ARG_REGISTER (2))
          | (1 << ARG_REGISTER (3));
      else if (size <= 8)
        regs_available_for_popping =
          (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    --pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
         reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
          && reg_containing_return_addr == LAST_ARG_REGNUM)
        {
          asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
          reg_containing_return_addr = LR_REGNUM;
        }
      else if (size > 12)
        {
          /* Register a4 is being used to hold part of the return value,
             but we have dire need of a free, low register.  */
          restore_a4 = TRUE;

          asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
        }

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
        {
          /* The fourth argument register is available.  */
          regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

          --pops_needed;
        }
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
        number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
         the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
                   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
        {
          int stack_pointer;

          /* We popped the stack pointer as well,
             find the register that contains it.  */
          stack_pointer = number_of_first_bit_set (regs_available_for_popping);

          /* Move it into the stack register.  */
          asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

          /* At this point we have popped all necessary registers, so
             do not worry about restoring regs_available_for_popping
             to its correct value:

             assert (pops_needed == 0)
             assert (regs_available_for_popping == (1 << frame_pointer))
             assert (regs_to_pop == (1 << STACK_POINTER))  */
        }
      else
        {
          /* Since we have just moved the popped value into the frame
             pointer, the popping register is available for reuse, and
             we know that we still have the stack pointer left to pop.  */
          regs_available_for_popping |= (1 << frame_pointer);
        }
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
                   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
         Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
        assert (regs_to_pop == (1 << STACK_POINTER))
        assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
        {
          asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
          reg_containing_return_addr = LR_REGNUM;
        }

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* This is for the cases where LR is not being used to contain the
         return address.  It may therefore contain information that we
         might not want to leak, hence it must be cleared.  The value in
         R0 will never be a secret at this point, so it is safe to use it,
         see the clearing code in
         'cmse_nonsecure_entry_clear_before_return'.  */
      if (reg_containing_return_addr != LR_REGNUM)
        asm_fprintf (f, "\tmov\tlr, r0\n");

      asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
      asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
    }
  else
    asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx_insn *insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
                 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
        {
          if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
              || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
            CC_STATUS_INIT;
        }
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
        {
          rtx set = single_set (insn);
          cfun->machine->thumb1_cc_insn = insn;
          cfun->machine->thumb1_cc_op0 = SET_DEST (set);
          cfun->machine->thumb1_cc_op1 = const0_rtx;
          cfun->machine->thumb1_cc_mode = CC_NOOVmode;
          if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
            {
              rtx src1 = XEXP (SET_SRC (set), 1);
              if (src1 == const0_rtx)
                cfun->machine->thumb1_cc_mode = CCmode;
            }
          else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
            {
              /* Record the src register operand instead of dest because
                 cprop_hardreg pass propagates src.  */
              cfun->machine->thumb1_cc_op0 = SET_SRC (set);
            }
        }
      else if (conds != CONDS_NOCOND)
        cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if an unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error ("Unexpected thumb1 far jump");
}
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
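/* E.g. 0x00ff0000 is accepted above (it equals 0xff << 16), while
   0x00ff00ff is not expressible as a single shifted byte and is
   rejected.  */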
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx_insn *insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
         of the arg pointer register.  If that register is not being used,
         then there are no arguments on the stack, and we do not have to
         worry that a far jump might force the prologue to push the link
         register, changing the stack offsets.  In this case we can just
         return false, since the presence of far jumps in the function will
         not affect stack offsets.

         If the arg pointer is live (or if it was live, but has now been
         eliminated and so set to dead) then we do have to test to see if
         the function might contain a far jump.  This test can lead to some
         false negatives, since before reload is completed, the length of
         branch instructions is not known, so gcc defaults to returning their
         longest length, which in turn sets the far jump attribute to true.

         A false negative will not result in bad code being generated, but it
         will result in a needless push and pop of the link register.  We
         hope that this does not occur too often.

         If we need doubleword stack alignment this could affect the other
         elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
        cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
        return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
        far_jump = true;

      func_size += get_attr_length (insn);
    }

  /* The far_jump attribute will always be true for thumb1 before the
     shorten_branch pass, so checking it before shorten_branch isn't
     very useful.

     The following heuristic tries to estimate more accurately whether a
     far jump may finally be used.  The heuristic is very conservative as
     there is no chance to roll back the decision not to use a far jump.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
  if (far_jump)
    {
      if ((func_size * 3) >= 2048)
        {
          /* Record the fact that we have decided that
             the function does use far jumps.  */
          cfun->machine->far_jump_used = 1;
          return 1;
        }
    }

  return 0;
}
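/* Concretely, the test above fires once the insns total roughly 683 bytes
   (2048 / 3): in the worst case every 2-byte insn drags a 4-byte literal
   pool entry along with it, tripling the distance a branch may have to
   cover beyond the +/-2 KB short-branch range.  */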
/* Return nonzero if FUNC must be entered in ARM mode.  */
static bool
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return false;
#endif
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
          || TARGET_BACKTRACE
          || (live_regs_mask & 1 << LR_REGNUM) == 0
          || TARGET_INTERWORK
          || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
          || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
         && (for_prologue || call_used_regs[reg_base + n_free]))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
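/* Worked example: with amount == 512 and two usable free low registers
   (n_free == 2), the first test above holds (512 - 8 < 512) and we push
   (512 - 508) / 4 == 1 extra register, leaving a 508-byte adjustment that
   fits a single Thumb-1 SP add/sub immediate.  */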
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* We can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
        push    {r4, r5, r6, r7, lr}
        mov     r7, r9
        mov     r6, r8
        push    {r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
         returning.  */
      if (size <= 12)
        mask |= 1 << 3;
      if (size <= 8)
        mask |= 1 << 2;

      if (mask == 0)
        /* Oh dear!  We have no low registers into which we can pop
           high registers!  */
        internal_error
          ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
        if (live_regs_mask & (1 << next_hi_reg))
          break;

      while (high_regs_pushed)
        {
          /* Find lo register(s) into which the high register(s) can
             be popped.  */
          for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
            {
              if (mask & (1 << regno))
                high_regs_pushed--;
              if (high_regs_pushed == 0)
                break;
            }

          mask &= (2 << regno) - 1;     /* A noop if regno == 8 */

          /* Pop the values into the low register(s).  */
          thumb_pop (asm_out_file, mask);

          /* Move the value(s) into the high registers.  */
          for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
            {
              if (mask & (1 << regno))
                {
                  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
                               regno);

                  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
                    if (live_regs_mask & (1 << next_hi_reg))
                      break;
                }
            }
        }
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
        live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
         structure was created which includes an adjusted stack
         pointer, so just pop everything.  */
      if (live_regs_mask)
        thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
         PC or it was kept in LR for the entire function.
         Note that thumb_pop has already called thumb_exit if the
         PC was in the list.  */
      if (!had_to_push_lr)
        thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
        thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
        {
          if (size > 12)
            {
              /* We have no free low regs, so save one.  */
              asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
                           LAST_ARG_REGNUM);
            }

          /* Get the return address into a temporary register.  */
          thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

          if (size > 12)
            {
              /* Move the return address to lr.  */
              asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
                           LAST_ARG_REGNUM);
              /* Restore the low register.  */
              asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
                           IP_REGNUM);
              regno = LR_REGNUM;
            }
          else
            regno = LAST_ARG_REGNUM;
        }
      else
        regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
                   SP_REGNUM, SP_REGNUM,
                   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Check that FUNC is called with a different mode.  */
bool
arm_change_mode_p (tree func)
{
  if (TREE_CODE (func) != FUNCTION_DECL)
    return false;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  int flags = callee_opts->x_target_flags;

  return (TARGET_THUMB_P (flags) != TARGET_THUMB);
}
/* Like arm_compute_initial_elimination offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */
HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->saved_args;

        case FRAME_POINTER_REGNUM:
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->saved_args;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->saved_args;

        default:
          gcc_unreachable ();
        }
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->soft_frame;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }
}
/* Generate the function's prologue.  */
void
thumb1_expand_prologue (void)
{
  rtx_insn *insn;

  HOST_WIDE_INT amount;
  HOST_WIDE_INT size;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;
  bool lr_needs_saving;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
        current_function_static_stack_size = 0;
      return;
    }

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);

  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
        {
          int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
          unsigned long mask;

          mask = 1ul << (LAST_ARG_REGNUM + 1);
          mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

          insn = thumb1_emit_multi_reg_push (mask, 0);
        }
      else
        insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                      stack_pointer_rtx, x));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
         The code looks like this:

          0   sub   SP, #16          Reserve space for 4 registers.
          2   push  {R7}             Push low registers.
          4   add   R7, SP, #20      Get the stack pointer before the push.
          6   str   R7, [SP, #8]     Store the stack pointer
                                       (before reserving the space).
          8   mov   R7, PC           Get hold of the start of this code + 12.
         10   str   R7, [SP, #16]    Store it.
         12   mov   R7, FP           Get hold of the current frame pointer.
         14   str   R7, [SP, #4]     Store it.
         16   mov   R7, LR           Get hold of the current return address.
         18   str   R7, [SP, #12]    Store it.
         20   add   R7, SP, #16      Point at the start of the
                                       backtrace structure.
         22   mov   FP, R7           Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
        {
          insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
          lr_needs_saving = false;

          offset = bit_count (l_mask) * UNITS_PER_WORD;
        }

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
         to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
         prevent the scheduler from doing anything weird.  Failing that
         we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
        {
          x = gen_rtx_REG (SImode, PC_REGNUM);
          emit_move_insn (work_reg, x);

          x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);

          emit_move_insn (work_reg, arm_hfp_rtx);

          x = plus_constant (Pmode, stack_pointer_rtx, offset);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);
        }
      else
        {
          emit_move_insn (work_reg, arm_hfp_rtx);

          x = plus_constant (Pmode, stack_pointer_rtx, offset);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);

          x = gen_rtx_REG (SImode, PC_REGNUM);
          emit_move_insn (work_reg, x);

          x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
          x = gen_frame_mem (SImode, x);
          emit_move_insn (x, work_reg);
        }

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization: If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
           || (high_regs_pushed == 0 && lr_needs_saving))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
      lr_needs_saving = false;
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
                                                 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
        if (live_regs_mask & (1 << next_hi_reg))
          break;

      /* Here we need to mask out registers used for passing arguments even
         if they can be pushed.  This is to avoid using them to stash the
         high registers.  Such kind of stash may clobber the use of
         arguments.  */
      pushable_regs = l_mask & (~arg_regs_mask);
      if (lr_needs_saving)
        pushable_regs &= ~(1 << LR_REGNUM);

      if (pushable_regs == 0)
        pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
        {
          unsigned long real_regs_mask = 0;
          unsigned long push_mask = 0;

          for (regno = LR_REGNUM; regno >= 0; regno--)
            {
              if (pushable_regs & (1 << regno))
                {
                  emit_move_insn (gen_rtx_REG (SImode, regno),
                                  gen_rtx_REG (SImode, next_hi_reg));

                  high_regs_pushed--;
                  real_regs_mask |= (1 << next_hi_reg);
                  push_mask |= (1 << regno);

                  if (high_regs_pushed)
                    {
                      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
                           next_hi_reg--)
                        if (live_regs_mask & (1 << next_hi_reg))
                          break;
                    }
                  else
                    break;
                }
            }

          /* If we had to find a work register and we have not yet
             saved the LR then add it to the list of regs to push.  */
          if (lr_needs_saving)
            {
              push_mask |= 1 << LR_REGNUM;
              real_regs_mask |= 1 << LR_REGNUM;
              lr_needs_saving = false;
            }

          insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
                    stack_pointer_rtx);

  size = offsets->outgoing_args - offsets->saved_args;
  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
  if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
       || flag_stack_clash_protection)
      && size)
    sorry ("-fstack-check=specific for Thumb-1");

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
        {
          insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                        GEN_INT (- amount)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      else
        {
          rtx reg, dwarf;

          /* The stack decrement is too big for an immediate value in a single
             insn.  In theory we could issue multiple subtracts, but after
             three of them it becomes more space efficient to place the full
             value in the constant pool and load into a register.  (Also the
             ARM debugger really likes to see only one stack decrement per
             function).  So instead we look for a scratch register into which
             we can load the decrement, and then we subtract this from the
             stack pointer.  Unfortunately on the thumb the only available
             scratch registers are the argument registers, and we cannot use
             these as they may hold arguments to the function.  Instead we
             attempt to locate a call preserved register which is used by this
             function.  If we can find one, then we know that it will have
             been pushed at the start of the prologue and so we can corrupt
             it now.  */
          for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
            if (live_regs_mask & (1 << regno))
              break;

          gcc_assert (regno <= LAST_LO_REGNUM);

          reg = gen_rtx_REG (SImode, regno);

          emit_insn (gen_movsi (reg, GEN_INT (- amount)));

          insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                        stack_pointer_rtx, reg));

          dwarf = gen_rtx_SET (stack_pointer_rtx,
                               plus_constant (Pmode, stack_pointer_rtx,
                                              -amount));
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
/* Clear caller saved registers not used to pass return values and leaked
   condition flags before exiting a cmse_nonsecure_entry function.  */
void
cmse_nonsecure_entry_clear_before_return (void)
{
  int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
  uint32_t padding_bits_to_clear = 0;
  uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
  auto_sbitmap to_clear_bitmap (maxregno + 1);
  tree result_type;
  rtx result_rtl;

  bitmap_clear (to_clear_bitmap);
  bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
  bitmap_set_bit (to_clear_bitmap, IP_REGNUM);

  /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
     registers.  */
  if (TARGET_HARD_FLOAT)
    {
      int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;

      bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);

      /* Make sure we don't clear the two scratch registers used to clear
         the relevant FPSCR bits in output_return_instruction.  */
      emit_use (gen_rtx_REG (SImode, IP_REGNUM));
      bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
      emit_use (gen_rtx_REG (SImode, 4));
      bitmap_clear_bit (to_clear_bitmap, 4);
    }

  /* If the user has defined registers to be caller saved, these are no
     longer restored by the function before returning and must thus be
     cleared for security purposes.  */
  for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
    {
      /* We do not touch registers that can be used to pass arguments as
         per the AAPCS, since these should never be made callee-saved by
         user options.  */
      if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
        continue;
      if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
        continue;
      if (call_used_regs[regno])
        bitmap_set_bit (to_clear_bitmap, regno);
    }

  /* Make sure we do not clear the registers used to return the result in.  */
  result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
  if (!VOID_TYPE_P (result_type))
    {
      uint64_t to_clear_return_mask;
      result_rtl = arm_function_value (result_type, current_function_decl, 0);

      /* No need to check that we return in registers, because we don't
         support returning on stack yet.  */
      gcc_assert (REG_P (result_rtl));
      to_clear_return_mask
        = compute_not_to_clear_mask (result_type, result_rtl, 0,
                                     padding_bits_to_clear_ptr);
      if (to_clear_return_mask)
        {
          gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
          for (regno = R0_REGNUM; regno <= maxregno; regno++)
            if (to_clear_return_mask & (1ULL << regno))
              bitmap_clear_bit (to_clear_bitmap, regno);
        }
    }

  if (padding_bits_to_clear != 0)
    {
      rtx reg_rtx;
      auto_sbitmap to_clear_arg_regs_bitmap (R0_REGNUM + NUM_ARG_REGS);

      /* Padding bits to clear is not 0 so we know we are dealing with
         returning a composite type, which only uses r0.  Let's make sure
         that r1-r3 is cleared too; we will use r1 as a scratch register.  */
      bitmap_clear (to_clear_arg_regs_bitmap);
      bitmap_set_range (to_clear_arg_regs_bitmap, R0_REGNUM + 1,
                        NUM_ARG_REGS - 1);
      gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));

      reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);

      /* Fill the lower half of the negated padding_bits_to_clear.  */
      emit_move_insn (reg_rtx,
                      GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));

      /* Also fill the top half of the negated padding_bits_to_clear.  */
      if (((~padding_bits_to_clear) >> 16) > 0)
        emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
                                                      GEN_INT (16),
                                                      GEN_INT (16)),
                                GEN_INT ((~padding_bits_to_clear) >> 16)));

      emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
                             gen_rtx_REG (SImode, R0_REGNUM),
                             reg_rtx));
    }

  for (regno = R0_REGNUM; regno <= maxregno; regno++)
    {
      if (!bitmap_bit_p (to_clear_bitmap, regno))
        continue;

      if (IS_VFP_REGNUM (regno))
        {
          /* If regno is an even vfp register and its successor is also to
             be cleared, use vmov.  */
          if (TARGET_VFP_DOUBLE
              && VFP_REGNO_OK_FOR_DOUBLE (regno)
              && bitmap_bit_p (to_clear_bitmap, regno + 1))
            {
              emit_move_insn (gen_rtx_REG (DFmode, regno),
                              CONST1_RTX (DFmode));
              emit_use (gen_rtx_REG (DFmode, regno));
              regno++;
            }
          else
            {
              emit_move_insn (gen_rtx_REG (SFmode, regno),
                              CONST1_RTX (SFmode));
              emit_use (gen_rtx_REG (SFmode, regno));
            }
        }
      else if (TARGET_THUMB1)
        {
          if (regno == R0_REGNUM)
            emit_move_insn (gen_rtx_REG (SImode, regno),
                            const0_rtx);
          else
            /* R0 has either been cleared before, see code above, or it
               holds a return value; either way it is not secret
               information.  */
            emit_move_insn (gen_rtx_REG (SImode, regno),
                            gen_rtx_REG (SImode, R0_REGNUM));
          emit_use (gen_rtx_REG (SImode, regno));
        }
      else
        {
          emit_move_insn (gen_rtx_REG (SImode, regno),
                          gen_rtx_REG (SImode, LR_REGNUM));
          emit_use (gen_rtx_REG (SImode, regno));
        }
    }
}
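/* Note the clearing values used above: VFP registers are overwritten with
   the floating-point constant 1.0 rather than a bit pattern loaded from
   elsewhere, and in the non-Thumb-1 case core registers are overwritten
   with LR, which holds the caller's non-secret return address, so no
   extra scratch register is needed.  */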
/* Generate pattern *pop_multiple_with_stack_update_and_return if single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is if single register is to be
   returned, or multiple register return.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
         functions or adapt code to handle according to ACLE.  This path
         should not be reachable for cmse_nonsecure_entry functions, though
         we prefer to assert it for now to ensure that future code changes
         do not silently change this behavior.  */
      gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
      if (num_regs == 1)
        {
          rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
          rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
          rtx addr = gen_rtx_MEM (SImode,
                                  gen_rtx_POST_INC (SImode,
                                                    stack_pointer_rtx));
          set_mem_alias_set (addr, get_frame_alias_set ());
          XVECEXP (par, 0, 0) = ret_rtx;
          XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
          RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
          emit_jump_insn (par);
        }
      else
        {
          saved_regs_mask &= ~(1 << LR_REGNUM);
          saved_regs_mask |= (1 << PC_REGNUM);
          arm_emit_multi_reg_pop (saved_regs_mask);
        }
    }
  else
    {
      if (IS_CMSE_ENTRY (arm_current_func_type ()))
        cmse_nonsecure_entry_clear_before_return ();
      emit_jump_insn (simple_return_rtx);
    }
}
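/* So a frame that saved only LR returns through the single-register
   PARALLEL above, which assembles to "pop {pc}", while one that saved,
   say, {r4, r5, lr} swaps LR for PC in the mask and pops {r4, r5, pc}
   via arm_emit_multi_reg_pop.  */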
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
        emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (amount)));
      else
        {
          /* r3 is always free in the epilogue.  */
          rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

          emit_insn (gen_movsi (reg, GEN_INT (amount)));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
        }
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    cmse_nonsecure_entry_clear_before_return ();
}
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers are saved and how far away the floats
     are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
	num_regs++;
	floats_from_frame += 4;
      }

  if (TARGET_HARD_FLOAT)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
	{
	  rtx_insn *insn;
	  floats_from_frame += saved_size;
	  insn = emit_insn (gen_addsi3 (ip_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (-floats_from_frame)));
	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
				       ip_rtx, hard_frame_pointer_rtx);
	}

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    if (start_reg != i)
	      arm_emit_vfp_multi_reg_pop (start_reg,
					  (i - start_reg) / 2,
					  gen_rtx_REG (SImode,
						       IP_REGNUM));
	    start_reg = i + 2;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (start_reg != i)
	arm_emit_vfp_multi_reg_pop (start_reg,
				    (i - start_reg) / 2,
				    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
	 it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx_insn *insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
	if (df_regs_ever_live_p (i) && !call_used_regs[i])
	  {
	    rtx addr = gen_frame_mem (V2SImode,
				      plus_constant (Pmode,
						     hard_frame_pointer_rtx,
						     - lrm_count * 4));
	    insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					       gen_rtx_REG (V2SImode, i),
					       NULL_RTX);
	    lrm_count += 2;
	  }
    }

  /* saved_regs_mask should contain IP, which holds the old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent
     registers, we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx_insn *insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    hard_frame_pointer_rtx,
				    GEN_INT (- 4 * num_regs)));
      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
				   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
	 IP onto the stack, so restore it now.  */
      rtx_insn *insn;
      rtx addr = gen_rtx_MEM (SImode,
			      gen_rtx_POST_INC (SImode,
						stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					 gen_rtx_REG (SImode, IP_REGNUM),
					 NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Generate RTL to represent an ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have an epilogue.  Hence, generate return pattern,
     and let output_return_instruction take care of instruction emission if
     any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
	emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      rtx_insn *insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
	{
	  /* In ARM mode, frame pointer points to first saved register.
	     Restore stack pointer to last saved register.  */
	  amount = offsets->frame - offsets->saved_regs;

	  /* Force out any pending memory operations that reference stacked
	     data before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (insn, amount,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);

	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
      else
	{
	  /* In Thumb-2 mode, the frame pointer points to the last saved
	     register.  */
	  amount = offsets->locals_base - offsets->saved_regs;
	  if (amount)
	    {
	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					    hard_frame_pointer_rtx,
					    GEN_INT (amount)));
	      arm_add_cfa_adjust_cfa_note (insn, amount,
					   hard_frame_pointer_rtx,
					   hard_frame_pointer_rtx);
	    }

	  /* Force out any pending memory operations that reference stacked
	     data before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
				       hard_frame_pointer_rtx));
	  arm_add_cfa_adjust_cfa_note (insn, 0,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
	 last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
	{
	  rtx_insn *tmp;
	  /* Force out any pending memory operations that reference stacked
	     data before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (tmp, amount,
				       stack_pointer_rtx, stack_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }

  if (TARGET_HARD_FLOAT)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
	 any groupings made in the prologue and generate matching
	 vldm operations.  The need to match groups is because,
	 unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    /* Restore the regs discovered so far (from reg+2 to
	       end_reg).  */
	    if (end_reg > i + 2)
	      arm_emit_vfp_multi_reg_pop (i + 2,
					  (end_reg - (i + 2)) / 2,
					  stack_pointer_rtx);
	    end_reg = i;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (end_reg > i + 2)
	arm_emit_vfp_multi_reg_pop (i + 2,
				    (end_reg - (i + 2)) / 2,
				    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (df_regs_ever_live_p (i) && !call_used_regs[i])
	{
	  rtx_insn *insn;
	  rtx addr = gen_rtx_MEM (V2SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	  REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					     gen_rtx_REG (V2SImode, i),
					     NULL_RTX);
	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}

  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
	  && !IS_CMSE_ENTRY (func_type)
	  && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !crtl->calls_eh_return)
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  return_in_pc = true;
	}

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
	{
	  for (i = 0; i <= LAST_ARM_REGNUM; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx addr = gen_rtx_MEM (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx));
		set_mem_alias_set (addr, get_frame_alias_set ());

		if (i == PC_REGNUM)
		  {
		    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
		    XVECEXP (insn, 0, 0) = ret_rtx;
		    XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
							addr);
		    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
		    insn = emit_jump_insn (insn);
		  }
		else
		  {
		    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
						 addr));
		    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
						       gen_rtx_REG (SImode, i),
						       NULL_RTX);
		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		  }
	      }
	}
      else
	{
	  if (TARGET_LDRD
	      && current_tune->prefer_ldrd_strd
	      && !optimize_function_for_size_p (cfun))
	    {
	      if (TARGET_THUMB2)
		thumb2_emit_ldrd_pop (saved_regs_mask);
	      else if (TARGET_ARM && !IS_INTERRUPT (func_type))
		arm_emit_ldrd_pop (saved_regs_mask);
	      else
		arm_emit_multi_reg_pop (saved_regs_mask);
	    }
	  else
	    arm_emit_multi_reg_pop (saved_regs_mask);
	}

      if (return_in_pc)
	return;
    }

  amount
    = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
  if (amount)
    {
      int j;
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp =
	emit_insn (gen_addsi3 (stack_pointer_rtx,
			       stack_pointer_rtx,
			       GEN_INT (amount)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
	{
	  /* Restore pretend args.  Refer to arm_expand_prologue for how
	     pretend_args are saved on the stack.  */
	  int num_regs = crtl->args.pretend_args_size / 4;
	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
	  for (j = 0, i = 0; j < num_regs; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx reg = gen_rtx_REG (SImode, i);
		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
		j++;
	      }
	  REG_NOTES (tmp) = dwarf;
	}
      arm_add_cfa_adjust_cfa_note (tmp, amount,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  /* Clear all caller-saved regs that are not used to return.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    {
      /* CMSE_ENTRY always returns.  */
      gcc_assert (really_return);
      cmse_nonsecure_entry_clear_before_return ();
    }

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
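/* Illustrative sketch, not compiler code: when return_in_pc fires,
   the epilogue folds the return into the register pop, e.g.

	pop	{r4, r5, pc}

   instead of the longer

	pop	{r4, r5, lr}
	bx	lr

   The fold is suppressed for interworked returns, CMSE entry
   functions, stack-realigned functions and eh_return, as tested
   above.  */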
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char * name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
	      == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
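/* Illustrative sketch, not compiler code: for a function foo the
   sequence above comes out roughly as

	orr	ip, pc, #1	@ still in ARM state
	bx	ip		@ switch to Thumb
	.code	16
	.globl	.real_start_of<foo>
	.thumb_func
   .real_start_of<foo>:

   (label spelling modulo the %U user-label prefix), so Thumb callers
   in the same file can branch past the ARM entry sequence.  */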
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg>.  */
      if (REG_P (offset))
	{
	  int reg_offset = REGNO (offset);
	  int reg_base = REGNO (base);
	  int reg_dest = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
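/* Illustrative sketch, not compiler code: the ordering above avoids
   clobbering a base register that is also the low destination.  For
   a double-word load from [r0] into r0/r1, loading the low word first
   would destroy the address:

	ldr	r0, [r0]	@ base gone
	ldr	r1, [r0, #4]	@ wrong base!

   so the high word is loaded first in that case:

	ldr	r1, [r0, #4]
	ldr	r0, [r0]  */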
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
	std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
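/* Illustrative note, not compiler code: the chains of std::swap above
   are a tiny bubble sort; ldmia/stmia require their register lists in
   ascending order.  A three-word move with scratch regs r5, r2, r4 is
   reordered and emitted as

	ldmia	r1!, {r2, r4, r5}
	stmia	r0!, {r2, r4, r5}  */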
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
								offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
								offset)),
			    reg));
    }
}
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
      ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}
/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}
/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (Eg gas prior to 2.20).  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
/* This function is used to print CPU tuning information as comment
   in assembler file.  Pointers are not printed for now.  */

static void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
	       current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.l1_cache_size:\t%d\n",
	       current_tune->prefetch.l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefetch.l1_cache_line_size:\t%d\n",
	       current_tune->prefetch.l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_constant_pool:\t%d\n",
	       (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
	       current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
	       current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
	       current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
	       current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_ldrd_strd:\t%d\n",
	       (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "logical_op_non_short_circuit:\t[%d,%d]\n",
	       (int) current_tune->logical_op_non_short_circuit_thumb,
	       (int) current_tune->logical_op_non_short_circuit_arm);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "prefer_neon_for_64bits:\t%d\n",
	       (int) current_tune->prefer_neon_for_64bits);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "disparage_flag_setting_t16_encodings:\t%d\n",
	       (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "string_ops_prefer_neon:\t%d\n",
	       (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
	       "max_insns_inline_memset:\t%d\n",
	       current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
	       current_tune->fusible_ops);
  asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
	       (int) current_tune->sched_autopref);
}
/* Print .arch and .arch_extension directives corresponding to the
   current architecture configuration.  */
static void
arm_print_asm_arch_directives ()
{
  const arch_option *arch
    = arm_parse_arch_option_name (all_architectures, "-march",
				  arm_active_target.arch_name);
  auto_sbitmap opt_bits (isa_num_bits);

  gcc_assert (arch);

  asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
  if (!arch->common.extensions)
    return;

  for (const struct cpu_arch_extension *opt = arch->common.extensions;
       opt->name != NULL;
       opt++)
    {
      if (!opt->remove)
	{
	  arm_initialize_isa (opt_bits, opt->isa_bits);

	  /* If every feature bit of this option is set in the target
	     ISA specification, print out the option name.  However,
	     don't print anything if all the bits are part of the
	     FPU specification.  */
	  if (bitmap_subset_p (opt_bits, arm_active_target.isa)
	      && !bitmap_subset_p (opt_bits, isa_all_fpubits))
	    asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
	}
    }
}
static void
arm_file_start (void)
{
  int val;

  if (TARGET_BPABI)
    {
      /* We don't have a specified CPU.  Use the architecture to
	 generate the tags.

	 Note: it might be better to do this unconditionally, then the
	 assembler would not need to know about all new CPU names as
	 they are added.  */
      if (!arm_active_target.core_name)
	{
	  /* armv7ve doesn't support any extensions.  */
	  if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
	    {
	      /* Keep backward compatibility for assemblers
		 which don't support armv7ve.  */
	      asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
	    }
	  else
	    arm_print_asm_arch_directives ();
	}
      else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
	asm_fprintf (asm_out_file, "\t.arch %s\n",
		     arm_active_target.core_name + 8);
      else
	{
	  const char* truncated_name
	    = arm_rewrite_selected_cpu (arm_active_target.core_name);
	  asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
	}

      if (print_tune_info)
	arm_print_tune_info ();

      if (! TARGET_SOFT_FLOAT)
	{
	  if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
	    arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

	  if (TARGET_HARD_FLOAT_ABI)
	    arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
	}

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
	{
	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
	}
      if (flag_signaling_nans)
	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
			       flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
			       flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
			       unaligned_access);

      if (arm_fp16_format)
	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
				 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }

  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		     HOST_WIDE_INT, tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
				       function)
		    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	fputs ("\tldr\tr12, ", file);
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	{
	  /* Thumb1 unified syntax requires s suffix in instruction name when
	     one of the operands is immediate.  */
	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
		       mi_op, this_regno, this_regno,
		       mi_delta);
	}
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
	     pipeline offset is four rather than eight.  Adjust the offset
	     accordingly.  */
	  tem = plus_constant (GET_MODE (tem), tem,
			       TARGET_THUMB1_ONLY ? -3 : -7);
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
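/* Illustrative sketch, not compiler code: a non-PIC Thumb-1-only
   thunk with delta = 4 is emitted roughly as

	push	{r3}
	ldr	r3, .LTHUMBFUNCn
	mov	r12, r3
	adds	r0, r0, #4
	pop	{r3}
	bx	r12
	.align	2
   .LTHUMBFUNCn:
	.word	<function>  */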
/* MI thunk handling for TARGET_32BIT.  */

static void
arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
		       HOST_WIDE_INT vcall_offset, tree function)
{
  /* On ARM, this_regno is R0 or R1 depending on
     whether the function returns an aggregate or not.  */
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
				       function)
		    ? R1_REGNUM : R0_REGNUM);

  rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
  rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Add DELTA to THIS_RTX.  */
  if (delta != 0)
    arm_split_constant (PLUS, Pmode, NULL_RTX,
			delta, this_rtx, this_rtx, false);

  /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
  if (vcall_offset != 0)
    {
      /* Load *THIS_RTX.  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
      /* Compute *THIS_RTX + VCALL_OFFSET.  */
      arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
			  false);
      /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
      emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
      emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  rtx funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending this is a post-reload pass.  */
  reload_completed = 0;
}

/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  if (TARGET_32BIT)
    arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
  else
    arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
}
int
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case E_V2SImode: pattern = "%08x"; break;
    case E_V4HImode: pattern = "%04x"; break;
    case E_V8QImode: pattern = "%02x"; break;
    default:	     gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
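/* Illustration, not compiler code: elements are printed from the
   highest index down, so a V4HImode vector {1, 2, 3, 4} comes out as
   0x0004000300020001.  */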
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  long bits;

  bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
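/* Illustration, not compiler code: HFmode 1.0 encodes as 0x3c00 in
   IEEE half precision, so on a little-endian target this emits the
   two-byte value 0x3c00 followed by two bytes of zero padding; a
   big-endian target pads first.  */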
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands [1])
      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    machine_mode mode,
			    tree type,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if (nregs & 1)
	{
	  int res = arm_needs_doubleword_align (mode, type);
	  if (res < 0 && warn_psabi)
	    inform (input_location, "parameter passing for argument of "
		    "type %qT changed in GCC 7.1", type);
	  else if (res > 0)
	    nregs++;
	}
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}
/* AAPCS-based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return ARM_DEFAULT_SHORT_ENUMS;
}


/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}


/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
static void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr, mem;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
}
static void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr, mem;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  if (TARGET_THUMB1)
	    limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked to prevent DSE from deleting
	 it as dead if it is based on fp.  */
      mem = gen_frame_mem (Pmode, addr);
      MEM_VOLATILE_P (mem) = true;
      emit_move_insn (mem, source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode || mode == V8HFmode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  return false;
}
/* Implements target hook array_mode_supported_p.  */

static bool
arm_array_mode_supported_p (machine_mode mode,
			    unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static machine_mode
arm_preferred_simd_mode (scalar_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case E_SFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case E_SImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case E_HImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case E_QImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case E_DImode:
	if (!TARGET_NEON_VECTORIZE_DOUBLE)
	  return V2DImode;
	break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case E_SImode:
	return V2SImode;
      case E_HImode:
	return V4HImode;
      case E_QImode:
	return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  return DWARF_FRAME_REGISTERS;
}
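/* Illustration, not compiler code: with these rules s0 maps to DWARF
   register 64 and s31 to 95 (the legacy VFPv2 range); d16, which has
   no single-precision alias, maps to 256 + 16 = 272; wCGR0 maps to
   104 and wR0 to 112.  */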
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
	if (TARGET_BIG_END)
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
	  }
	else
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
	  }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
	parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before the first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  unsigned padfirst = 0, padlast = 0;
  rtx e;

  e = XVECEXP (p, 0, 0);
  gcc_assert (GET_CODE (e) == SET);

  /* First insn will adjust the stack pointer.  */
  gcc_assert (GET_CODE (e) == SET
	      && REG_P (SET_DEST (e))
	      && REGNO (SET_DEST (e)) == SP_REGNUM
	      && GET_CODE (SET_SRC (e)) == PLUS);

  offset = -INTVAL (XEXP (SET_SRC (e), 1));
  nregs = XVECLEN (p, 0) - 1;
  gcc_assert (nregs);

  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
  if (reg < 16)
    {
      /* For -Os dummy registers can be pushed at the beginning to
	 avoid separate stack pointer adjustment.  */
      e = XVECEXP (p, 0, 1);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padfirst = INTVAL (XEXP (e, 1));
      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      e = XVECEXP (p, 0, nregs);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
      else
	padlast = offset - 4;
      gcc_assert (padlast == 0 || padlast == 4);
      if (padlast == 4)
	fprintf (asm_out_file, "\t.pad #4\n");
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  gcc_assert (offset == padfirst + nregs * reg_size + padlast);

  offset = padfirst;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      gcc_assert (GET_CODE (e) == SET
		  && MEM_P (SET_DEST (e))
		  && REG_P (SET_SRC (e)));

      reg = REGNO (SET_SRC (e));
      gcc_assert (reg >= lastreg);

      if (i != 1)
	fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "%r", reg);

      if (flag_checking)
	{
	  /* Check that the addresses are consecutive.  */
	  e = XEXP (SET_DEST (e), 0);
	  if (GET_CODE (e) == PLUS)
	    gcc_assert (REG_P (XEXP (e, 0))
			&& REGNO (XEXP (e, 0)) == SP_REGNUM
			&& CONST_INT_P (XEXP (e, 1))
			&& offset == INTVAL (XEXP (e, 1)));
	  else
	    gcc_assert (i == 1
			&& REG_P (e)
			&& REGNO (e) == SP_REGNUM);
	  offset += reg_size;
	}
    }

  fprintf (asm_out_file, "}\n");
  if (padfirst)
    fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
}
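/* Illustration, not compiler code: a prologue "push {r4, r7, lr}"
   parallel produces the directive

	.save {r4, r7, lr}

   and a push that also stores a dummy pc slot or leading dummy
   registers additionally emits a ".pad #4"-style stack adjustment, as
   handled above.  */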
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || !REG_P (XEXP (XEXP (e0, 0), 0))
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf(asm_out_file, "{d%d}\n",
		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || !REG_P (XEXP (e1, 0))
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || !CONST_INT_P (XEXP (e1, 1)))
	    abort ();

	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (!REG_P (XEXP (e1, 0))
		  || !CONST_INT_P (XEXP (e1, 1)))
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   offset);
	    }
	  else if (REG_P (e1))
	    {
	      reg = REGNO (e1);
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && REG_P (XEXP (e1, 0))
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && CONST_INT_P (XEXP (e1, 1)))
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
	{
	case REG_FRAME_RELATED_EXPR:
	  pat = XEXP (note, 0);
	  goto found;

	case REG_CFA_REGISTER:
	  pat = XEXP (note, 0);
	  if (pat == NULL)
	    {
	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	    }

	  /* Only emitted for IS_STACKALIGN re-alignment.  */
	  {
	    rtx dest, src;
	    unsigned reg;

	    src = SET_SRC (pat);
	    dest = SET_DEST (pat);

	    gcc_assert (src == stack_pointer_rtx);
	    reg = REGNO (dest);
	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
			 reg + 0x90, reg);
	  }
	  handled_one = true;
	  break;

	/* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
	   to get correct dwarf information for shrink-wrap.  We should not
	   emit unwind information for it because these are used either for
	   pretend arguments or notes to adjust sp and restore registers from
	   stack.  */
	case REG_CFA_DEF_CFA:
	case REG_CFA_ADJUST_CFA:
	case REG_CFA_RESTORE:
	  return;

	case REG_CFA_EXPRESSION:
	case REG_CFA_OFFSET:
	  /* ??? Only handling here what we actually emit.  */
	  gcc_unreachable ();

	default:
	  break;
	}
    }

  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      break;
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}

/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
#endif /* ARM_UNWIND_INFO */
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputs (")", fp);
      break;
    default:
      break;
    }

  return TRUE;
}
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs (")", fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputs (")", fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */

const char *
arm_output_shift (rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  shift = shift_op (operands[3], &val);
  if (shift)
    {
      if (val != -1)
	operands[2] = GEN_INT (val);
      sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1", c);

  output_asm_insn (pattern, operands);
  return "";
}
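
/* For example, with SET_FLAGS == 1 and OPERANDS[3] describing a left
   shift, the template assembled above is "lsl%.\t%0, %1, %2"
   (illustrative); the character taken from flag_chars selects the
   conditionalizable ('?'), flag-setting ('.'), or smallest-encoding
   ('!') variant of the instruction.  */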
/* Output assembly for a WMMX immediate shift instruction.  */

const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands,
				   bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
      || ((opmode == V2SImode) && (shift > 31))
      || ((opmode == DImode) && (shift > 63)))
    {
      if (wror_or_wsra)
	{
	  sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
	  output_asm_insn (templ, operands);
	  if (opmode == DImode)
	    {
	      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
	      output_asm_insn (templ, operands);
	    }
	}
      else
	{
	  /* The destination register will contain all zeros.  */
	  sprintf (templ, "wzero\t%%0");
	  output_asm_insn (templ, operands);
	}
      return "";
    }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
/* Output assembly for a WMMX tinsr instruction.  */

const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];
  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
	break;

      mask >>= 1;
    }
  gcc_assert (i < units);
  {
    switch (GET_MODE (operands[0]))
      {
      case E_V8QImode:
	sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
	break;
      case E_V4HImode:
	sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
	break;
      case E_V2SImode:
	sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
	break;
      default:
	gcc_unreachable ();
	break;
      }
    output_asm_insn (templ, operands);
  }
  return "";
}
/* Output a Thumb-1 casesi dispatch sequence.  */

const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE (diff_vec))
    {
    case E_QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case E_HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case E_SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
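
/* For instance, a QImode unsigned dispatch expands to a call such as
   "bl __gnu_thumb1_case_uqi"; the libgcc helper reads the table of byte
   offsets that follows the call site and adjusts the return address to
   land on the selected case.  */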
/* Output a Thumb-2 casesi instruction.  */

const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case E_QImode:
      return "tbb\t[%|pc, %0]";
    case E_HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case E_SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
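
/* An illustrative QImode sequence, as assembled from the templates
   above (register assignment hypothetical):

	cmp	r0, #9
	bhi	.Ldefault
	tbb	[pc, r0]

   TBB loads a byte offset from the table at PC and branches to
   PC + 2 * offset.  */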
/* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
   per-core tuning structs.  */
static int
arm_issue_rate (void)
{
  return current_tune->issue_rate;
}

/* Return how many instructions the scheduler should look ahead to choose
   the best one.  */
static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Enable modeling of L2 auto-prefetcher.  */
static int
arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}
static const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* The frame pointer is required for non-leaf APCS frames.  */
  if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
    return true;

  /* If we are probing the stack in the prologue, we will have a faulting
     instruction prior to the stack adjustment and this requires a frame
     pointer if we want to catch the exception using the EABI unwinder.  */
  if (!IS_INTERRUPT (arm_current_func_type ())
      && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
	  || flag_stack_clash_protection)
      && arm_except_unwind_info (&global_options) == UI_TARGET
      && cfun->can_throw_non_call_exceptions)
    {
      HOST_WIDE_INT size = get_frame_size ();

      /* That's irrelevant if there is no stack adjustment.  */
      if (size <= 0)
	return false;

      /* That's relevant only if there is a stack probe.  */
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  /* We don't have the final size of the frame so adjust.  */
	  size += 32 * UNITS_PER_WORD;
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    return true;
	}
      else
	return true;
    }

  return false;
}
/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}

static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}

static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
	   regno <= LAST_VFP_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10]     = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the
     register list is 8-bit.  Normally this means all registers in the list
     must be LO_REGS, that is (R0 - R7).  If any HI_REGS are used, then we
     must use 32-bit encodings.  There is one exception for PUSH: LR in
     HI_REGS can be used with the 16-bit encoding.  */
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}
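
/* For example, under Thumb-2 "push {r0-r7, lr}" keeps length 2 (the
   16-bit encoding, LR being the one permitted high register), while
   "push {r0, r8}" forces the 32-bit encoding and length 4.  */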
/* Compute the attribute "length" of insn.  Currently, this function is used
   for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
   "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel
   PARALLEL rtx, RETURN_PC is true if OPERANDS contains a return insn.
   WRITE_BACK_P is true if OPERANDS contains an insn which explicitly
   updates the base register.  */

int
arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
{
  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  rtx parallel_op = operands[0];
  /* Initialize to elements number of PARALLEL.  */
  unsigned indx = XVECLEN (parallel_op, 0) - 1;
  /* Initialize the value to base register.  */
  unsigned regno = REGNO (operands[1]);
  /* Skip return and write back pattern.
     We only need register pop pattern for later analysis.  */
  unsigned first_indx = 0;
  first_indx += return_pc ? 1 : 0;
  first_indx += write_back_p ? 1 : 0;

  /* A pop operation can be done through LDM or POP.  If the base register is
     SP and if it's with write back, then a LDM will be alias of POP.  */
  bool pop_p = (regno == SP_REGNUM && write_back_p);
  bool ldm_p = !pop_p;

  /* Check base register for LDM.  */
  if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
    return 4;

  /* Check each register in the list.  */
  for (; indx >= first_indx; indx--)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
      /* For POP, PC in HI_REGS can be used with 16-bit encoding.  See similar
	 comment in arm_attr_length_push_multi.  */
      if (REGNO_REG_CLASS (regno) == HI_REGS
	  && (regno != PC_REGNUM || ldm_p))
	return 4;
    }

  return 2;
}
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];
  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  r0 = *CONST_DOUBLE_REAL_VALUE (operand);
  if (exact_real_inverse (DFmode, &r0)
      && !REAL_VALUE_NEGATIVE (r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ( (value & (value - 1)) == 0))
	    {
	      int ret = exact_log2 (value);
	      gcc_assert (IN_RANGE (ret, 0, 31));
	      return ret;
	    }
	}
    }
  return 0;
}
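
/* As a worked example: for OPERAND == 0.125 the exact inverse is 8.0,
   which truncates exactly to the integer 8 = 2^3, so the function
   returns 3, i.e. the number of fraction bits for a vcvt with #3.  */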
/* If X is a CONST_DOUBLE with a value that is a power of 2 whose
   log2 is in [1, 32], return that log2.  Otherwise return -1.
   This is used in the patterns for vcvt.s32.f32 floating-point to
   fixed-point conversions.  */

int
vfp3_const_double_for_bits (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!CONST_DOUBLE_P (x))
    return -1;

  r = CONST_DOUBLE_REAL_VALUE (x);

  if (REAL_VALUE_NEGATIVE (*r)
      || REAL_VALUE_ISNAN (*r)
      || REAL_VALUE_ISINF (*r)
      || !real_isinteger (r, SFmode))
    return -1;

  HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));

  /* The exact_log2 above will have returned -1 if this is
     not an exact log2.  */
  if (!IN_RANGE (hwint, 1, 32))
    return -1;

  return hwint;
}
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
	case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
	case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
	case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_load_exclusiveqi; break;
	case E_HImode: gen = gen_arm_load_exclusivehi; break;
	case E_SImode: gen = gen_arm_load_exclusivesi; break;
	case E_DImode: gen = gen_arm_load_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (rval, mem));
}
static void
arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
			  rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
	case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
	case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
	case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_arm_store_exclusiveqi; break;
	case E_HImode: gen = gen_arm_store_exclusivehi; break;
	case E_SImode: gen = gen_arm_store_exclusivesi; break;
	case E_DImode: gen = gen_arm_store_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (bval, rval, mem));
}
/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  rtx_insn *jump = emit_jump_insn (insn);
  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
}
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case E_QImode:
    case E_HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case E_SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
	oldval = force_reg (SImode, oldval);
      break;

    case E_DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_THUMB1)
    {
      switch (mode)
	{
	case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
	case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
	case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
	case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
	case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
	case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
	case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
	default:
	  gcc_unreachable ();
	}
    }

  bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
  emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  For Thumb-1 targets, the
     boolean negation of the result is also stored in bval because Thumb-1
     backend lacks dependency tracking for CC flag due to flag-setting not
     being represented at RTL level.  */
  if (TARGET_THUMB1)
    emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
  else
    {
      x = gen_rtx_EQ (SImode, bdst, const0_rtx);
      emit_insn (gen_rtx_SET (bval, x));
    }
}
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  Success of the compare and swap is
   indicated by the Z flag set for 32bit targets and by neg_bval being zero
   for Thumb-1 targets (ie. negation of the boolean value returned by
   atomic_compare_and_swapmode standard pattern in operand 0).  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, neg_bval;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (operands[5] != const0_rtx);
  mod_s = memmodel_from_int (INTVAL (operands[6]));
  mod_f = memmodel_from_int (INTVAL (operands[7]));
  neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
  mode = GET_MODE (mem);

  bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
			  || is_mm_release (mod_s));

  bool use_release = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
			  || is_mm_acquire (mod_s));

  /* For ARMv8, the load-acquire is too weak for __sync memory orders.
     Instead, a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
     as required to communicate with arm_expand_compare_and_swap.  */
  if (TARGET_32BIT)
    {
      cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }
  else
    {
      emit_move_insn (neg_bval, const1_rtx);
      cond = gen_rtx_NE (VOIDmode, rval, oldval);
      if (thumb1_cmpneg_operand (oldval, SImode))
	emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
						    label2, cond));
      else
	emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
    }

  arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  if (TARGET_32BIT)
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
      emit_insn (gen_rtx_SET (cond, x));
    }

  if (!is_weak)
    {
      /* Z is set to boolean value of !neg_bval, as required to communicate
	 with arm_expand_compare_and_swap.  */
      x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
      emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (label2);
}
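
/* The split above yields, for a strong SImode compare-and-swap on an
   ARMv7 32-bit target, a loop of roughly this shape (register
   assignment illustrative):

	1:	ldrex	r0, [r3]
		cmp	r0, r1
		bne	2f
		strex	r2, r4, [r3]
		cmp	r2, #0
		bne	1b
	2:
*/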
/* Split an atomic operation pattern.  Operation is given by CODE and is one
   of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
   operation).  Operation is performed on the content at MEM and on VALUE
   following the memory model MODEL_RTX.  The content at MEM before and after
   the operation is returned in OLD_OUT and NEW_OUT respectively while the
   success of the operation is returned in COND.  Using a scratch register or
   an operand register for these determines what result is returned for that
   pattern.  */

void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  bool all_low_regs, bind_old_new;
  rtx x;

  bool is_armv8_sync = arm_arch8 && is_mm_sync (model);

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (model) || is_mm_consume (model)
			  || is_mm_release (model));

  bool use_release = TARGET_HAVE_LDACQ
		     && !(is_mm_relaxed (model) || is_mm_consume (model)
			  || is_mm_acquire (model));

  /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
     a full barrier is emitted after the store-release.  */
  if (is_armv8_sync)
    use_acquire = false;

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  /* Does the operation require destination and first operand to use the same
     register?  This is decided by register constraints of relevant insn
     patterns in thumb1.md.  */
  gcc_assert (!new_out || REG_P (new_out));
  all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
		 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
		 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
  bind_old_new =
    (TARGET_THUMB1
     && code != SET
     && code != MINUS
     && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));

  /* We want to return the old value while putting the result of the operation
     in the same register as the old value so copy the old value over to the
     destination register and use that register for the operation.  */
  if (old_out && bind_old_new)
    {
      emit_move_insn (new_out, old_out);
      old_out = new_out;
    }

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
			    use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (is_armv8_sync
      || !(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
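
/* For example, an SImode atomic fetch-and-add on ARMv7 splits into a
   loop of roughly this shape (register assignment illustrative):

	1:	ldrex	r0, [r3]	@ old_out
		add	r1, r0, r2	@ new_out = old_out + value
		strex	ip, r1, [r3]	@ cond = 0 iff the store succeeded
		cmp	ip, #0
		bne	1b
*/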
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  auto_vec_perm_indices perm;
  machine_mode vmode;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
	  pair = gen_lowpart (TImode, pair);
	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
	}
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
/* Map lane ordering between architectural lane order, and GCC lane order,
   taking into account ABI.  See comment above output_move_neon for details.  */

static int
neon_endian_lane_map (machine_mode mode, int lane)
{
  if (BYTES_BIG_ENDIAN)
    {
      int nelems = GET_MODE_NUNITS (mode);
      /* Reverse lane order.  */
      lane = (nelems - 1 - lane);
      /* Reverse D register order, to match ABI.  */
      if (GET_MODE_SIZE (mode) == 16)
	lane = lane ^ (nelems / 2);
    }
  return lane;
}

/* Some permutations index into pairs of vectors, this is a helper function
   to map indexes into those pairs of vectors.  */

static int
neon_pair_endian_lane_map (machine_mode mode, int lane)
{
  int nelem = GET_MODE_NUNITS (mode);
  if (BYTES_BIG_ENDIAN)
    lane =
      neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
  return lane;
}
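
/* As a concrete illustration: for V4SImode on a big-endian target,
   neon_endian_lane_map sends lane 0 to (4 - 1 - 0) ^ (4 / 2) = 1,
   because the quad register is stored as two D registers whose order
   must also be reversed to match the ABI.  */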
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);
  int first_elem;
  int swap_nelt;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
     big endian pattern on 64 bit vectors, so we correct for that.  */
  swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
	      && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;

  if (first_elem == neon_endian_lane_map (d->vmode, 0))
    odd = 0;
  else if (first_elem == neon_endian_lane_map (d->vmode, 1))
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt =
	(neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
      if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
    case E_V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
    case E_V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
    case E_V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
    case E_V8HFmode:  gen = gen_neon_vuzpv8hf_internal;  break;
    case E_V4HFmode:  gen = gen_neon_vuzpv4hf_internal;  break;
    case E_V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
    case E_V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
    case E_V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
    case E_V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (swap_nelt != 0)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);
  int first_elem;
  bool is_swapped;

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  is_swapped = BYTES_BIG_ENDIAN;

  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];

  high = nelt / 2;
  if (first_elem == neon_endian_lane_map (d->vmode, high))
    ;
  else if (first_elem == neon_endian_lane_map (d->vmode, 0))
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt =
	neon_pair_endian_lane_map (d->vmode, i + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
	  != elt)
	return false;
      elt =
	neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
      if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
	  != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case E_V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
    case E_V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
    case E_V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
    case E_V8HFmode:  gen = gen_neon_vzipv8hf_internal;  break;
    case E_V4HFmode:  gen = gen_neon_vzipv4hf_internal;  break;
    case E_V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
    case E_V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
    case E_V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
    case E_V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (is_swapped)
    std::swap (in0, in1);

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->perm.length ();
  rtx (*gen)(rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
	case E_V8QImode:  gen = gen_neon_vrev64v8qi;  break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
	case E_V8QImode:  gen = gen_neon_vrev32v8qi;  break;
	case E_V8HImode:  gen = gen_neon_vrev64v8hi;  break;
	case E_V4HImode:  gen = gen_neon_vrev64v4hi;  break;
	case E_V8HFmode:  gen = gen_neon_vrev64v8hf;  break;
	case E_V4HFmode:  gen = gen_neon_vrev64v4hf;  break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
	case E_V8QImode:  gen = gen_neon_vrev16v8qi;  break;
	case E_V8HImode:  gen = gen_neon_vrev32v8hi;  break;
	case E_V4HImode:  gen = gen_neon_vrev32v4hi;  break;
	case E_V4SImode:  gen = gen_neon_vrev64v4si;  break;
	case E_V2SImode:  gen = gen_neon_vrev64v2si;  break;
	case E_V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
	case E_V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt ; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success! */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}
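
/* For example, on V8QImode the selector {3, 2, 1, 0, 7, 6, 5, 4} has
   d->perm[0] == 3, so diff == 3 and the loop above verifies a byte
   reversal within each 32-bit group: this matches vrev32.8.  */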
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out0, out1, in0, in1;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case E_V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
    case E_V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
    case E_V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
    case E_V8HFmode:  gen = gen_neon_vtrnv8hf_internal;  break;
    case E_V4HFmode:  gen = gen_neon_vtrnv4hf_internal;  break;
    case E_V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
    case E_V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
    case E_V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
    case E_V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      std::swap (in0, in1);
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    std::swap (out0, out1);

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
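
/* For example, on V4SImode the even-lane selector {0, 4, 2, 6}
   (perm[0] == 0, so odd == 0) passes the checks above and is emitted
   as a vtrn.32 of the two input vectors.  */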
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->perm.length ();
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
	 the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
	return false;

      /* If we are operating on only one vector: it could be a
	 rotation.  If there are only two elements of size < 64, let
	 arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
	{
	  if ((nelt == 2) && (d->vmode != V2DImode))
	    return false;

	  next = 0;
	}

      if (d->perm[i] != next)
	return false;
    }

  location = d->perm[0];

  switch (d->vmode)
    {
    case E_V16QImode: gen = gen_neon_vextv16qi; break;
    case E_V8QImode:  gen = gen_neon_vextv8qi;  break;
    case E_V4HImode:  gen = gen_neon_vextv4hi;  break;
    case E_V8HImode:  gen = gen_neon_vextv8hi;  break;
    case E_V2SImode:  gen = gen_neon_vextv2si;  break;
    case E_V4SImode:  gen = gen_neon_vextv4si;  break;
    case E_V4HFmode:  gen = gen_neon_vextv4hf;  break;
    case E_V8HFmode:  gen = gen_neon_vextv8hf;  break;
    case E_V2SFmode:  gen = gen_neon_vextv2sf;  break;
    case E_V4SFmode:  gen = gen_neon_vextv4sf;  break;
    case E_V2DImode:  gen = gen_neon_vextv2di;  break;
    default:
      return false;
    }

  /* Success! */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
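
/* For example, on V4SImode the selector {1, 2, 3, 4} increases by one
   starting from 1, so location == 1 and the permutation is emitted as
   a "vext.32 ..., #1", taking the top three lanes of op0 and the first
   lane of op1.  */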
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->perm.length ();

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  unsigned int nelt = d->perm.length ();
  if (d->perm[0] >= nelt)
    {
      for (unsigned int i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      std::swap (d->op0, d->op1);
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
	return true;
      if (arm_evpc_neon_vzip (d))
	return true;
      if (arm_evpc_neon_vrev (d))
	return true;
      if (arm_evpc_neon_vtrn (d))
	return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.testing_p = false;

  nelt = GET_MODE_NUNITS (d.vmode);
  d.perm.reserve (nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm.quick_push (ei);
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.testing_p = true;
  d.perm.safe_splice (sel);

  /* Categorize the set of elements in the selector.  */
  nelt = GET_MODE_NUNITS (d.vmode);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned int e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
	 instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
	{
	  if (code != ARM_PRE_DEC)
	    return true;
	  else
	    return false;
	}
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
	 word size, there is no point in auto-incrementing
	 because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
	return false;

      /* Vector and floating point modes do not support
	 these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
	return false;

      return true;

    default:
      return false;
    }

  return false;
}
/* The default expansion of general 64-bit shifts in core-regs is suboptimal,
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
			       rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
	in = the register pair containing the input value.
	out = the destination register pair.
	up = the high- or low-part of each pair.
	down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
	      && (REG_P (out) || GET_CODE (out) == SUBREG)
	      && GET_MODE (out) == DImode);
  gcc_assert (in
	      && (REG_P (in) || GET_CODE (in) == SUBREG)
	      && GET_MODE (in) == DImode);
  gcc_assert (amount
	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
		   && GET_MODE (amount) == SImode)
		  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
	      || (GET_CODE (scratch1) == SCRATCH)
	      || (GET_MODE (scratch1) == SImode
		  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
	      || (GET_CODE (scratch2) == SCRATCH)
	      || (GET_MODE (scratch2) == SImode
		  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
	      || !HARD_REGISTER_P (out)
	      || (REGNO (out) != REGNO (amount)
		  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
  #define RSB_32(DEST,SRC) \
	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
  #define SUB_S_32(DEST,SRC) \
	    gen_addsi3_compare0 ((DEST), (SRC), \
				 GEN_INT (-32))
  #define SET(DEST,SRC) \
	    gen_rtx_SET ((DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
			    SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
			    SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
	    gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
	    gen_arm_cond_branch ((LABEL), \
				 gen_rtx_ ## COND (CCmode, cc_reg, \
						   const0_rtx), \
				 cc_reg)
  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result an ARM instruction in a
	 shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behavior", in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

	  /* Clearing the out register in DImode first avoids lots
	     of spilling and results in less stack usage.
	     Later this redundant insn is completely removed.
	     Do that only if "in" and "out" are different registers.  */
	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
	    emit_insn (SET (out, const0_rtx));
	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up,
				    GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
	 Swapping them around now allows the later code to be more general.  */
      switch (code)
	{
	case ASHIFT:
	  emit_insn (SUB_32 (scratch1, amount));
	  emit_insn (RSB_32 (scratch2, amount));
	  break;
	case ASHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  /* Also set CC = amount > 32.  */
	  emit_insn (SUB_S_32 (scratch2, amount));
	  break;
	case LSHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  emit_insn (SUB_32 (scratch2, amount));
	  break;
	default:
	  gcc_unreachable ();
	}
      /* Emit code like this:

	 ASHIFT:
	 out_down = in_down << amount;
	 out_down = (in_up << (amount - 32)) | out_down;
	 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	 out_up = in_up << amount;

	 ASHIFTRT:
	 out_down = in_down >> amount;
	 out_down = (in_up << (32 - amount)) | out_down;
	 if (amount < 32)
	   out_down = ((signed)in_up >> (amount - 32)) | out_down;
	 out_up = in_up >> amount;

	 LSHIFTRT:
	 out_down = in_down >> amount;
	 out_down = (in_up << (32 - amount)) | out_down;
	 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	 out_up = in_up >> amount;

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
	{
	  /* Emit code for ARM mode.  */
	  emit_insn (SET (out_down,
			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
					     out_down)));
	      emit_label (done_label);
	    }
	  else
	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
					   out_down)));
	}
      else
	{
	  /* Emit code for Thumb2 mode.
	     Thumb2 can't do shift and or in one insn.  */
	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
	      emit_label (done_label);
	    }
	  else
	    {
	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
	    }
	}

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}
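
/* As an illustration, a 64-bit left shift by the constant 5 emits, in
   terms of the up/down parts defined above:

	lsl	out_high, in_high, #5
	orr	out_high, out_high, in_low, lsr #27
	lsl	out_low, in_low, #5

   i.e. the three SETs of the "shifts by a constant less than 32" arm,
   with reverse_amount == 27.  */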
/* Returns true if the pattern is a valid symbolic address, which is either a
   symbol_ref or (symbol_ref + addend).

   According to the ARM ELF ABI, the initial addend of REL-type relocations
   processing MOVW and MOVT instructions is formed by interpreting the 16-bit
   literal field of the instruction as a 16-bit signed value in the range
   -32768 <= A < 32768.  */

bool
arm_valid_symbolic_address_p (rtx addr)
{
  rtx xop0, xop1 = NULL_RTX;
  rtx tmp = addr;

  if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
    return true;

  /* (const (plus: symbol_ref const_int))  */
  if (GET_CODE (addr) == CONST)
    tmp = XEXP (addr, 0);

  if (GET_CODE (tmp) == PLUS)
    {
      xop0 = XEXP (tmp, 0);
      xop1 = XEXP (tmp, 1);

      if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
	return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
    }

  return false;
}
/* Returns true if COMPARISON is a valid comparison operation, and puts
   the operands into a form that is valid.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case E_SImode:
      if (!arm_add_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case E_DImode:
      if (!cmpdi_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case E_HFmode:
      if (!TARGET_VFP_FP16INST)
	break;
      /* FP16 comparisons are done in SF mode.  */
      mode = SFmode;
      *op1 = convert_to_mode (mode, *op1, 1);
      *op2 = convert_to_mode (mode, *op2, 1);
      /* Fall through.  */
    case E_SFmode:
    case E_DFmode:
      if (!vfp_compare_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!vfp_compare_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;
    default:
      break;
    }

  return false;
}
/* Maximum number of instructions to set block of memory.  */
static int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}
/* Return TRUE if it's profitable to set block of memory for
   non-vectorized case.  VAL is the value to set the memory
   with.  LENGTH is the number of bytes to set.  ALIGN is the
   alignment of the destination memory in bytes.  UNALIGNED_P
   is TRUE if we can only set the memory with instructions
   meeting alignment requirements.  USE_STRD_P is TRUE if we
   can use strd to set the memory.  */
static bool
arm_block_set_non_vect_profit_p (rtx val,
				 unsigned HOST_WIDE_INT length,
				 unsigned HOST_WIDE_INT align,
				 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
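/* A worked example of the cost model above (illustrative only): for
   LENGTH == 15 with word stores (neither UNALIGNED_P nor USE_STRD_P),
   the store count is (15 >> 2) + leftover[15 & 3] = 3 + 2 = 5 plus the
   cost of loading VAL; and since 15 & 3 == 3, an available unaligned
   STR replaces the trailing STRH/STRB pair and NUM drops by one.  */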
/* Return TRUE if it's profitable to set block of memory for
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
			     unsigned HOST_WIDE_INT align,
			     machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num--;

  return (num <= arm_block_set_max_insns ());
}
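/* Illustrative example: LENGTH == 22 with 4-byte alignment and MODE ==
   V16QImode gives NELT == 16, so NUM = 1 (constant load)
   + (22 + 15) / 16 = 2 (stores) + 1 (address adjustment for the
   misaligned tail) - 1 (the first vst1:v16qi needs no adjustment) = 3.  */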
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
			      unsigned HOST_WIDE_INT length,
			      unsigned HOST_WIDE_INT value,
			      unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, offset);

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
	  offset += nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	}
    }

  /* If at least nelt_v8 bytes are left over, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      offset += length - i;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
	reg = gen_lowpart (V8QImode, reg);

      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
					      + (nelt_mode - nelt_v8))));
      offset += (length - i) + (nelt_mode - nelt_v8);
      mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);

      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
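/* For example (illustrative, little-endian, LENGTH == 23): the loop
   above emits one misaligned v16qi store for bytes 0-15, then the
   (0, 8] leftover path advances DST by 7 + 8 = 15 and emits a final
   overlapping v8qi store covering bytes 15-22.  */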
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_aligned_vect (rtx dstbase,
			    unsigned HOST_WIDE_INT length,
			    unsigned HOST_WIDE_INT value,
			    unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;
  unsigned int offset = 0;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
	  offset += length - nelt_mode;
	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
	  /* We are shifting bytes back, set the alignment accordingly.  */
	  if ((length & 0x3) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 4);
	  else if ((length & 0x1) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 2);
	  else
	    set_mem_align (mem, BITS_PER_UNIT);

	  emit_insn (gen_movmisalignv16qi (mem, reg));
	  return true;
	}
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
      emit_move_insn (mem, reg);
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      offset += i - UNITS_PER_WORD;
      mem = adjust_automodify_address (dstbase, mode, addr, offset);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_move_insn (mem, reg);
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      offset += length - nelt_mode;
      mem = adjust_automodify_address (dstbase, mode, dst, offset);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT * 2);
      else
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
				  unsigned HOST_WIDE_INT length,
				  unsigned HOST_WIDE_INT value,
				  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
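/* For example (illustrative): LENGTH == 5 with ALIGN == 2 duplicates
   VALUE into both bytes of an HImode register, emits two strh
   instructions for bytes 0-3 and finishes with a single strb for
   byte 4.  */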
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
				unsigned HOST_WIDE_INT length,
				unsigned HOST_WIDE_INT value,
				unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, false, use_strd_p))
    {
      if (!use_strd_p)
	return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
					    align, false, use_strd_p))
	return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
	{
	  addr = plus_constant (Pmode, dst, i);
	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
	  emit_move_insn (mem, reg);
	}
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
	 compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
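/* For example (illustrative): LENGTH == 7 with ALIGN == 4 emits one
   word store for bytes 0-3 and then, when unaligned access is
   available, a single unaligned str covering bytes 3-6 in place of an
   strh/strb pair.  */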
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
		    unsigned HOST_WIDE_INT length,
		    unsigned HOST_WIDE_INT value,
		    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
/* Expand string store operation.  First we try to do that using
   vectorization instructions, then with ARM unaligned access and
   double-word stores if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
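/* Usage sketch: the setmem expander in arm.md hands its operands
   straight to this routine.  With hypothetical values, a
   memset (p, 0xAB, 15) on a word-aligned P arrives as
   OPERANDS[1] == 15, OPERANDS[2] == 0xAB and OPERANDS[3] == 4, and
   falls through to arm_block_set_aligned_non_vect when NEON is not
   preferred for string operations.  */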
static bool
arm_macro_fusion_p (void)
{
  return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
}
/* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
   for MOVW / MOVT macro fusion.  */

static bool
arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
{
  /* We are trying to fuse
       movw imm / movt imm
     instructions as a group that gets scheduled together.  */

  rtx set_dest = SET_DEST (curr_set);

  if (GET_MODE (set_dest) != SImode)
    return false;

  /* We are trying to match:
       prev (movw)  == (set (reg r0) (const_int imm16))
       curr (movt)  == (set (zero_extract (reg r0)
					   (const_int 16)
					   (const_int 16))
			    (const_int imm16_1))
     or
       prev (movw)  == (set (reg r1)
			    (high (symbol_ref ("SYM"))))
       curr (movt)  == (set (reg r0)
			    (lo_sum (reg r1)
				    (symbol_ref ("SYM"))))  */

  if (GET_CODE (set_dest) == ZERO_EXTRACT)
    {
      if (CONST_INT_P (SET_SRC (curr_set))
	  && CONST_INT_P (SET_SRC (prev_set))
	  && REG_P (XEXP (set_dest, 0))
	  && REG_P (SET_DEST (prev_set))
	  && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
	return true;
    }
  else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
	   && REG_P (SET_DEST (curr_set))
	   && REG_P (SET_DEST (prev_set))
	   && GET_CODE (SET_SRC (prev_set)) == HIGH
	   && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
    return true;

  return false;
}
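/* The fused pair corresponds to assembly of the form (illustrative):

     movw  r0, #:lower16:sym   @ prev_set
     movt  r0, #:upper16:sym   @ curr_set

   Keeping the two instructions adjacent lets cores that implement
   MOVW/MOVT fusion issue them as one operation.  */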
/* Implement the TARGET_SCHED_MACRO_FUSION_PAIR_P hook.  */

static bool
aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
{
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  if (!prev_set || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
      && aarch_crypto_can_dual_issue (prev, curr))
    return true;

  if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
      && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
    return true;

  return false;
}
/* Return true iff the instruction fusion described by OP is enabled.  */
bool
arm_fusion_enabled_p (tune_params::fuse_ops op)
{
  return current_tune->fusible_ops & op;
}
/* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
   scheduled for speculative execution.  Reject the long-running division
   and square-root instructions.  */

static bool
arm_sched_can_speculate_insn (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
    case TYPE_SDIV:
    case TYPE_UDIV:
    case TYPE_FDIVS:
    case TYPE_FDIVD:
    case TYPE_FSQRTS:
    case TYPE_FSQRTD:
    case TYPE_NEON_FP_SQRT_S:
    case TYPE_NEON_FP_SQRT_D:
    case TYPE_NEON_FP_SQRT_S_Q:
    case TYPE_NEON_FP_SQRT_D_Q:
    case TYPE_NEON_FP_DIV_S:
    case TYPE_NEON_FP_DIV_D:
    case TYPE_NEON_FP_DIV_S_Q:
    case TYPE_NEON_FP_DIV_D_Q:
      return false;
    default:
      return true;
    }
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return HOST_WIDE_INT_1U << 29;
}
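/* With this offset libasan computes shadow addresses as (illustrative):

     shadow = (addr >> 3) + (1 << 29)

   so each shadow byte covers eight application bytes and the shadow
   region is based at 0x20000000.  */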
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
	{
	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
	  if (decl_op1
	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
	    {
	      if ((VAR_P (decl_op1)
		   || TREE_CODE (decl_op1) == CONST_DECL)
		  && (VAR_P (decl_op0)
		      || TREE_CODE (decl_op0) == CONST_DECL))
		return (get_variable_section (decl_op1, false)
			!= get_variable_section (decl_op0, false));

	      if (TREE_CODE (decl_op1) == LABEL_DECL
		  && TREE_CODE (decl_op0) == LABEL_DECL)
		return (DECL_CONTEXT (decl_op1)
			!= DECL_CONTEXT (decl_op0));
	    }

	  return true;
	}
    }

  return false;
}
/* Return TRUE if X is a reference to a value in a constant pool.  */
bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* Remember the last target of arm_set_current_function.  */
static GTY(()) tree arm_previous_fndecl;

/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */

static void
save_restore_target_globals (tree new_tree)
{
  /* If we have a previous state, use it.  */
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    {
      /* Call target_reinit and save the state for TARGET_GLOBALS.  */
      TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
    }

  arm_option_params_internal ();
}

/* Invalidate arm_previous_fndecl.  */

void
arm_reset_previous_fndecl (void)
{
  arm_previous_fndecl = NULL_TREE;
}
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */

static void
arm_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == arm_previous_fndecl)
    return;

  tree old_tree = (arm_previous_fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
		   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If current function has no attributes but previous one did,
     use the default node.  */
  if (! new_tree && old_tree)
    new_tree = target_option_default_node;

  /* If nothing to do return.  #pragma GCC reset or #pragma GCC pop to
     the default have been handled by save_restore_target_globals from
     arm_pragma_target_parse.  */
  if (old_tree == new_tree)
    return;

  arm_previous_fndecl = fndecl;

  /* First set the target options.  */
  cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));

  save_restore_target_globals (new_tree);
}
/* Implement TARGET_OPTION_PRINT.  */

static void
arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  int flags = ptr->x_target_flags;
  const char *fpu_name;

  fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
	      ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);

  fprintf (file, "%*sselected isa %s\n", indent, "",
	   TARGET_THUMB2_P (flags) ? "thumb2" :
	   TARGET_THUMB_P (flags) ? "thumb1" :
	   "arm");

  if (ptr->x_arm_arch_string)
    fprintf (file, "%*sselected architecture %s\n", indent, "",
	     ptr->x_arm_arch_string);

  if (ptr->x_arm_cpu_string)
    fprintf (file, "%*sselected CPU %s\n", indent, "",
	     ptr->x_arm_cpu_string);

  if (ptr->x_arm_tune_string)
    fprintf (file, "%*sselected tune %s\n", indent, "",
	     ptr->x_arm_tune_string);

  fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
}
/* Hook to determine if one function can safely inline another.  */

static bool
arm_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
  bool can_inline = true;

  struct cl_target_option *caller_opts
    = TREE_TARGET_OPTION (caller_tree ? caller_tree
			  : target_option_default_node);

  struct cl_target_option *callee_opts
    = TREE_TARGET_OPTION (callee_tree ? callee_tree
			  : target_option_default_node);

  if (callee_opts == caller_opts)
    return true;

  /* Callee's ISA features should be a subset of the caller's.  */
  struct arm_build_target caller_target;
  struct arm_build_target callee_target;
  caller_target.isa = sbitmap_alloc (isa_num_bits);
  callee_target.isa = sbitmap_alloc (isa_num_bits);

  arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
			      false);
  arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
			      false);
  if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
    can_inline = false;

  sbitmap_free (caller_target.isa);
  sbitmap_free (callee_target.isa);

  /* OK to inline between different modes.
     Function with mode specific instructions, e.g using asm,
     must be explicitly protected with noinline.  */
  return can_inline;
}
/* Hook to fix function's alignment affected by target attribute.  */

static void
arm_relayout_function (tree fndecl)
{
  if (DECL_USER_ALIGN (fndecl))
    return;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
  SET_DECL_ALIGN
    (fndecl,
     FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
}
/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively
   go over the list.  */

static bool
arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
{
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
	  ret = false;
      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
  char *q;

  while ((q = strtok (argstr, ",")) != NULL)
    {
      while (ISSPACE (*q)) ++q;

      argstr = NULL;
      if (!strncmp (q, "thumb", 5))
	opts->x_target_flags |= MASK_THUMB;

      else if (!strncmp (q, "arm", 3))
	opts->x_target_flags &= ~MASK_THUMB;

      else if (!strncmp (q, "fpu=", 4))
	{
	  int fpu_index;
	  if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
				       &fpu_index, CL_TARGET))
	    {
	      error ("invalid fpu for attribute(target(\"%s\"))", q);
	      return false;
	    }
	  if (fpu_index == TARGET_FPU_auto)
	    {
	      /* This doesn't really make sense until we support
		 general dynamic selection of the architecture and all
		 sub-features.  */
	      sorry ("auto fpu selection not currently permitted here");
	      return false;
	    }
	  opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
	}
      else
	{
	  error ("attribute(target(\"%s\")) is unknown", q);
	  return false;
	}
    }

  return true;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
				 struct gcc_options *opts_set)
{
  struct cl_target_option cl_opts;

  if (!arm_valid_target_attribute_rec (args, opts))
    return NULL_TREE;

  cl_target_option_save (&cl_opts, opts);
  arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
  arm_option_check_internal (opts);
  /* Do any overrides, such as global options arch=xxx.  */
  arm_option_override_internal (opts, opts_set);

  return build_target_option_node (opts);
}
static void
add_attribute (const char * mode, tree *attributes)
{
  size_t len = strlen (mode);
  tree value = build_string (len, mode);

  TREE_TYPE (value) = build_array_type (char_type_node,
					build_index_type (size_int (len)));

  *attributes = tree_cons (get_identifier ("target"),
			   build_tree_list (NULL_TREE, value),
			   *attributes);
}
/* For testing.  Insert thumb or arm modes alternately on functions.  */

static void
arm_insert_attributes (tree fndecl, tree * attributes)
{
  const char *mode;

  if (! TARGET_FLIP_THUMB)
    return;

  if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
      || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
    return;

  /* Nested definitions must inherit mode.  */
  if (current_function_decl)
    {
      mode = TARGET_THUMB ? "thumb" : "arm";
      add_attribute (mode, attributes);
      return;
    }

  /* If there is already a setting don't change it.  */
  if (lookup_attribute ("target", *attributes) != NULL)
    return;

  mode = thumb_flipper ? "thumb" : "arm";
  add_attribute (mode, attributes);

  thumb_flipper = !thumb_flipper;
}
/* Hook to validate attribute((target("string"))).  */

static bool
arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
			      tree args, int ARG_UNUSED (flags))
{
  bool ret = true;
  struct gcc_options func_options;
  tree cur_tree, new_optimize;
  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (!func_optimize)
    func_optimize = optimization_default_node;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);

  /* Initialize func_options to the defaults.  */
  cl_optimization_restore (&func_options,
			   TREE_OPTIMIZATION (func_optimize));

  cl_target_option_restore (&func_options,
			    TREE_TARGET_OPTION (target_option_default_node));

  /* Set func_options flags with new target mode.  */
  cur_tree = arm_valid_target_attribute_tree (args, &func_options,
					      &global_options_set);

  if (cur_tree == NULL_TREE)
    ret = false;

  new_optimize = build_optimization_node (&func_options);

  DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;

  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;

  finalize_options_struct (&func_options);

  return ret;
}
/* Match an ISA feature bitmap to a named FPU.  We always use the
   first entry that exactly matches the feature set, so that we
   effectively canonicalize the FPU name for the assembler.  */
static const char *
arm_identify_fpu_from_isa (sbitmap isa)
{
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap cand_fpubits (isa_num_bits);

  bitmap_and (fpubits, isa, isa_all_fpubits);

  /* If there are no ISA feature bits relating to the FPU, we must be
     doing soft-float.  */
  if (bitmap_empty_p (fpubits))
    return "softvfp";

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
      if (bitmap_equal_p (fpubits, cand_fpubits))
	return all_fpus[i].name;
    }

  /* We must find an entry, or things have gone wrong.  */
  gcc_unreachable ();
}
/* Implement ASM_DECLARE_FUNCTION_NAME.  */
void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  fprintf (stream, "\t.syntax unified\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
	  || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
	      && cfun->is_thunk))
	fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
	fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
	fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  asm_fprintf (asm_out_file, "\t.fpu %s\n",
	       (TARGET_SOFT_FLOAT
		? "softvfp"
		: arm_identify_fpu_from_isa (arm_active_target.isa)));

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}
/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
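/* Accepted address shapes, for illustration:

     (reg r1)                       -> BASE = r1, OFFSET = 0
     (plus (reg r1) (const_int 4))  -> BASE = r1, OFFSET = 4

   Register-indexed and auto-modified addresses return false.  */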
/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI.  In the
   future, other kinds of instruction fusion can be supported by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
			   int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
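/* For illustration: two loads from [r1, #4] and [r1, #8] receive the
   same FUSION_PRI, so they are grouped together, while the smaller
   offset yields the larger PRI and is scheduled first, keeping the
   pair adjacent and in ascending offset order for possible fusion.  */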
/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target, must be described using
   a mask selecting GCC high-lanes.

		     Big-Endian		    Little-Endian

   GCC		   0   1   2   3	   3   2   1   0
		 | x | x | x | x |	 | x | x | x | x |
   Architecture    3   2   1   0	   3   2   1   0

   Low Mask:	     { 2, 3 }		     { 0, 1 }
   High Mask:	     { 0, 1 }		     { 2, 3 }
*/

rtx
arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   arm_simd_vect_par_cnst_half for more details.  */

bool
arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
				     bool high)
{
  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
	  || INTVAL (elt_ideal) != INTVAL (elt_op))
	return false;
    }
  return true;
}
/* Can output mi_thunk for all cases except for non-zero vcall_offset
   in Thumb1.  */
static bool
arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
			 const_tree)
{
  /* For now, we punt and do not handle this for TARGET_THUMB1.  */
  if (vcall_offset && TARGET_THUMB1)
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Generate RTL for a conditional branch with rtx comparison CODE in
   mode CC_MODE.  The destination of the unlikely conditional branch
   is LABEL_REF.  */

void
arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
			  rtx label_ref)
{
  rtx x;
  x = gen_rtx_fmt_ee (code, VOIDmode,
		      gen_rtx_REG (cc_mode, CC_REGNUM),
		      const0_rtx);

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
			    pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}
/* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.

   For pure-code sections there is no letter code for this attribute, so
   output all the section flags numerically when this is needed.  */

static bool
arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
{
  if (flags & SECTION_ARM_PURECODE)
    {
      *num = 0x20000000;

      if (!(flags & SECTION_DEBUG))
	*num |= 0x2;
      if (flags & SECTION_EXCLUDE)
	*num |= 0x80000000;
      if (flags & SECTION_WRITE)
	*num |= 0x1;
      if (flags & SECTION_CODE)
	*num |= 0x4;
      if (flags & SECTION_MERGE)
	*num |= 0x10;
      if (flags & SECTION_STRINGS)
	*num |= 0x20;
      if (flags & SECTION_TLS)
	*num |= 0x400;
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	*num |= 0x200;

      return true;
    }

  return false;
}
/* Implement the TARGET_ASM_FUNCTION_SECTION hook.

   If pure-code is passed as an option, make sure all functions are in
   sections that have the SHF_ARM_PURECODE attribute.  */

static section *
arm_function_section (tree decl, enum node_frequency freq,
		      bool startup, bool exit)
{
  const char * section_name;
  section * sec;

  if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
    return default_function_section (decl, freq, startup, exit);

  if (!target_pure_code)
    return default_function_section (decl, freq, startup, exit);

  section_name = DECL_SECTION_NAME (decl);

  /* If a function is not in a named section then it falls under the 'default'
     text section, also known as '.text'.  We can preserve previous behavior as
     the default text section already has the SHF_ARM_PURECODE section
     attribute.  */
  if (!section_name)
    {
      section *default_sec = default_function_section (decl, freq, startup,
						       exit);

      /* If default_sec is not null, then it must be a special section like for
	 example .text.startup.  We set the pure-code attribute and return the
	 same section to preserve existing behavior.  */
      if (default_sec)
	default_sec->common.flags |= SECTION_ARM_PURECODE;
      return default_sec;
    }

  /* Otherwise look whether a section has already been created with
     'section_name'.  */
  sec = get_named_section (decl, section_name, 0);
  if (!sec)
    /* If that is not the case passing NULL as the section's name to
       'get_named_section' will create a section with the declaration's
       section name.  */
    sec = get_named_section (decl, NULL, 0);

  /* Set the SHF_ARM_PURECODE attribute.  */
  sec->common.flags |= SECTION_ARM_PURECODE;

  return sec;
}
/* Implements the TARGET_SECTION_TYPE_FLAGS hook.

   If DECL is a function declaration and pure-code is passed as an option
   then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
   section's name and RELOC indicates whether the declaration's initializer may
   contain runtime relocations.  */

static unsigned int
arm_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
    flags |= SECTION_ARM_PURECODE;

  return flags;
}
/* Generate call to __aeabi_[mode]divmod (op0, op1).  */

static void
arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
			   rtx op0, rtx op1,
			   rtx *quot_p, rtx *rem_p)
{
  if (mode == SImode)
    gcc_assert (!TARGET_IDIV);

  scalar_int_mode libval_mode
    = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));

  rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
					libval_mode,
					op0, GET_MODE (op0),
					op1, GET_MODE (op1));

  rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
  rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
				       GET_MODE_SIZE (mode));

  gcc_assert (quotient);
  gcc_assert (remainder);

  *quot_p = quotient;
  *rem_p = remainder;
}
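/* For SImode this expands to a call such as __aeabi_idivmod (or
   __aeabi_uidivmod), whose AAPCS contract returns the quotient in r0
   and the remainder in r1; the two subregs above pick the halves
   apart.  For example, dividing 7 by 3 yields quotient 2 and
   remainder 1.  */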
/* This function checks for the availability of the coprocessor builtin passed
   in BUILTIN for the current target.  Returns true if it is available and
   false otherwise.  If a BUILTIN is passed for which this function has not
   been implemented it will trigger gcc_unreachable.  */

bool
arm_coproc_builtin_available (enum unspecv builtin)
{
  /* None of these builtins are available in Thumb mode if the target only
     supports Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  switch (builtin)
    {
    case VUNSPEC_CDP:
    case VUNSPEC_LDC:
    case VUNSPEC_LDCL:
    case VUNSPEC_STC:
    case VUNSPEC_STCL:
    case VUNSPEC_MCR:
    case VUNSPEC_MRC:
      if (arm_arch4)
	return true;
      break;
    case VUNSPEC_CDP2:
    case VUNSPEC_LDC2:
    case VUNSPEC_LDC2L:
    case VUNSPEC_STC2:
    case VUNSPEC_STC2L:
    case VUNSPEC_MCR2:
    case VUNSPEC_MRC2:
      /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
	 ARMv8-{A,M}.  */
      if (arm_arch5)
	return true;
      break;
    case VUNSPEC_MCRR:
    case VUNSPEC_MRRC:
      /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
	 ARMv8-{A,M}.  */
      if (arm_arch6 || arm_arch5te)
	return true;
      break;
    case VUNSPEC_MCRR2:
    case VUNSPEC_MRRC2:
      if (arm_arch6)
	return true;
      break;
    default:
      gcc_unreachable ();
    }
  return false;
}
/* This function returns true if OP is a valid memory operand for the ldc and
   stc coprocessor instructions and false otherwise.  */

bool
arm_coproc_ldc_stc_legitimate_address (rtx op)
{
  HOST_WIDE_INT range;
  /* Has to be a memory operand.  */
  if (!MEM_P (op))
    return false;

  op = XEXP (op, 0);

  /* We accept registers.  */
  if (REG_P (op))
    return true;

  switch (GET_CODE (op))
    {
    case PLUS:
      {
	/* Or registers with an offset.  */
	if (!REG_P (XEXP (op, 0)))
	  return false;

	op = XEXP (op, 1);

	/* The offset must be an immediate though.  */
	if (!CONST_INT_P (op))
	  return false;

	range = INTVAL (op);

	/* Within the range of [-1020,1020].  */
	if (!IN_RANGE (range, -1020, 1020))
	  return false;

	/* And a multiple of 4.  */
	return (range % 4) == 0;
      }
    case PRE_INC:
    case POST_INC:
    case PRE_DEC:
    case POST_DEC:
      return REG_P (XEXP (op, 0));
    default:
      gcc_unreachable ();
    }
  return false;
}
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   In VFPv1, VFP registers could only be accessed in the mode they were
   set, so subregs would be invalid there.  However, we don't support
   VFPv1 at the moment, and the restriction was lifted in VFPv2.

   In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
   VFP registers in little-endian order.  We can't describe that accurately to
   GCC, so avoid taking subregs of such values.

   The only exception is going from a 128-bit to a 64-bit type.  In that
   case the data layout happens to be consistent for big-endian, so we
   explicitly allow that case.  */

static bool
arm_can_change_mode_class (machine_mode from, machine_mode to,
			   reg_class_t rclass)
{
  if (TARGET_BIG_END
      && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
      && (GET_MODE_SIZE (from) > UNITS_PER_WORD
	  || GET_MODE_SIZE (to) > UNITS_PER_WORD)
      && reg_classes_intersect_p (VFP_REGS, rclass))
    return false;
  return true;
}
/* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
   strcpy from constants will be faster.  */

static HOST_WIDE_INT
arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
  if (TREE_CODE (exp) == STRING_CST && !optimize_size)
    return MAX (align, BITS_PER_WORD * factor);
  return align;
}
#if CHECKING_P
namespace selftest {

/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  We primarily check for
   inconsistencies in the option extensions at present (extensions
   that duplicate others but aren't marked as aliases).  Furthermore,
   for correct canonicalization later options must never be a subset
   of an earlier option.  Any extension should also only specify other
   feature bits and never an architecture bit.  The architecture is inferred
   from the declaration of the extension.  */
static void
arm_test_cpu_arch_data (void)
{
  const arch_option *arch;
  const cpu_option *cpu;
  auto_sbitmap target_isa (isa_num_bits);
  auto_sbitmap isa1 (isa_num_bits);
  auto_sbitmap isa2 (isa_num_bits);

  for (arch = all_architectures; arch->common.name != NULL; ++arch)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (arch->common.extensions == NULL)
	continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
	{
	  if (ext1->alias)
	    continue;

	  arm_initialize_isa (isa1, ext1->isa_bits);
	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
	    {
	      if (ext2->alias || ext1->remove != ext2->remove)
		continue;

	      arm_initialize_isa (isa2, ext2->isa_bits);
	      /* If the option is a subset of the parent option, it doesn't
		 add anything and so isn't useful.  */
	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

	      /* If the extension specifies any architectural bits then
		 disallow it.  Extensions should only specify feature bits.  */
	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
	    }
	}
    }

  for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (cpu->common.extensions == NULL)
	continue;

      arm_initialize_isa (target_isa, cpu->common.isa_bits);

      for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
	{
	  if (ext1->alias)
	    continue;

	  arm_initialize_isa (isa1, ext1->isa_bits);
	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
	    {
	      if (ext2->alias || ext1->remove != ext2->remove)
		continue;

	      arm_initialize_isa (isa2, ext2->isa_bits);
	      /* If the option is a subset of the parent option, it doesn't
		 add anything and so isn't useful.  */
	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

	      /* If the extension specifies any architectural bits then
		 disallow it.  Extensions should only specify feature bits.  */
	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
	    }
	}
    }
}
/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  Here we check for consistency between the
   fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
   a feature bit that is not defined by any FPU flag.  */
static void
arm_test_fpu_data (void)
{
  auto_sbitmap isa_all_fpubits (isa_num_bits);
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap tmpset (isa_num_bits);

  static const enum isa_feature fpu_bitlist[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  arm_initialize_isa (isa_all_fpubits, fpu_bitlist);

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
      bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
      bitmap_clear (isa_all_fpubits);
      bitmap_copy (isa_all_fpubits, tmpset);
    }

  if (!bitmap_empty_p (isa_all_fpubits))
    {
      fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
		       " group that are not defined by any FPU.\n"
		       "       Check your arm-cpus.in.\n");
      ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
    }
}
static void
arm_run_selftests (void)
{
  arm_test_cpu_arch_data ();
  arm_test_fpu_data ();
}
} /* Namespace selftest.  */

#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
#endif /* CHECKING_P */
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-arm.h"