/* Output routines for GCC for ARM.
   Copyright (C) 1991-2015 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "insn-codes.h"
#include "diagnostic-core.h"
#include "dominance.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "plugin-api.h"
#include "sched-int.h"
#include "target-def.h"
#include "langhooks.h"
#include "gimple-expr.h"
#include "target-globals.h"
#include "tm-constrs.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static bool arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int *, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *,
				   bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *,
				   bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *,
				  bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
				  tree, bool);
static rtx arm_function_arg (cumulative_args_t, machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, machine_mode,
				      const_tree, bool);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
					     const unsigned char *sel);

static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);

static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   int misalign,
				   enum vect_cost_model_location where);

static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL, 0, 0, false, false, false, NULL, false }
};
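
/* For illustration only (hypothetical user code, not part of this file):
   the attributes above are attached in the usual GNU way, e.g.

     void far_away (void) __attribute__ ((long_call));
     void uart_handler (void) __attribute__ ((isr ("IRQ")));

   arm_handle_isr_attribute and friends validate such uses against this
   table.  */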
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
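
/* Worked out for clarity: the anchor range is [-4088, 4095], i.e.
   4088 + 1 + 4095 = 8184 bytes in total, and 8184 = 8 * 1023, which is
   what gives the natural eight-byte spacing described above.  */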
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
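
/* For illustration only (assembly example, not part of this file):
   a Thumb-2 IT block conditionalises up to four following insns, e.g.

     itte  ne          @ next two insns execute if NE, the third if EQ
     addne r0, r0, #1
     movne r1, #0
     moveq r1, #1

   With -mrestrict-it (ARMv8 deprecates multi-insn IT blocks) at most
   one insn per IT block is allowed, hence the 1-vs-4 choice above.  */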
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

struct gcc_target targetm = TARGET_INITIALIZER;
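
/* TARGET_INITIALIZER, generated from target.def, collects the TARGET_*
   overrides above into this single structure of hook pointers; the
   language-independent parts of GCC call into the ARM back end only
   through targetm.  */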
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned long tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we should use Neon to handle 64-bits operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
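
/* In the mask above, 0xff selects the low registers r0-r7; the clear bits
   then remove the Thumb frame pointer, SP, PC and the PIC register,
   leaving the low registers that are safe to use as scratch ("work")
   registers.  */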
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  enum base_architecture base_arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};


#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots, l1_size, l1_line_size) \
  { num_slots, l1_size, l1_line_size }
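
/* For illustration only (hypothetical values, not from this file): a core
   for which prefetching pays off would use something like
   ARM_PREFETCH_BENEFICIAL (4, 32, 32) in its tune_params, i.e. four
   prefetch slots, a 32K L1 cache and 32-byte cache lines.  */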
/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_unalign_store_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h.  */
#include "aarch-cost-tables.h"
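
/* COSTS_N_INSNS (N) expands to N * 4 (see rtl.h), so the entries in the
   tables below are expressed in quarter-of-an-insn units; e.g.
   COSTS_N_INSNS (30) for an idiv models a division that is as expensive
   as roughly thirty ordinary instructions.  */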
const struct cpu_cost_table cortexa9_extra_costs =
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (2),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (3),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (4),	/* extend.  */
  COSTS_N_INSNS (4),	/* extend_add.  */
  COSTS_N_INSNS (2),	/* load.  */
  COSTS_N_INSNS (2),	/* load_sign_extend.  */
  COSTS_N_INSNS (2),	/* ldrd.  */
  COSTS_N_INSNS (2),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (5),	/* loadf.  */
  COSTS_N_INSNS (5),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (2),	/* store.  */
  COSTS_N_INSNS (2),	/* strd.  */
  COSTS_N_INSNS (2),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* storef.  */
  COSTS_N_INSNS (1),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (14),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1),	/* fpconst.  */
  COSTS_N_INSNS (1),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (24),	/* div.  */
  COSTS_N_INSNS (5),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1),	/* fpconst.  */
  COSTS_N_INSNS (1),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa8_extra_costs =
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  0,			/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  0,			/* log_shift_reg.  */
  0,			/* extend_arith.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* simple.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (1),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* loadf.  */
  COSTS_N_INSNS (1),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (1),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* storef.  */
  COSTS_N_INSNS (1),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (36),	/* div.  */
  COSTS_N_INSNS (11),	/* mult.  */
  COSTS_N_INSNS (20),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (9),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (6),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (8),	/* toint.  */
  COSTS_N_INSNS (8),	/* fromint.  */
  COSTS_N_INSNS (8)	/* roundint.  */
  COSTS_N_INSNS (64),	/* div.  */
  COSTS_N_INSNS (16),	/* mult.  */
  COSTS_N_INSNS (25),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (9),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (6),	/* compare.  */
  COSTS_N_INSNS (6),	/* widen.  */
  COSTS_N_INSNS (6),	/* narrow.  */
  COSTS_N_INSNS (8),	/* toint.  */
  COSTS_N_INSNS (8),	/* fromint.  */
  COSTS_N_INSNS (8)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa5_extra_costs =
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (7)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (6),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (4),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (3),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (30),	/* div.  */
  COSTS_N_INSNS (6),	/* mult.  */
  COSTS_N_INSNS (10),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa7_extra_costs =
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (7)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (2),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (3),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (30),	/* div.  */
  COSTS_N_INSNS (6),	/* mult.  */
  COSTS_N_INSNS (10),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa12_extra_costs =
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (2),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (18)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (3),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (3),	/* load.  */
  COSTS_N_INSNS (3),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (3),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (3),	/* loadf.  */
  COSTS_N_INSNS (3),	/* loadd.  */
  0,			/* load_unaligned.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  0,			/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (17),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (31),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa15_extra_costs =
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (2),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (2),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (18)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (3),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (3),	/* load.  */
  COSTS_N_INSNS (3),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (4),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (4),	/* loadf.  */
  COSTS_N_INSNS (4),	/* loadd.  */
  0,			/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  0,			/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (17),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (5),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (31),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table v7m_extra_costs =
  0,			/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* non_exec.  */
  false			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* simple.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (8)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (2),	/* load.  */
  0,			/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (2),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  1,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (3),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (2),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (2),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  1,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (3),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (7),	/* div.  */
  COSTS_N_INSNS (2),	/* mult.  */
  COSTS_N_INSNS (5),	/* mult_addsub.  */
  COSTS_N_INSNS (3),	/* fma.  */
  COSTS_N_INSNS (1),	/* addsub.  */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (5),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1)	/* alu.  */
1690 const struct tune_params arm_slowmul_tune
=
1692 arm_slowmul_rtx_costs
,
1693 NULL
, /* Insn extra costs. */
1694 NULL
, /* Sched adj cost. */
1695 arm_default_branch_cost
,
1696 &arm_default_vec_cost
,
1697 3, /* Constant limit. */
1698 5, /* Max cond insns. */
1699 8, /* Memset max inline. */
1700 1, /* Issue rate. */
1701 ARM_PREFETCH_NOT_BENEFICIAL
,
1702 tune_params::PREF_CONST_POOL_TRUE
,
1703 tune_params::PREF_LDRD_FALSE
,
1704 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1705 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1706 tune_params::DISPARAGE_FLAGS_NEITHER
,
1707 tune_params::PREF_NEON_64_FALSE
,
1708 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1709 tune_params::FUSE_NOTHING
,
1710 tune_params::SCHED_AUTOPREF_OFF
1713 const struct tune_params arm_fastmul_tune
=
1715 arm_fastmul_rtx_costs
,
1716 NULL
, /* Insn extra costs. */
1717 NULL
, /* Sched adj cost. */
1718 arm_default_branch_cost
,
1719 &arm_default_vec_cost
,
1720 1, /* Constant limit. */
1721 5, /* Max cond insns. */
1722 8, /* Memset max inline. */
1723 1, /* Issue rate. */
1724 ARM_PREFETCH_NOT_BENEFICIAL
,
1725 tune_params::PREF_CONST_POOL_TRUE
,
1726 tune_params::PREF_LDRD_FALSE
,
1727 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1728 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1729 tune_params::DISPARAGE_FLAGS_NEITHER
,
1730 tune_params::PREF_NEON_64_FALSE
,
1731 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1732 tune_params::FUSE_NOTHING
,
1733 tune_params::SCHED_AUTOPREF_OFF
1736 /* StrongARM has early execution of branches, so a sequence that is worth
1737 skipping is shorter. Set max_insns_skipped to a lower value. */
1739 const struct tune_params arm_strongarm_tune
=
1741 arm_fastmul_rtx_costs
,
1742 NULL
, /* Insn extra costs. */
1743 NULL
, /* Sched adj cost. */
1744 arm_default_branch_cost
,
1745 &arm_default_vec_cost
,
1746 1, /* Constant limit. */
1747 3, /* Max cond insns. */
1748 8, /* Memset max inline. */
1749 1, /* Issue rate. */
1750 ARM_PREFETCH_NOT_BENEFICIAL
,
1751 tune_params::PREF_CONST_POOL_TRUE
,
1752 tune_params::PREF_LDRD_FALSE
,
1753 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1754 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1755 tune_params::DISPARAGE_FLAGS_NEITHER
,
1756 tune_params::PREF_NEON_64_FALSE
,
1757 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1758 tune_params::FUSE_NOTHING
,
1759 tune_params::SCHED_AUTOPREF_OFF
1762 const struct tune_params arm_xscale_tune
=
1764 arm_xscale_rtx_costs
,
1765 NULL
, /* Insn extra costs. */
1766 xscale_sched_adjust_cost
,
1767 arm_default_branch_cost
,
1768 &arm_default_vec_cost
,
1769 2, /* Constant limit. */
1770 3, /* Max cond insns. */
1771 8, /* Memset max inline. */
1772 1, /* Issue rate. */
1773 ARM_PREFETCH_NOT_BENEFICIAL
,
1774 tune_params::PREF_CONST_POOL_TRUE
,
1775 tune_params::PREF_LDRD_FALSE
,
1776 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1777 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1778 tune_params::DISPARAGE_FLAGS_NEITHER
,
1779 tune_params::PREF_NEON_64_FALSE
,
1780 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1781 tune_params::FUSE_NOTHING
,
1782 tune_params::SCHED_AUTOPREF_OFF
1785 const struct tune_params arm_9e_tune
=
1788 NULL
, /* Insn extra costs. */
1789 NULL
, /* Sched adj cost. */
1790 arm_default_branch_cost
,
1791 &arm_default_vec_cost
,
1792 1, /* Constant limit. */
1793 5, /* Max cond insns. */
1794 8, /* Memset max inline. */
1795 1, /* Issue rate. */
1796 ARM_PREFETCH_NOT_BENEFICIAL
,
1797 tune_params::PREF_CONST_POOL_TRUE
,
1798 tune_params::PREF_LDRD_FALSE
,
1799 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1800 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1801 tune_params::DISPARAGE_FLAGS_NEITHER
,
1802 tune_params::PREF_NEON_64_FALSE
,
1803 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1804 tune_params::FUSE_NOTHING
,
1805 tune_params::SCHED_AUTOPREF_OFF
1808 const struct tune_params arm_marvell_pj4_tune
=
1811 NULL
, /* Insn extra costs. */
1812 NULL
, /* Sched adj cost. */
1813 arm_default_branch_cost
,
1814 &arm_default_vec_cost
,
1815 1, /* Constant limit. */
1816 5, /* Max cond insns. */
1817 8, /* Memset max inline. */
1818 2, /* Issue rate. */
1819 ARM_PREFETCH_NOT_BENEFICIAL
,
1820 tune_params::PREF_CONST_POOL_TRUE
,
1821 tune_params::PREF_LDRD_FALSE
,
1822 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1823 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1824 tune_params::DISPARAGE_FLAGS_NEITHER
,
1825 tune_params::PREF_NEON_64_FALSE
,
1826 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1827 tune_params::FUSE_NOTHING
,
1828 tune_params::SCHED_AUTOPREF_OFF
const struct tune_params arm_v6t2_tune =
{
  NULL,						/* Insn extra costs.  */
  NULL,						/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  1,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs,
  NULL,						/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs,
  NULL,						/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,
  NULL,						/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  NULL,						/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  2,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  3,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};
const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  NULL,						/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,
  NULL,						/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  2,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  3,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};
const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,
  NULL,						/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  2,						/* Max cond insns.  */
  32,						/* Memset max inline.  */
  4,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */
const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  NULL,						/* Sched adj cost.  */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  1,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  NULL,						/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  1,						/* Constant limit.  */
  2,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two
   cycles to execute, but mildly increases pipelining opportunity
   (consecutive loads/stores can be pipelined together, saving one cycle),
   and may also improve icache utilisation.  Hence we prefer the constant
   pool for such cores.  */
const struct tune_params arm_v7m_tune =
{
  NULL,						/* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  2,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  1,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Cortex-M7 tuning.  */
const struct tune_params arm_cortex_m7_tune =
{
  NULL,						/* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,						/* Constant limit.  */
  1,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
const struct tune_params arm_v6m_tune =
{
  NULL,						/* Insn extra costs.  */
  NULL,						/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  1,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_fa726te_tune =
{
  NULL,						/* Insn extra costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
#define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
   FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the CPU.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
#include "arm-arches.def"
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;
/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
#include "arm-fpus.def"
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
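/* A worked example of the loop above: for VALUE = 0b101100, successive
   "value &= value - 1" steps give 0b101000, then 0b100000, then 0, so the
   body runs three times and bit_count returns 3.  Each step clears exactly
   the lowest set bit (Kernighan's trick), so the iteration count equals
   the population count.  */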
typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
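/* As an illustration of the sprintf above (using a mode/name pair from
   the tables further down), the call

     arm_set_fixed_optab_libfunc (ssadd_optab, SQmode, "ssadd", "sq", 3);

   registers "__gnu_ssaddsq3" as the libcall for saturating signed
   addition on SQmode.  */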
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
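/* For instance, converting HQmode ("hq") to SQmode ("sq") -- two signed
   fract modes, so the "2" suffix applies -- the call

     arm_set_fixed_conv_libfunc (fract_optab, SQmode, HQmode,
				 "fract", "sq", "hq");

   registers "__gnu_fracthqsq2".  Note that FROMNAME precedes TONAME in
   the generated name.  */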
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
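  /* So, for example, a 32-bit signed "a % b" becomes a call to
     __aeabi_idivmod, which per the run-time ABI returns the quotient in
     r0 and the remainder in r1; the compiler simply consumes the r1
     half of the result.  */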
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ QQmode, "qq" },  { UQQmode, "uqq" },
	{ HQmode, "hq" },  { UHQmode, "uhq" },
	{ SQmode, "sq" },  { USQmode, "usq" },
	{ DQmode, "dq" },  { UDQmode, "udq" },
	{ TQmode, "tq" },  { UTQmode, "utq" },
	{ HAmode, "ha" },  { UHAmode, "uha" },
	{ SAmode, "sa" },  { USAmode, "usa" },
	{ DAmode, "da" },  { UDAmode, "uda" },
	{ TAmode, "ta" },  { UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ QQmode, "qq" },  { UQQmode, "uqq" },
	{ HQmode, "hq" },  { UHQmode, "uhq" },
	{ SQmode, "sq" },  { USQmode, "usq" },
	{ DQmode, "dq" },  { UDQmode, "udq" },
	{ TQmode, "tq" },  { UTQmode, "utq" },
	{ HAmode, "ha" },  { UHAmode, "uha" },
	{ SAmode, "sa" },  { USAmode, "usa" },
	{ DAmode, "da" },  { UDAmode, "uda" },
	{ TAmode, "ta" },  { UTAmode, "uta" },
	{ QImode, "qi" },  { HImode, "hi" },
	{ SImode, "si" },  { DImode, "di" },
	{ SFmode, "sf" },  { DFmode, "df" }
      };
    unsigned int i, j;
    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }
    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list { void *__ap; };

     The C Library ABI further reinforces this definition.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
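/* The type constructed above is therefore equivalent to the C declaration

     struct __va_list { void *__ap; };

   exactly as the AAPCS requires.  */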
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     __va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags) && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    error ("RTP PIC is incompatible with Thumb");

  /* We only support -mslow-flash-data on armv7-m targets.  */
  if (target_slow_flash_data
      && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
	  || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
    error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
}
/* Set params depending on attributes and optimization options.  */
static void
arm_option_params_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1_P (flags))
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2_P (flags))
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }
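  /* Concretely: the 5-bit Thumb-1 immediate fields are scaled by the
     access size, giving byte offsets of 0-31 for LDRB, 0-62 for LDRH and
     0-124 for LDR, so the 0..127 anchor range above lets word loads use
     their full span.  */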
  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
	 are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;

      /* For THUMB2, we limit the conditional sequence to one IT block.  */
      if (TARGET_THUMB2_P (flags))
	max_insns_skipped = opts->x_arm_restrict_it ? 1 : 4;
    }
  else
    /* When -mrestrict-it is in use tone down the if-conversion.  */
    max_insns_skipped
      = (TARGET_THUMB2_P (opts->x_target_flags) && opts->x_arm_restrict_it)
	? 1 : current_tune->max_insns_skipped;
}
/* Options after initial target override.  */
static GTY(()) tree init_optimize;

/* Reset options between modes that the user has specified.  */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  if (TARGET_THUMB_P (opts->x_target_flags) && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  /* Need to remember initial values so that combinations of options like
     -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  if (!TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_arm_restrict_it = 0;

  /* Don't warn since it's on by default in -O2.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  arm_selected_arch = NULL;
  arm_selected_cpu = NULL;
  arm_selected_tune = NULL;

  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    {
      arm_selected_cpu = &all_cores[(int) arm_cpu_option];
      arm_selected_tune = &all_cores[(int) arm_cpu_option];
    }

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
	{
	  /* Check for conflict between mcpu and march.  */
	  if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       arm_selected_cpu->name, arm_selected_arch->name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    /* -mcpu wins.  */
	    arm_selected_arch = NULL;
	}
      else
	/* Pick a CPU based on the architecture.  */
	arm_selected_cpu = arm_selected_arch;
    }
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
	  /* Default to ARM6.  */
	  if (!arm_selected_cpu->name)
	    arm_selected_cpu = &all_cores[arm6];
	}

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
	 switch that requires certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}

      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
    }
  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;
  arm_base_arch = arm_selected_cpu->base_arch;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;

  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch8 = (insn_flags & FL_ARCH8) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
  arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
  arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
  arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  arm_arch_crc = (insn_flags & FL_CRC32) != 0;
  arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  if (!global_options_set.x_arm_fpu_index)
    {
      const char *target_fpu_name;
      bool ok;

#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      target_fpu_name = "vfp";
#endif

      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
    }

  arm_fpu_desc = &all_fpus[arm_fpu_index];

  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");

      if (TARGET_CALLEE_INTERWORKING)
	error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT && TARGET_NEON)
    error ("iWMMXt and NEON are incompatible");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
	       && TARGET_HARD_FLOAT
	       && TARGET_VFP)
	arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
	sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }

  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }

  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }
  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

  if (TARGET_VXWORKS_RTP
      && !global_options_set.x_arm_pic_data_is_text_relative)
    arm_pic_data_is_text_relative = 0;

  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (arm_selected_cpu->core == cortexm3)
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors.  */
  if (unaligned_access == 2)
    {
      if (arm_arch6 && (arm_arch_notm || arm_arch7))
	unaligned_access = 1;
      else
	unaligned_access = 0;
    }
  else if (unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      unaligned_access = 0;
    }

  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least(2))
    flag_strict_volatile_bitfields = 1;

  /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;

  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			   current_tune->prefetch.num_slots,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			   current_tune->prefetch.l1_cache_line_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			   current_tune->prefetch.l1_cache_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  /* Use Neon to perform 64-bit operations rather than core
     registers.  */
  prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
  if (use_neon_for_64bits == 1)
    prefer_neon_for_64bits = true;

  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int param_sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      param_sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      param_sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      param_sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
			 param_sched_autopref_queue_depth,
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Currently, for slow flash data, we just disable literal pools.  */
  if (target_slow_flash_data)
    arm_disable_literal_pool = true;

  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;

  /* Need to remember initial options before they are overridden.  */
  init_optimize = build_optimization_node (&global_options);

  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal (&global_options);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Save the initial options in case the user does function specific
     optimizations.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options);
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char * arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
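/* For example, a handler declared as

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   arrives here with ARGUMENT holding the string "IRQ" and is classified
   as ARM_FT_ISR via the table above.  */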
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}
3487 arm_current_func_type (void)
3489 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3490 cfun
->machine
->func_type
= arm_compute_func_type ();
3492 return cfun
->machine
->func_type
;
static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline is two pc-relative
   loads (of the static chain register and of pc itself) followed by:
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

static void
arm_asm_trampoline_template (FILE *f)
{
  if (TARGET_UNIFIED_ASM)
    fprintf (f, "\t.syntax unified\n");
  else
    fprintf (f, "\t.syntax divided\n");

  if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
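/* E.g. a trampoline placed at 0x20000 yields the entry address 0x20001
   for Thumb code; the set low bit is consumed by BX/BLX as a request to
   enter Thumb state rather than being part of the address.  */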
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */
int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4)))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ... */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
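/* Illustrative sketch (not part of the compiler): a small leaf function
   such as the one below saves no registers and needs no stack adjustment,
   so every check above passes and the epilogue is a single instruction.
   Hypothetical example for exposition only.  */
#if 0
int example_single_insn_return (int x)
{
  return x + 1;		/* add r0, r0, #1 ; bx lr  */
}
#endif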
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }

  return FALSE;
}
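/* Illustrative sketch (not part of the compiler): a self-contained
   restatement of the ARM-mode rule checked above -- an immediate is
   valid when it is an 8-bit value rotated right by an even amount.
   The function name is hypothetical and exists only to demonstrate
   the encoding.  */
#if 0
static int
example_is_arm_immediate (unsigned int x)
{
  unsigned int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate X left by ROT; if the result fits in 8 bits then X is
	 that 8-bit value rotated right by ROT.  */
      unsigned int v = (x << rot) | (x >> ((32 - rot) & 31));
      if ((v & ~0xffu) == 0)
	return 1;
    }
  return 0;
}
#endif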
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* else fall through.  */

    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
	     && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);

    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c  */
      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are diadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));

	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
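/* Illustrative sketch (not part of the compiler): a greedy decomposition
   of a 32-bit value into ARM-mode rotated 8-bit immediates, the kind of
   sequence arm_split_constant/arm_gen_constant aim to minimize.  For
   example 0xe0000100 splits into 0xe0000000 then 0x00000100, i.e. a MOV
   followed by an ORR/ADD.  The function name is hypothetical.  */
#if 0
static int
example_split_constant (unsigned int val, unsigned int out[4])
{
  int n = 0;

  while (val != 0 && n < 4)
    {
      /* Position of the highest set bit.  */
      int msb = 31;
      while (!(val & (1u << msb)))
	msb--;

      /* Take an 8-bit window containing MSB, rounded up to an even bit
	 position so that it is encodable as imm8 ROR (2 * k).  */
      int low = msb - 7;
      if (low < 0)
	low = 0;
      low = (low + 1) & ~1;

      out[n] = val & (0xffu << low);
      val &= ~out[n];
      n++;
    }
  return n;		/* Number of instructions needed (at most four).  */
}
#endif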
/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
			      struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the
     temps, wrapping around to the top of the word when we drop off the
     bottom.  In the worst case this code should produce no more than four
     insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  loc = i;
	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				    | ((i < end) ? (0xff >> (32 - end))
						 : 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two.  Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}

      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

	 We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 only require one insn.

	 TODO: 16-bit constants that don't clear all the bits, but still win.
	 TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
	  b2 = (remainder & 0x00ff0000) >> 16;
	  b3 = (remainder & 0x0000ff00) >> 8;
	  b4 = remainder & 0xff;

	  if (loc > 24)
	    {
	      /* The 8-bit immediate already found clears b1 (and maybe b2),
		 but must leave b3 and b4 alone.  */

	      /* First try to find a 32-bit replicated constant that clears
		 almost everything.  We can assume that we can't do it in
		 one, or else we wouldn't be here.  */
	      unsigned int tmp = b1 & b2 & b3 & b4;
	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
				  + (tmp << 24);
	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
					    + (tmp == b3) + (tmp == b4);

	      if (tmp
		  && (matching_bytes >= 3
		      || (matching_bytes == 2
			  && const_ok_for_op (remainder & ~tmp2, code))))
		{
		  /* At least 3 of the bytes match, and the fourth has at
		     least as many bits set, or two of the bytes match
		     and it will only require one more insn to finish.  */
		  result = tmp2;
		  i = tmp != b1 ? 32
		      : tmp != b2 ? 24
		      : tmp != b3 ? 16
		      : 8;
		}

	      /* Second, try to find a 16-bit replicated constant that can
		 leave three of the bytes clear.  If b2 or b4 is already
		 zero, then we can.  If the 8-bit from above would not
		 clear b2 anyway, then we still win.  */
	      else if (b1 == b3 && (!b2 || !b4
				    || (remainder & 0x00ff0000 & ~result)))
		{
		  result = remainder & 0xff00ff00;
		  i = 24;
		}
	    }
	  else if (loc > 16)
	    {
	      /* The 8-bit immediate already found clears b2 (and maybe b3)
		 and we don't get here unless b1 is already clear, but it
		 will leave b4 unchanged.  */

	      /* If we can clear b2 and b4 at once, then we win, since the
		 8-bits couldn't possibly reach that far.  */
	      if (b2 == b4)
		{
		  result = remainder & 0x00ff00ff;
		  i = 16;
		}
	    }
	}

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}
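/* Illustrative sketch (not part of the compiler): the Thumb-2 replicated
   immediate forms exploited above -- a byte XY replicated as 0x00XY00XY,
   0xXY00XY00 or 0xXYXYXYXY is a single valid modified immediate.  The
   function name is hypothetical.  */
#if 0
static int
example_is_thumb2_replicated (unsigned int x)
{
  unsigned int b = x & 0xff;

  return x == b * 0x00010001u			/* 0x00XY00XY  */
	 || x == b * 0x01010101u		/* 0xXYXYXYXY  */
	 || x == (x & 0xff00) * 0x00010001u;	/* 0xXY00XY00  */
}
#endif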
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
		  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
		  int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;
  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_MINUS (mode, GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }
  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}

      return 1;
    }
  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading ones.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zeros.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing ones.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }
  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}

      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}

      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET
		       (target,
			gen_rtx_IOR (mode,
				     gen_rtx_ASHIFT (mode, source,
						     GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;
    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0 */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (sub, GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (target,
						   gen_rtx_fmt_ee (code, mode,
								   source, sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /*  Convert.
	  x = y | constant (which is composed of set_sign_bit_copies of
			    leading 1s and the remainder 0s,
			    e.g. 0xfff00000)
	  to
	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

	  This can be done in 2 instructions by using shifts with mov or mvn.
	  e.g. for
	  x = x | 0xfff00000;
	  we generate
	  mvn	r0, r0, asl #12
	  mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (-1 << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	  x = y | constant (which has set_zero_bit_copies number of trailing ones)
	  to
	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	  For e.g. r0 = r0 | 0xfff
	       mvn	r0, r0, lsr #12
	       mvn	r0, r0, asl #12  */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction.  This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).  */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;
    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, SImode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
		= ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }
  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }
  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
	{
	  rtx new_src, temp1_rtx;

	  temp1 = immediates->i[i];

	  if (code == SET || code == MINUS)
	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
	  else if ((final_invert || i < (insns - 1)) && subtargets)
	    new_src = gen_reg_rtx (mode);
	  else
	    new_src = target;

	  if (can_invert)
	    temp1 = ~temp1;
	  else if (can_negate)
	    temp1 = -temp1;

	  temp1 = trunc_int_for_mode (temp1, mode);
	  temp1_rtx = GEN_INT (temp1);

	  if (code == SET)
	    ;
	  else if (code == MINUS)
	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
	  else
	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
	  source = new_src;

	  if (code == SET)
	    {
	      can_negate = can_invert;
	      can_invert = 0;
	      code = PLUS;
	    }
	  else if (code == MINUS)
	    code = PLUS;
	}
    }

  if (final_invert)
    {
      if (generate)
	emit_constant_insn (cond, gen_rtx_SET (target,
					       gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
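/* Illustrative sketch (not part of the compiler): why three candidate
   sequences are compared above.  Adding 0xffffff00 is cheaper as a
   subtraction of 0x100, and ANDing with it is cheaper as a BIC of 0xff.
   This reuses the hypothetical example_split_constant sketch shown
   earlier; the name and the 99 sentinel mirror the code above.  */
#if 0
static int
example_best_form (unsigned int val, int can_negate, int can_invert)
{
  unsigned int out[4];
  int pos = example_split_constant (val, out);
  int neg = can_negate ? example_split_constant (-val, out) : 99;
  int inv = can_invert ? example_split_constant (~val, out) : 99;

  if (neg < pos && neg <= inv)
    return neg;		/* e.g. add -> sub  */
  if (inv < pos)
    return inv;		/* e.g. and -> bic, mov -> mvn  */
  return pos;
}
#endif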
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (CONST_INT_P (*op1))
	    {
	      i = INTVAL (*op1);
	      switch (*code)
		{
		case GT:
		case LE:
		  if (i != maxval
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GT ? GE : LT;
		      return;
		    }
		  break;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0)
		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
		    {
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GTU ? GEU : LTU;
		      return;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }

	  /* If that did not work, reverse the condition.  */
	  if (!op0_preserve_value)
	    {
	      std::swap (*op0, *op1);
	      *code = (int)swap_condition ((enum rtx_code)*code);
	    }
	}
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GT ? GE : LT;
	  return;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GE ? GT : LE;
	  return;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GTU ? GEU : LTU;
	  return;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GEU ? GTU : LEU;
	  return;
	}
      break;

    default:
      gcc_unreachable ();
    }
}
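/* Illustrative sketch (not part of the compiler): the transformation
   performed above on an out-of-range constant.  0x1fff is not a valid
   ARM immediate, but 0x2000 is, so "x > 0x1fff" is rewritten as
   "x >= 0x2000", avoiding a synthesized constant.  Hypothetical worked
   example.  */
#if 0
static int
example_canonicalized_gt (int x)
{
  /* Before: x > 0x1fff   (0x1fff would need a constant-load sequence).  */
  /* After:  x >= 0x2000  (0x2000 is imm8 0x20 rotated; a single cmp).  */
  return x >= 0x2000;
}
#endif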
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value (const_tree type, const_tree func,
		    bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  return arm_libcall_value_1 (mode);
}
5029 struct libcall_hasher
: typed_noop_remove
<rtx_def
>
5031 typedef const rtx_def
*value_type
;
5032 typedef const rtx_def
*compare_type
;
5033 static inline hashval_t
hash (const rtx_def
*);
5034 static inline bool equal (const rtx_def
*, const rtx_def
*);
5035 static inline void remove (rtx_def
*);
5039 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5041 return rtx_equal_p (p1
, p2
);
5045 libcall_hasher::hash (const rtx_def
*p1
)
5047 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5050 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5053 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5055 *htab
->find_slot (libcall
, INSERT
) = libcall
;
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, SFmode));

      /* Values from double-precision helper functions are returned in core
	 registers if the selected core only supports single-precision
	 arithmetic, even if we are using the hard-float ABI.  The same is
	 true for single-precision helpers, but we will never be using the
	 hard-float ABI on a CPU which doesn't support single-precision
	 operations in hardware.  */
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
							SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
							DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER(1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_VFP
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}
/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (ie not including vectors and
	 complex) are always returned in a register (or registers).
	 We don't care about which register here, so we can short-cut
	 some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
	  && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;

      /* Check any available co-processors to see if they accept the
	 type as a register candidate (VFP, for example, can return
	 some aggregates in consecutive registers).  These aren't
	 available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;

      /* Vector values should be returned using ARM registers, not
	 memory (unless they're over 16 bytes, which will break since
	 we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
	 if the type is 'integer like' and every addressable element
	 has an offset of zero.  For practical purposes this means
	 that the structure can have at most one non bit-field element
	 and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
	 have been created by C++.  */
      for (field = TYPE_FIELDS (type);
	   field && TREE_CODE (field) != FIELD_DECL;
	   field = DECL_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
	return true;

      /* ... Aggregates that are not themselves valid for returning in
	 a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
	 since they are not addressable.  */
      for (field = DECL_CHAIN (field);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
	}

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
	 integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
	}

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
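/* Illustrative sketch (not part of the compiler): under the APCS rules
   above, a struct whose only addressable member is its first integer
   field is "integer like" and comes back in r0, while one containing a
   float goes via memory.  Hypothetical types for exposition only.  */
#if 0
struct example_in_r0     { int handle; };	/* 'integer like': in r0  */
struct example_in_memory { float value; };	/* first field a float: memory  */
#endif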
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  if (type)
    {
      attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
	  user_convention = true;
	}
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
	  if (i && i->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		    const_tree fntype ATTRIBUTE_UNUSED,
		    rtx libcall ATTRIBUTE_UNUSED,
		    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types, whether or not those modes are
	 supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
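/* Illustrative sketch (not part of the compiler): a homogeneous aggregate
   as recognized above -- up to four elements of a single floating-point
   type with no padding.  Hypothetical types for exposition only.  */
#if 0
struct example_hfa     { float x, y, z; };	/* 3 x SFmode: s0-s2  */
struct example_not_hfa { float x; double y; };	/* mixed modes: not an HFA  */
#endif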
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
	{
	  sorry ("Thumb-1 hard-float VFP ABI");
	  /* sorry() is not immediately fatal, so only display this once.  */
	  seen_thumb1_vfp = true;
	}

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
	  (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
				       machine_mode mode, const_tree type,
				       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
	*count = ag_count;
      else
	return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;
  return true;
}
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
			       machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
						&ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
			     const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;

  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
						&pcum->aapcs_vfp_rmode,
						&pcum->aapcs_vfp_rcount);
}

static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
		    const_tree type ATTRIBUTE_UNUSED)
{
  int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
	pcum->aapcs_vfp_reg_alloc = mask << regno;
	if (mode == BLKmode
	    || (mode == TImode && ! TARGET_NEON)
	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
	  {
	    int i;
	    int rcount = pcum->aapcs_vfp_rcount;
	    int rshift = shift;
	    machine_mode rmode = pcum->aapcs_vfp_rmode;
	    rtx par;
	    if (!TARGET_NEON)
	      {
		/* Avoid using unsupported vector modes.  */
		if (rmode == V2SImode)
		  rmode = DImode;
		else if (rmode == V4SImode)
		  {
		    rmode = DImode;
		    rcount *= 2;
		    rshift /= 2;
		  }
	      }
	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
	    for (i = 0; i < rcount; i++)
	      {
		rtx tmp = gen_rtx_REG (rmode,
				       FIRST_VFP_REGNUM + regno + i * rshift);
		tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
					 GEN_INT (i * GET_MODE_SIZE (rmode)));
		XVECEXP (par, 0, i) = tmp;
	      }

	    pcum->aapcs_reg = par;
	  }
	else
	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
	return true;
      }
  return false;
}

static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
			       machine_mode mode,
			       const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
					     &ag_mode, &count);

      if (!TARGET_NEON)
	{
	  if (ag_mode == V2SImode)
	    ag_mode = DImode;
	  else if (ag_mode == V4SImode)
	    {
	      ag_mode = DImode;
	      count *= 2;
	    }
	}
      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
	{
	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	  XVECEXP (par, 0, i) = tmp;
	}

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}

static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
  return;
}

#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is
     BLKmode) can be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a
     call.  This routine must not fail and will only be called if
     is_return_candidate returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP

static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
			  const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}

static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
							TYPE_MODE (type),
							type))
	  return i;
    }
  return -1;
}
, const_tree type
,
5923 /* We aren't passed a decl, so we can't check that a call is local.
5924 However, it isn't clear that that would be a win anyway, since it
5925 might limit some tail-calling opportunities. */
5926 enum arm_pcs pcs_variant
;
5927 int unsignedp ATTRIBUTE_UNUSED
;
5931 const_tree fndecl
= NULL_TREE
;
5933 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5936 fntype
= TREE_TYPE (fntype
);
5939 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5942 pcs_variant
= arm_pcs_default
;
5944 /* Promote integer types. */
5945 if (type
&& INTEGRAL_TYPE_P (type
))
5946 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
5948 if (pcs_variant
!= ARM_PCS_AAPCS
)
5952 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5953 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
5955 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
5959 /* Promotes small structs returned in a register to full-word size
5960 for big-endian AAPCS. */
5961 if (type
&& arm_return_in_msb (type
))
5963 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5964 if (size
% UNITS_PER_WORD
!= 0)
5966 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5967 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5971 return gen_rtx_REG (mode
, R0_REGNUM
);
static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
		  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
	 preparation phase, these are handled elsewhere in the
	 compiler.  */

      if (slot >= 0)
	{
	  /* A Co-processor register candidate goes either in its own
	     class of registers or on the stack.  */
	  if (!pcum->aapcs_cprc_failed[slot])
	    {
	      /* C1.cp - Try to allocate the argument to co-processor
		 registers.  */
	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
		return;

	      /* C2.cp - Put the argument on the stack and note that we
		 can't assign any more candidates in this slot.  We also
		 need to note that we have allocated stack space, so that
		 we won't later try to split a non-cprc candidate between
		 core registers and the stack.  */
	      pcum->aapcs_cprc_failed[slot] = true;
	      pcum->can_split = false;
	    }

	  /* We didn't get a register, so this argument goes on the
	     stack.  */
	  gcc_assert (pcum->can_split == false);
	  return;
	}
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
    ncrn++;

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
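/* Illustrative sketch (not part of the compiler): the effect of rules C3
   and C4 above on a hypothetical call f (int a, long long b, int c).
   A takes r0; B needs doubleword alignment, so the NCRN is rounded up
   from 1 to 2 and B takes r2-r3; C then goes on the stack per C6/C8.  */
#if 0
extern void example_f (int a, long long b, int c);
/* a -> r0, (r1 skipped by C3), b -> r2+r3, c -> stack  */
#endif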
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname,
			  tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
	pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
	{
	  int i;

	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	    {
	      pcum->aapcs_cprc_failed[i] = false;
	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
	    }
	}
      return;
    }

  /* Legacy ABIs */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (! pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}
/* Return true if mode/type need doubleword alignment.  */
static bool
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
	  || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
}
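/* For example, with the usual AAPCS settings (PARM_BOUNDARY == 32) a
   'long long' or 'double' (64-bit alignment), or any type declared with
   __attribute__((aligned (8))), answers true here, while plain 'int'
   does not.  */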
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
		  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (mode == VOIDmode)
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'.  */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
	return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
	{
	  pcum->can_split = false;
	  return NULL_RTX;
	}
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if (pcum->nregs & 1
      && ARM_DOUBLEWORD_ALIGN
      && arm_needs_doubleword_align (mode, type))
    pcum->nregs++;

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (mode, pcum->nregs);
}
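/* Worked example for the legacy-ABI path above, using a hypothetical
   declaration:

     void f (int a, int b, long long c, int d);

   a takes r0 and b takes r1; c occupies the pair r2/r3 (nregs is
   already even at 2, so no rounding is needed even on targets where
   ARM_DOUBLEWORD_ALIGN holds); for d, pcum->nregs + nregs exceeds
   NUM_ARG_REGS, so NULL_RTX is returned and d goes on the stack.  */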
static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
	  ? DOUBLEWORD_ALIGNMENT
	  : PARM_BOUNDARY);
}

static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
		       tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
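/* For instance, a 16-byte structure arriving with nregs == 2 has
   ARM_NUM_REGS2 == 4; NUM_ARG_REGS (4) is greater than 2 but less than
   2 + 4, so when splitting is allowed this reports
   (4 - 2) * UNITS_PER_WORD == 8 bytes passed in r2/r3, with the
   remaining 8 bytes on the stack.  */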
/* Update the data in PCUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
			  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);

      if (pcum->aapcs_cprc_slot >= 0)
	{
	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
							      type);
	  pcum->aapcs_cprc_slot = -1;
	}

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (mode)
	  && pcum->named_count > pcum->nargs
	  && TARGET_IWMMXT_ABI)
	pcum->iwmmxt_nregs += 1;
      else
	pcum->nregs += ARM_NUM_REGS2 (mode, type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
		       machine_mode mode ATTRIBUTE_UNUSED,
		       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
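/* Typical user-level usage of these pragmas (illustrative only):

     #pragma long_calls
     void far_away (void);	// calls use a full 32-bit address
     #pragma no_long_calls
     void nearby (void);	// calls use a normal bl
     #pragma long_calls_off	// back to the command-line default
*/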
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
			  bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
	{
	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
		   name);
	  *no_add_attrs = true;
	}
      /* FIXME: the argument if any is checked for type attributes;
	 should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
	  || TREE_CODE (*node) == METHOD_TYPE)
	{
	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	      *no_add_attrs = true;
	    }
	}
      else if (TREE_CODE (*node) == POINTER_TYPE
	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
	{
	  *node = build_variant_type_copy (*node);
	  TREE_TYPE (*node) = build_type_attribute_variant
	    (TREE_TYPE (*node),
	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
	  *no_add_attrs = true;
	}
      else
	{
	  /* Possibly pass this attribute on from the type to a decl.  */
	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
		       | (int) ATTR_FLAG_FUNCTION_NEXT
		       | (int) ATTR_FLAG_ARRAY_NEXT))
	    {
	      *no_add_attrs = true;
	      return tree_cons (name, args, NULL_TREE);
	    }
	  else
	    warning (OPT_Wattributes, "%qE attribute ignored",
		     name);
	}
    }

  return NULL_TREE;
}
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}

#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
	attr_name = get_identifier ("short_call");
      else
	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}

/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
	return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

        a.  has an __attribute__((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

        d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
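/* Illustrative source-level counterparts of rules a and d above:

     void f (void) __attribute__((long_call));   // always a long call
     void g (void) __attribute__((short_call));  // never a long call

   With -mlong-calls (rule c), undecorated declarations behave like f
   unless rule f (defined in the same section) applies.  */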
/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
	 example that we aren't returning a value from the sibling in
	 a VFP register but then need to transfer it to a core
	 register.  */
      rtx a, b;

      a = arm_function_value (TREE_TYPE (exp), decl, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			      cfun->decl, false);
      if (!rtx_equal_p (a, b))
	return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* Everything else is ok.  */
  return true;
}

/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */

static void
require_pic_register (void)
{
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
    {
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM
	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
	{
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    crtl->uses_pic_offset_table = 1;
	}
      else
	{
	  rtx_insn *seq, *insn;

	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_reg_rtx (Pmode);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    {
	      crtl->uses_pic_offset_table = 1;
	      start_sequence ();

	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
		  && arm_pic_register > LAST_LO_REGNUM)
		emit_move_insn (cfun->machine->pic_reg,
				gen_rtx_REG (Pmode, arm_pic_register));
	      else
		arm_load_pic_register (0UL);

	      seq = get_insns ();
	      end_sequence ();

	      for (insn = seq; insn; insn = NEXT_INSN (insn))
		if (INSN_P (insn))
		  INSN_LOCATION (insn) = prologue_location;

	      /* We can be called during expansion of PHI nodes, where
		 we can't yet emit instructions directly in the final
		 insn stream.  Queue the insns on the entry edge, they will
		 be committed after everything else is expanded.  */
	      insert_insn_on_edge (seq,
				   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	    }
	}
    }
}
rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      rtx insn;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* VxWorks does not impose a fixed gap between segments; the run-time
	 gap can be different from the object-file gap.  We therefore can't
	 use GOTOFF unless we are absolutely sure that the symbol is in the
	 same segment as the GOT.  Unfortunately, the flexibility of linker
	 scripts means that we can't be sure of that in general, so assume
	 that GOTOFF is never valid on VxWorks.  */
      if ((GET_CODE (orig) == LABEL_REF
	   || (GET_CODE (orig) == SYMBOL_REF &&
	       SYMBOL_REF_LOCAL_P (orig)))
	  && NEED_GOT_RELOC
	  && arm_pic_data_is_text_relative)
	insn = arm_pic_static_addr (orig, reg);
      else
	{
	  rtx pat;
	  rtx mem;

	  /* If this function doesn't have a pic register, create one now.  */
	  require_pic_register ();

	  pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);

	  /* Make the MEM as close to a constant as possible.  */
	  mem = SET_SRC (pat);
	  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
	  MEM_READONLY_P (mem) = 1;
	  MEM_NOTRAP_P (mem) = 1;

	  insn = emit_insn (pat);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
	return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
	return orig;

      /* Handle the case where we have:
         const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
         CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
	{
	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
	  return orig;
	}

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				       base == reg ? 0 : reg);

      if (CONST_INT_P (offset))
	{
	  /* The base register doesn't really matter, we only want to
	     test the index for the appropriate mode.  */
	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      offset = force_reg (Pmode, offset);
	    }

	  if (CONST_INT_P (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	}

      if (GET_MODE_SIZE (mode) > 4
	  && (GET_MODE_CLASS (mode) == MODE_INT
	      || TARGET_SOFT_FLOAT))
	{
	  emit_insn (gen_addsi3 (reg, base, offset));
	  return reg;
	}

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
/* Find a spare register to use during the prolog of a function.  */

static unsigned long
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
    if (!df_regs_ever_live_p (reg))
      return reg;

  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have less than 4
     registers worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): ie crtl->args.pretend_args_size is greater than 0.  */
  if (cfun->machine->uses_anonymous_args
      && crtl->args.pretend_args_size > 0)
    return LAST_ARG_REGNUM;

  /* The other case is when we have fixed arguments but less than 4 registers
     worth.  In this case r3 might be used in the body of the function, but
     it is not being used to convey an argument into the function.  In theory
     we could just check crtl->args.size to see how many bytes are
     being passed in argument registers, but it seems that it is unreliable.
     Sometimes it will have the value 0 when in fact arguments are being
     passed.  (See testcase execute/20021111-1.c for an example).  So we also
     check the args_info.nregs field as well.  The problem with this field is
     that it makes no allowances for arguments that are passed to the
     function but which are not used.  Hence we could miss an opportunity
     when a function has an unused argument in r3.  But it is better to be
     safe than to be sorry.  */
  if (! cfun->machine->uses_anonymous_args
      && crtl->args.size >= 0
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
      && (TARGET_AAPCS_BASED
	  ? crtl->args.info.aapcs_ncrn < 4
	  : crtl->args.info.nregs < 4))
    return LAST_ARG_REGNUM;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
	if (pushed_regs_mask & (1 << reg))
	  return reg;
    }

  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
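/* For example, if r0-r3 are all live on entry but the prologue pushes
   r4 (bit 4 of PUSHED_REGS_MASK set), the call-saved loop above returns
   4: r4 is about to be saved anyway, so it may be clobbered and used as
   the work register.  */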
static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
{
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
    return;

  gcc_assert (flag_pic);

  pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
				UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
	emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
      else /* TARGET_THUMB1 */
	{
	  if (arm_pic_register != INVALID_REGNUM
	      && REGNO (pic_reg) > LAST_LO_REGNUM)
	    {
	      /* We will have pushed the pic register, so we should always be
		 able to find a work register.  */
	      pic_tmp = gen_rtx_REG (SImode,
				     thumb_find_work_register (saved_regs));
	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
	    }
	  else if (arm_pic_register != INVALID_REGNUM
		   && arm_pic_register > LAST_LO_REGNUM
		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
	    {
	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
	    }
	  else
	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
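/* A sketch of the instruction sequence this emits in ARM mode, with
   hypothetical label and register names:

       ldr	r4, .Loff	@ .Loff: .word _GLOBAL_OFFSET_TABLE_-(.LPIC0+8)
     .LPIC0:
       add	r4, pc, r4	@ pc reads as .LPIC0 + 8 here

   so after the add, r4 holds the run-time GOT address regardless of
   where the code was loaded; the Thumb variant biases by 4 instead
   of 8.  */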
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx, insn;

  gcc_assert (flag_pic);

  /* We use an UNSPEC rather than a LABEL_REF because this label
     never appears in the code stream.  */
  labelno = GEN_INT (pic_labelno++);
  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  l1 = gen_rtx_CONST (VOIDmode, l1);

  /* On the ARM the PC register contains 'dot + 8' at the time of the
     addition, on the Thumb it is 'dot + 4'.  */
  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
			       UNSPEC_SYMBOL_OFFSET);
  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

  insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
  return insn;
}
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
	  || regno >= FIRST_PSEUDO_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}

/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}

/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
				int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode
		  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
	 to fixup invalid register choices.  */
      if (use_ldrd
	  && GET_CODE (x) == POST_MODIFY
	  && REG_P (addend))
	return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && ((CONST_INT_P (xop1)
		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now.  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode
		  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
	return 0;

      offset = INTVAL(addend);
      if (GET_MODE_SIZE (mode) <= 4)
	return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
	      && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend a 8bit value to 32bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  else if (arm_disable_literal_pool && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
			int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && TARGET_VFP
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);

	  if (TARGET_LDRD)
	    return val > -256 && val < 256;
	  else
	    return val > -4096 && val < 4092;
	}

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
	    && (mode == HImode
		|| mode == HFmode
		|| (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
	{
	  rtx xiop0 = XEXP (index, 0);
	  rtx xiop1 = XEXP (index, 1);

	  return ((arm_address_register_rtx_p (xiop0, strict_p)
		   && power_of_two_operand (xiop1, SImode))
		  || (arm_address_register_rtx_p (xiop1, strict_p)
		      && power_of_two_operand (xiop0, SImode)));
	}
      else if (code == LSHIFTRT || code == ASHIFTRT
	       || code == ASHIFT || code == ROTATERT)
	{
	  rtx op = XEXP (index, 1);

	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
		  && CONST_INT_P (op)
		  && INTVAL (op) > 0
		  && INTVAL (op) <= 31);
	}
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
	  || mode == HFmode
	  || (outer == SIGN_EXTEND && mode == QImode))
	range = 256;
      else
	range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
	  && INTVAL (index) < range
	  && INTVAL (index) > -range);
}
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}

/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && TARGET_VFP
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    /* Thumb-2 allows only > -256 index range for its core register
	       load/stores.  Since we allow SF/DF in core registers, we have
	       to use the intersection between -256~4096 (core) and -1024~1024
	       (coprocessor).  */
	    && INTVAL (index) > -256
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
	 and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
	return (code == CONST_INT
		&& INTVAL (index) < 1024
		&& INTVAL (index) > -1024
		&& (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);
	  /* ??? Can we assume ldrd for thumb2?  */
	  /* Thumb-2 ldrd only has reg+const addressing modes.  */
	  /* ldrd supports offsets of +-1020.
	     However the ldr fallback does not.  */
	  return val > -256 && val < 256 && (val & 3) == 0;
	}
      else
	return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
	       && thumb2_index_mul_operand (xiop1))
	      || (arm_address_register_rtx_p (xiop1, strict_p)
		  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
	      && CONST_INT_P (op)
	      && INTVAL (op) > 0
	      && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
	  && INTVAL (index) < 4096
	  && INTVAL (index) > -256);
}
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
	  || regno > LAST_VIRTUAL_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || (GET_MODE_SIZE (mode) >= 4
	      && (regno == STACK_POINTER_REGNUM
		  || regno >= FIRST_PSEUDO_REGISTER
		  || x == hard_frame_pointer_rtx
		  || x == arg_pointer_rtx)));
}

/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}

/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
	  || reg_mentioned_p (arg_pointer_rtx, x)
	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
	   && reload_completed
	   && (GET_CODE (x) == LABEL_REF
	       || (GET_CODE (x) == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
	 will be replaced with STACK, and SP relative addressing only
	 permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
	  && XEXP (x, 0) != frame_pointer_rtx
	  && XEXP (x, 1) != frame_pointer_rtx
	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
	return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
		|| XEXP (x, 0) == arg_pointer_rtx)
	       && CONST_INT_P (XEXP (x, 1))
	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
	return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
	 larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
	 just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && INTVAL (XEXP (x, 1)) >= 0
	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;

      else if (REG_P (XEXP (x, 0))
	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
		       && REGNO (XEXP (x, 0))
			  <= LAST_VIRTUAL_POINTER_REGISTER))
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && GET_MODE_SIZE (mode) == 4
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}
static bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}

/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */

static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
	return LO_REGS;
      else
	return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

static rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */

      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}

static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}

static rtx
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx insns, label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
			gen_rtvec (4, x, GEN_INT (reloc), label,
				   GEN_INT (TARGET_ARM ? 8 : 4)),
			UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, 1, reg, Pmode);

  insns = get_insns ();
  end_sequence ();

  return insns;
}

static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
				       gen_rtx_CONST (VOIDmode, label),
				       GEN_INT (!TARGET_ARM)),
			    UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  /* Original scheme.  */
	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, x);
	}
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
	     share the LDM result with other LD model accesses.  */
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
				UNSPEC_TLS);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, eqv);

	  /* Load the addend.  */
	  addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
						     GEN_INT (TLS_LDO32)),
				   UNSPEC_TLS);
	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
	  dest = gen_rtx_PLUS (Pmode, dest, addend);
	}
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
				       GEN_INT (TARGET_ARM ? 8 : 4)),
			    UNSPEC_TLS);
      reg = load_tls_operand (sum, reg);

      if (TARGET_ARM)
	emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
	emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
      else
	{
	  emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
	  emit_move_insn (reg, gen_const_mem (SImode, reg));
	}

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
			    UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
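/* Source-level view (illustrative): an access to

     __thread int t;

   arrives here with MODEL chosen from -ftls-model and symbol
   visibility.  The *_EXEC cases add an UNSPEC_TLS-relocated offset to
   the thread pointer obtained from arm_load_tp; the *_DYNAMIC cases
   (without -mtls-dialect=gnu2) instead obtain the address from a
   __tls_get_addr call.  */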
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (x, 0), 1);
	  x = XEXP (XEXP (x, 0), 0);
	}

      if (GET_CODE (x) != SYMBOL_REF)
	return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
	{
	  x = gen_rtx_PLUS (SImode, x, addend);
	  orig_x = x;
	}
      else
	return x;
    }

  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
	return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
	  && !symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
	  && CONST_INT_P (xop1))
	{
	  HOST_WIDE_INT n, low_n;
	  rtx base_reg, val;
	  n = INTVAL (xop1);

	  /* VFP addressing modes actually allow greater offsets, but for
	     now we just stick with the lowest common denominator.  */
	  if (mode == DImode
	      || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
	    {
	      low_n = n & 0x0f;
	      n &= ~0x0f;
	      if (low_n > 4)
		{
		  n += 16;
		  low_n -= 16;
		}
	    }
	  else
	    {
	      low_n = ((mode) == TImode ? 0
		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
	      n -= low_n;
	    }

	  base_reg = gen_reg_rtx (SImode);
	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
	  emit_move_insn (base_reg, val);
	  x = plus_constant (Pmode, base_reg, low_n);
	}
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the mini pool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
	 use an 8-bit index.  So let's use a 12-bit index for SImode only and
	 hope that arm_gen_constant will enable ldrb to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
	{
	  /* It'll most probably be more efficient to generate the base
	     with more bits set and use a negative index instead.  */
	  base |= mask;
	  index -= mask;
	}
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
static rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
	  || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
	 then offsetting that.  Don't do this when optimizing for space
	 since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
	{
	  HOST_WIDE_INT delta;

	  if (offset >= 256)
	    delta = offset - (256 - GET_MODE_SIZE (mode));
	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
	    delta = 31 * GET_MODE_SIZE (mode);
	  else
	    delta = offset & (~31 * GET_MODE_SIZE (mode));

	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
				NULL_RTX);
	  x = plus_constant (Pmode, xop0, delta);
	}
      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference, forcing these into a register normally takes two
	   instructions.  */
	x = force_operand (x, NULL_RTX);
      else
	{
	  /* For the remaining cases, force the constant into a register.  */
	  xop1 = force_reg (SImode, xop1);
	  x = gen_rtx_PLUS (SImode, xop0, xop1);
	}
    }
  else if (GET_CODE (x) == PLUS
	   && s_register_operand (XEXP (x, 1), SImode)
	   && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
	return true;

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return (CONST_INT_P (x)
	  || CONST_DOUBLE_P (x)
	  || CONSTANT_ADDRESS_P (x)
	  || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }
  return arm_tls_referenced_p (x);
}

#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  int cycles = 0;
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

	  while (i)
	    {
	      i >>= 2;
	      cycles++;
	    }
	  return COSTS_N_INSNS (2) + cycles;
	}
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
	      + 4 * ((MEM_P (SET_SRC (x)))
		     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
	{
	  if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
	    return 0;
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
		|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
	return total;

      if (arm_arch6)
	return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
	 we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
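/* Illustrative examples (not from the original sources): for
   (mult r0 (const_int 0x55)) the loop above consumes two bits of the
   constant per iteration, so i = 0x55 takes 4 iterations and the estimate
   is COSTS_N_INSNS (2) + 4.  For outer == AND, the masks accepted by the
   andsi3 expander have the form (1 << i) - 1 (or its complement) for
   i in [9, 31]; e.g. i = 9 matches 0x1ff, synthesized with a shift-up /
   shift-down pair, hence the two-insn cost.  */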
static bool
arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
{
  machine_mode mode = GET_MODE (x);
  enum rtx_code subcode;
  rtx operand;
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case MEM:
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
      return true;
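      /* Example (illustrative only): a DImode load has ARM_NUM_REGS == 2,
	 so the estimate above is COSTS_N_INSNS (4); with the usual
	 COSTS_N_INSNS scale of 4 units per instruction that is 16.  */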
    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      if (TARGET_HARD_FLOAT && mode == SFmode)
	*total = COSTS_N_INSNS (2);
      else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
	*total = COSTS_N_INSNS (4);
      else
	*total = COSTS_N_INSNS (20);
      return false;

    case ROTATE:
      if (REG_P (XEXP (x, 1)))
	*total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
      else if (!CONST_INT_P (XEXP (x, 1)))
	*total = rtx_cost (XEXP (x, 1), code, 1, speed);

      /* Fall through */
    case ROTATERT:
      if (mode != SImode)
	{
	  *total += COSTS_N_INSNS (4);
	  return true;
	}

      /* Fall through */
    case ASHIFT: case LSHIFTRT: case ASHIFTRT:
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
      if (mode == DImode)
	{
	  *total += COSTS_N_INSNS (3);
	  return true;
	}

      *total += COSTS_N_INSNS (1);
      /* Increase the cost of complex shifts because they aren't any faster,
	 and reduce dual issue opportunities.  */
      if (arm_tune_cortex_a9
	  && outer != SET && !CONST_INT_P (XEXP (x, 1)))
	++*total;

      return true;

    case MINUS:
      if (mode == DImode)
	{
	  *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
	  if (CONST_INT_P (XEXP (x, 0))
	      && const_ok_for_arm (INTVAL (XEXP (x, 0))))
	    {
	      *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1))
	      && const_ok_for_arm (INTVAL (XEXP (x, 1))))
	    {
	      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	      return true;
	    }

	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      if (CONST_DOUBLE_P (XEXP (x, 0))
		  && arm_const_double_rtx (XEXP (x, 0)))
		{
		  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
		  return true;
		}

	      if (CONST_DOUBLE_P (XEXP (x, 1))
		  && arm_const_double_rtx (XEXP (x, 1)))
		{
		  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
		  return true;
		}

	      return false;
	    }
	  *total = COSTS_N_INSNS (20);
	  return false;
	}

      *total = COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 0))
	  && const_ok_for_arm (INTVAL (XEXP (x, 0))))
	{
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  return true;
	}

      subcode = GET_CODE (XEXP (x, 1));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	{
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
	  return true;
	}

      /* A shift as a part of RSB costs no more than RSB itself.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  return true;
	}

      if (subcode == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
	{
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
	  return true;
	}

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
	  if (REG_P (XEXP (XEXP (x, 1), 0))
	      && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
	    *total += COSTS_N_INSNS (1);

	  return true;
	}

      /* Fall through */

    case PLUS:
      if (code == PLUS && arm_arch6 && mode == SImode
	  && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	{
	  *total = COSTS_N_INSNS (1);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
			      0, speed);
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  return true;
	}

      /* MLA: All arguments must be registers.  We filter out
	 multiplication by a power of two, so that we fall down into
	 the code below.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  /* The cost comes from the cost of the multiply.  */
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      if (CONST_DOUBLE_P (XEXP (x, 1))
		  && arm_const_double_rtx (XEXP (x, 1)))
		{
		  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
		  return true;
		}

	      return false;
	    }

	  *total = COSTS_N_INSNS (20);
	  return false;
	}

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	  || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
	  if (REG_P (XEXP (XEXP (x, 0), 0))
	      && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
	    *total += COSTS_N_INSNS (1);
	  return true;
	}

      /* Fall through */

    case AND: case XOR: case IOR:

      /* Normally the frame registers will be spilt into reg+const during
	 reload, so it is a bad idea to combine them with other instructions,
	 since then they might not be moved outside of loops.  As a compromise
	 we allow integration with ops that have a constant as their second
	 operand.  */
      if (REG_OR_SUBREG_REG (XEXP (x, 0))
	  && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
	  && !CONST_INT_P (XEXP (x, 1)))
	*total = COSTS_N_INSNS (1);

      if (mode == DImode)
	{
	  *total += COSTS_N_INSNS (2);
	  if (CONST_INT_P (XEXP (x, 1))
	      && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	    {
	      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	      return true;
	    }

	  return false;
	}

      *total += COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 1))
	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	{
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}
      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	{
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	  return true;
	}

      if (subcode == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	  return true;
	}

      if (subcode == UMIN || subcode == UMAX
	  || subcode == SMIN || subcode == SMAX)
	{
	  *total = COSTS_N_INSNS (3);
	  return true;
	}

      return false;

    case MULT:
      /* This should have been handled by the CPU specific routines.  */
      gcc_unreachable ();

    case TRUNCATE:
      if (arm_arch3m && mode == SImode
	  && GET_CODE (XEXP (x, 0)) == LSHIFTRT
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
	      == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
	  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
	{
	  *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
	  return true;
	}
      *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
      return false;

    case NEG:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      return false;
	    }
	  *total = COSTS_N_INSNS (2);
	  return false;
	}

      /* Fall through */
    case NOT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      if (mode == SImode && code == NOT)
	{
	  subcode = GET_CODE (XEXP (x, 0));
	  if (subcode == ASHIFT || subcode == ASHIFTRT
	      || subcode == LSHIFTRT
	      || subcode == ROTATE || subcode == ROTATERT
	      || (subcode == MULT
		  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
	    {
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	      /* Register shifts cost an extra cycle.  */
	      if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
		*total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
							subcode, 1, speed);
	      return true;
	    }
	}

      return false;

    case IF_THEN_ELSE:
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	{
	  *total = COSTS_N_INSNS (4);
	  return true;
	}

      operand = XEXP (x, 0);

      if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
	     || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
	    && REG_P (XEXP (operand, 0))
	    && REGNO (XEXP (operand, 0)) == CC_REGNUM))
	*total += COSTS_N_INSNS (1);
      *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
		 + rtx_cost (XEXP (x, 2), code, 2, speed));
      return true;

    case NE:
      if (mode == SImode && XEXP (x, 1) == const0_rtx)
	{
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}
      goto scc_insn;

    case GE:
      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
	  && mode == SImode && XEXP (x, 1) == const0_rtx)
	{
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}
      goto scc_insn;

    case LT:
      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
	  && mode == SImode && XEXP (x, 1) == const0_rtx)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}
      goto scc_insn;

    case EQ:
    case GT:
    case LE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
    scc_insn:
      /* SCC insns.  In the case where the comparison has already been
	 performed, then they cost 2 instructions.  Otherwise they need
	 an additional comparison before them.  */
      *total = COSTS_N_INSNS (2);
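      /* For example (illustrative only): r0 = (r1 != 0) with the flags
	 already live can be emitted as a conditional pair such as
	 "movne r0, #1; moveq r0, #0" -- the two instructions costed here;
	 without live flags a "cmp r1, #0" has to be added first.  */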
      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
	{
	  return true;
	}

      /* Fall through */
    case COMPARE:
      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
	{
	  *total = 0;
	  return true;
	}

      *total += COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 1))
	  && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
	{
	  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
	  return true;
	}

      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
	  || subcode == LSHIFTRT
	  || subcode == ROTATE || subcode == ROTATERT)
	{
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	  return true;
	}

      if (subcode == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
	  return true;
	}

      return false;

    case UMIN:
    case UMAX:
    case SMIN:
    case SMAX:
      *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
      if (!CONST_INT_P (XEXP (x, 1))
	  || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
	*total += rtx_cost (XEXP (x, 1), code, 1, speed);
      return true;

    case ABS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  if (TARGET_HARD_FLOAT
	      && (mode == SFmode
		  || (mode == DFmode && !TARGET_VFP_SINGLE)))
	    {
	      *total = COSTS_N_INSNS (1);
	      return false;
	    }
	  *total = COSTS_N_INSNS (20);
	  return false;
	}
      *total = COSTS_N_INSNS (1);
      if (mode == DImode)
	*total += COSTS_N_INSNS (3);
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      *total = 0;
      if (GET_MODE_CLASS (mode) == MODE_INT)
	{
	  rtx op = XEXP (x, 0);
	  machine_mode opmode = GET_MODE (op);

	  if (mode == DImode)
	    *total += COSTS_N_INSNS (1);

	  if (opmode != SImode)
	    {
	      if (MEM_P (op))
		{
		  /* If !arm_arch4, we use one of the extendhisi2_mem
		     or movhi_bytes patterns for HImode.  For a QImode
		     sign extension, we first zero-extend from memory
		     and then perform a shift sequence.  */
		  if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
		    *total += COSTS_N_INSNS (2);
		}
	      else if (arm_arch6)
		*total += COSTS_N_INSNS (1);

	      /* We don't have the necessary insn, so we need to perform some
		 other operation.  */
	      else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
		/* An and with constant 255.  */
		*total += COSTS_N_INSNS (1);
	      else
		/* A shift sequence.  Increase costs slightly to avoid
		   combining two shifts into an extend operation.  */
		*total += COSTS_N_INSNS (2) + 1;
	    }

	  return false;
	}

      switch (GET_MODE (XEXP (x, 0)))
	{
	case V8QImode:
	case V4HImode:
	case V2SImode:
	case V4QImode:
	case V2HImode:
	  *total = COSTS_N_INSNS (1);
	  return false;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
      return true;

    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x))
	  || const_ok_for_arm (~INTVAL (x)))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
						  INTVAL (x), NULL_RTX,
						  NULL_RTX, 0, 0));
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (3);
      return true;

    case HIGH:
      *total = COSTS_N_INSNS (1);
      return true;

    case LO_SUM:
      *total = COSTS_N_INSNS (1);
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
      return true;

    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (4);
      return true;

    case SET:
      /* The vec_extract patterns accept memory operands that require an
	 address reload.  Account for the cost of that reload to give the
	 auto-inc-dec pass an incentive to try to replace them.  */
      if (TARGET_NEON && MEM_P (SET_DEST (x))
	  && GET_CODE (SET_SRC (x)) == VEC_SELECT)
	{
	  *total = rtx_cost (SET_DEST (x), code, 0, speed);
	  if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
	    *total += COSTS_N_INSNS (1);
	  return true;
	}

      /* Likewise for the vec_set patterns.  */
      if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
	  && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
	  && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
	{
	  rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
	  *total = rtx_cost (mem, code, 0, speed);
	  if (!neon_vector_mem_operand (mem, 2, true))
	    *total += COSTS_N_INSNS (1);
	  return true;
	}
      return false;

    case UNSPEC:
      /* We cost this as high as our memory costs to allow this to
	 be hoisted from loops.  */
      if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
	{
	  *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
	}
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && outer == SET
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (4);
      return true;

    default:
      *total = COSTS_N_INSNS (4);
      return false;
    }
}
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine-grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* On purpose fall through for normal RTX.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Thumb1 mul instruction can't operate on const.  We must load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  /* For the targets which have a very small and high-latency multiply
	     unit, we prefer to synthesize the mult with up to 5 instructions,
	     giving a good balance between size and performance.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    return COSTS_N_INSNS (5);
	  else
	    return COSTS_N_INSNS (1) + const_size;
	}
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return COSTS_N_INSNS (words)
	     + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
				    || satisfies_constraint_K (SET_SRC (x))
				       /* thumb1_movdi_insn.  */
				    || ((words > 1) && MEM_P (SET_SRC (x))));

    case CONST_INT:
      if (outer == SET)
	{
	  if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
	    return COSTS_N_INSNS (1);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
		|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
	      + COSTS_N_INSNS (1)
		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
	{
	case QImode:
	  return (1 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case HImode:
	  return (4 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case SImode:
	  return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	default:
	  return 99;
	}

    default:
      return 99;
    }
}
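/* Illustrative size examples for the CONST_INT case above when
   outer == SET (not from the original sources): 200 fits an 8-bit move
   (1 insn); -100 takes a move plus a negate (constraint J, 2 insns);
   0x1fe00 is 0xff shifted left by 9, a shiftable pattern
   (constraint K, 2 insns); anything else is assumed to need 3.  */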
/* RTX costs when optimizing for size.  */
static bool
arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		    int *total)
{
  machine_mode mode = GET_MODE (x);
  if (TARGET_THUMB1)
    {
      *total = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions.  */
  switch (code)
    {
    case MEM:
      /* A memory access costs 1 insn if the mode is small, or the address is
	 a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
	*total = COSTS_N_INSNS (1);
      else if (flag_pic
	       && GET_CODE (XEXP (x, 0)) == PLUS
	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
	/* This will be split into two instructions.
	   See arm.md:calculate_pic_address.  */
	*total = COSTS_N_INSNS (2);
      else
	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return true;

    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      /* Needs a libcall, so it costs about this.  */
      *total = COSTS_N_INSNS (2);
      return false;

    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
	  return true;
	}
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
	  return true;
	}
      else if (mode == SImode)
	{
	  *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
	  /* Slightly disparage register shifts, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
	  return true;
	}

      /* Needs a libcall.  */
      *total = COSTS_N_INSNS (2);
      return false;

    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      if (mode == SImode)
	{
	  enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
	  enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));

	  if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
	      || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
	      || subcode1 == ROTATE || subcode1 == ROTATERT
	      || subcode1 == ASHIFT || subcode1 == LSHIFTRT
	      || subcode1 == ASHIFTRT)
	    {
	      /* It's just the cost of the two operands.  */
	      *total = 0;
	      return false;
	    }

	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      /* A shift as a part of ADD costs nothing.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	{
	  *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
	  *total += rtx_cost (XEXP (x, 1), code, 1, false);
	  return true;
	}

      /* Fall through */
    case AND: case XOR: case IOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));

	  if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
	      || subcode == LSHIFTRT || subcode == ASHIFTRT
	      || (code == AND && subcode == NOT))
	    {
	      /* It's just the cost of the two operands.  */
	      *total = 0;
	      return false;
	    }
	}

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case MULT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}

      /* Fall through */
    case NOT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      return false;

    case IF_THEN_ELSE:
      *total = 0;
      return false;

    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode))
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      return arm_rtx_costs_1 (x, outer_code, total, 0);

    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x)))
	/* A multiplication by a constant requires another instruction
	   to load the constant to a register.  */
	*total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
				? 1 : 0);
      else if (const_ok_for_arm (~INTVAL (x)))
	*total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
      else if (const_ok_for_arm (-INTVAL (x)))
	{
	  if (outer_code == COMPARE || outer_code == PLUS
	      || outer_code == MINUS)
	    *total = 0;
	  else
	    *total = COSTS_N_INSNS (1);
	}
      else
	*total = COSTS_N_INSNS (2);
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (2);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (4);
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && outer_code == SET
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (4);
      return true;

    case HIGH:
    case LO_SUM:
      /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
	 cost of these slightly.  */
      *total = COSTS_N_INSNS (1) + 1;
      return true;

    default:
      if (mode != VOIDmode)
	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
	*total = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}
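/* Illustrative examples for the CONST_INT case above (not from the
   original sources): 0xff000000 is 0xff rotated into place, so it is a
   valid immediate (free, or one insn when it must first reach a
   register); 0xffffff00 is only valid inverted, via MVN; -1 is free
   under PLUS/COMPARE because adding -1 becomes SUB #1; a value like
   0x12345678 falls through to the two-insn synthesis estimate.  */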
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
	   || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
	*shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
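/* Usage sketch (illustrative only): for (plus (mult r1 (const_int 8)) r2),
   calling shifter_op_p on the MULT returns r1 -- a left shift by
   log2 (8) = 3 that can fold into the ALU operand -- and leaves
   *SHIFT_REG untouched; for (ashift r1 r3) it returns r1 and sets
   *SHIFT_REG to r3, signalling a register-specified shift amount.  */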
static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  rtx_code code = GET_CODE (x);
  gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
	 use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
		  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
		  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      *cost = COSTS_N_INSNS (1);
      if (speed_p)
	*cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
      break;

    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return true;
}
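/* Example (illustrative only): an unaligned DImode load is costed as two
   base insns (ARM_NUM_REGS == 2) plus, when optimizing for speed, two
   ldst.load extras and one load_unaligned extra, reflecting that the
   access must go through the integer unit word by word since LDM and
   LDRD cannot be used on unaligned addresses.  */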
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
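/* Worked example (illustrative only): LIBCALL_COST (2) expands to
   COSTS_N_INSNS (20) when optimizing for speed -- two argument insns plus
   18 for the call and result handling -- and COSTS_N_INSNS (4) at -Os.  */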
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);	\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)	\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0);
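/* Sketch of the macro's effect (illustrative only): for a narrow-mode
   (plus (ashift r1 (const_int 2)) r2), HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
   finds the shifter operand on side 0 (only left shifts qualify in the
   narrow modes), adds the arith_shift extra -- or arith_shift_reg plus the
   cost of the shift amount when it lives in a register -- and returns
   true from the enclosing cost function.  */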
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
		   const struct cpu_cost_table *extra_cost,
		   int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      if (speed_p)
	*cost = thumb1_rtx_costs (x, code, outer_code);
      else
	*cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case SET:
      *cost = 0;
      /* SET RTXs don't have a mode so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));

      if (REG_P (SET_SRC (x))
	  && REG_P (SET_DEST (x)))
	{
	  /* Assume that most copies can be done with a single insn,
	     unless we don't have HW FP, in which case everything
	     larger than word mode will require two insns.  */
	  *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
				   && GET_MODE_SIZE (mode) > 4)
				  || mode == DImode)
				 ? 2 : 1);
	  /* Conditional register moves can be encoded
	     in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
	    *cost >>= 1;

	  return true;
	}

      if (CONST_INT_P (SET_SRC (x)))
	{
	  /* Handle CONST_INT here, since the value doesn't have a mode
	     and we would otherwise be unable to work out the true cost.  */
	  *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
	  outer_code = SET;
	  /* Slightly lower the cost of setting a core reg to a constant.
	     This helps break up chains and allows for better scheduling.  */
	  if (REG_P (SET_DEST (x))
	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
	    *cost -= 1;
	  x = SET_SRC (x);
	  /* Immediate moves with an immediate in the range [0, 255] can be
	     encoded in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
	    *cost >>= 1;
	  goto const_int_cost;
	}

      return false;

    case MEM:
      /* A memory access costs 1 insn if the mode is small, or the address is
	 a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
	*cost = COSTS_N_INSNS (1);
      else if (flag_pic
	       && GET_CODE (XEXP (x, 0)) == PLUS
	       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
	/* This will be split into two instructions.
	   See arm.md:calculate_pic_address.  */
	*cost = COSTS_N_INSNS (2);
      else
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      /* For speed optimizations, add the costs of the address and
	 accessing memory.  */
      if (speed_p)
#ifdef NOT_YET
	*cost += (extra_cost->ldst.load
		  + arm_address_cost (XEXP (x, 0), mode,
				      ADDR_SPACE_GENERIC, speed_p));
#else
	*cost += extra_cost->ldst.load;
#endif
      return true;

    case PARALLEL:
    {
      /* Calculations of LDM costs are complex.  We assume an initial cost
	 (ldm_1st) which will load the number of registers mentioned in
	 ldm_regs_per_insn_1st registers; then each additional
	 ldm_regs_per_insn_subsequent registers cost one more insn.  The
	 formula for N regs is thus:

	   ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
				     + ldm_regs_per_insn_subsequent - 1)
				    / ldm_regs_per_insn_subsequent).

	 Additional costs may also be added for addressing.  A similar
	 formula is used for STM.  */
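      /* Worked example (illustrative only): loading 6 registers on a core
	 with ldm_regs_per_insn_1st == 2 and
	 ldm_regs_per_insn_subsequent == 2 gives
	 ldm_1st + COSTS_N_INSNS ((max (6 - 2, 0) + 2 - 1) / 2)
	 = ldm_1st + COSTS_N_INSNS (2).  */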
      bool is_ldm = load_multiple_operation (x, SImode);
      bool is_stm = store_multiple_operation (x, SImode);

      *cost = COSTS_N_INSNS (1);

      if (is_ldm || is_stm)
	{
	  if (speed_p)
	    {
	      HOST_WIDE_INT nregs = XVECLEN (x, 0);
	      HOST_WIDE_INT regs_per_insn_1st = is_ldm
				? extra_cost->ldst.ldm_regs_per_insn_1st
				: extra_cost->ldst.stm_regs_per_insn_1st;
	      HOST_WIDE_INT regs_per_insn_sub = is_ldm
			? extra_cost->ldst.ldm_regs_per_insn_subsequent
			: extra_cost->ldst.stm_regs_per_insn_subsequent;

	      *cost += regs_per_insn_1st
		       + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
					 + regs_per_insn_sub - 1)
					/ regs_per_insn_sub);
	    }
	}
      return true;
    }

    case DIV:
    case UDIV:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*cost = COSTS_N_INSNS (speed_p
			       ? extra_cost->fp[mode != SFmode].div : 1);
      else if (mode == SImode && TARGET_IDIV)
	*cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
      else
	*cost = LIBCALL_COST (2);
      return false;	/* All arguments must be in registers.  */

    case MOD:
    case UMOD:
      *cost = LIBCALL_COST (2);
      return false;	/* All arguments must be in registers.  */

    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *cost = (COSTS_N_INSNS (2)
		   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
	  if (speed_p)
	    *cost += extra_cost->alu.shift_reg;
	  return true;
	}
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *cost = (COSTS_N_INSNS (3)
		   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	  return true;
	}
      else if (mode == SImode)
	{
	  *cost = (COSTS_N_INSNS (1)
		   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
	  /* Slightly disparage register shifts at -Os, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *cost += (speed_p ? extra_cost->alu.shift_reg : 1
		      + rtx_cost (XEXP (x, 1), code, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_INT
	       && GET_MODE_SIZE (mode) < 4)
	{
	  if (code == ASHIFT)
	    {
	      *cost = (COSTS_N_INSNS (1)
		       + rtx_cost (XEXP (x, 0), code, 0, speed_p));
	      /* Slightly disparage register shifts at -Os, but not by
		 much.  */
	      if (!CONST_INT_P (XEXP (x, 1)))
		*cost += (speed_p ? extra_cost->alu.shift_reg : 1
			  + rtx_cost (XEXP (x, 1), code, 1, speed_p));
	    }
	  else if (code == LSHIFTRT || code == ASHIFTRT)
	    {
	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
		{
		  /* Can use SBFX/UBFX.  */
		  *cost = COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.bfx;
		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
		}
	      else
		{
		  *cost = COSTS_N_INSNS (2);
		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
		  if (speed_p)
		    {
		      if (CONST_INT_P (XEXP (x, 1)))
			*cost += 2 * extra_cost->alu.shift;
		      else
			*cost += (extra_cost->alu.shift
				  + extra_cost->alu.shift_reg);
		    }
		  else
		    /* Slightly disparage register shifts.  */
		    *cost += !CONST_INT_P (XEXP (x, 1));
		}
	    }
	  else /* Rotates.  */
	    {
	      *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
	      *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	      if (speed_p)
		{
		  if (CONST_INT_P (XEXP (x, 1)))
		    *cost += (2 * extra_cost->alu.shift
			      + extra_cost->alu.log_shift);
		  else
		    *cost += (extra_cost->alu.shift
			      + extra_cost->alu.shift_reg
			      + extra_cost->alu.log_shift_reg);
		}
	    }
	  return true;
	}

      *cost = LIBCALL_COST (2);
      return false;

    case BSWAP:
      if (arm_arch6)
	{
	  if (mode == SImode)
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->alu.rev;

	      return false;
	    }
	}
      else
	{
	  /* No rev instruction available.  Look at arm_legacy_rev
	     and thumb_legacy_rev for the form of RTL used then.  */
	  if (TARGET_THUMB)
	    {
	      *cost = COSTS_N_INSNS (10);

	      if (speed_p)
		{
		  *cost += 6 * extra_cost->alu.shift;
		  *cost += 3 * extra_cost->alu.logical;
		}
	    }
	  else
	    {
	      *cost = COSTS_N_INSNS (5);

	      if (speed_p)
		{
		  *cost += 2 * extra_cost->alu.shift;
		  *cost += extra_cost->alu.arith_shift;
		  *cost += 2 * extra_cost->alu.logical;
		}
	    }
	  return true;
	}
      return false;

    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      || GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      rtx mul_op0, mul_op1, sub_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      if (GET_CODE (XEXP (x, 0)) == MULT)
		{
		  mul_op0 = XEXP (XEXP (x, 0), 0);
		  mul_op1 = XEXP (XEXP (x, 0), 1);
		  sub_op = XEXP (x, 1);
		}
	      else
		{
		  mul_op0 = XEXP (XEXP (x, 1), 0);
		  mul_op1 = XEXP (XEXP (x, 1), 1);
		  sub_op = XEXP (x, 0);
		}

	      /* The first operand of the multiply may be optionally
		 negated.  */
	      if (GET_CODE (mul_op0) == NEG)
		mul_op0 = XEXP (mul_op0, 0);

	      *cost += (rtx_cost (mul_op0, code, 0, speed_p)
			+ rtx_cost (mul_op1, code, 0, speed_p)
			+ rtx_cost (sub_op, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_by_reg = NULL;
	  rtx shift_op;
	  rtx non_shift_op;

	  *cost = COSTS_N_INSNS (1);

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
	  if (shift_op == NULL)
	    {
	      shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
	      non_shift_op = XEXP (x, 0);
	    }
	  else
	    non_shift_op = XEXP (x, 1);

	  if (shift_op != NULL)
	    {
	      if (shift_by_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, code, 0, speed_p)
			+ rtx_cost (non_shift_op, code, 0, speed_p));
	      return true;
	    }

	  if (arm_arch_thumb2
	      && GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      /* MLS.  */
	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 0)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  /* We check both sides of the MINUS for shifter operands since,
	     unlike PLUS, it's not commutative.  */

	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)

	  /* Slightly disparage, as we might need to widen the result.  */
	  *cost = 1 + COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
	      return true;
	    }

	  return false;
	}

      if (mode == DImode)
	{
	  *cost = COSTS_N_INSNS (2);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	    {
	      rtx op1 = XEXP (x, 1);

	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;

	      if (GET_CODE (op1) == ZERO_EXTEND)
		*cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
	      else
		*cost += rtx_cost (op1, MINUS, 1, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
				 0, speed_p);
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));
	      *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0),
				    GET_CODE (XEXP (x, 1)), 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op0, mul_op1, add_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      mul_op0 = XEXP (XEXP (x, 0), 0);
	      mul_op1 = XEXP (XEXP (x, 0), 1);
	      add_op = XEXP (x, 1);

	      *cost += (rtx_cost (mul_op0, code, 0, speed_p)
			+ rtx_cost (mul_op1, code, 0, speed_p)
			+ rtx_cost (add_op, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Narrow modes can be synthesized in SImode, but the range
	 of useful sub-operations is limited.  Check for shift operations
	 on one of the operands.  Only left shifts can be used in the
	 narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      /* Slightly penalize a narrow operation as the result may
		 need widening.  */
	      *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
	      return true;
	    }

	  /* Slightly penalize a narrow operation as the result may
	     need widening.  */
	  *cost = 1 + COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_op, shift_reg;

	  *cost = COSTS_N_INSNS (1);
	  if (TARGET_INT_SIMD
	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	    {
	      /* UXTA[BH] or SXTA[BH].  */
	      if (speed_p)
		*cost += extra_cost->alu.extend_arith;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
				  speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
	      return true;
	    }

	  shift_reg = NULL;
	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
	      return true;
	    }

	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op = XEXP (x, 0);

	      *cost = COSTS_N_INSNS (1);

	      if (TARGET_DSP_MULTIPLY
		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
				      == 16))))))
		{
		  /* SMLA[BT][BT].  */
		  if (speed_p)
		    *cost += extra_cost->mult[0].extend_add;
		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
				      SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
					SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
			+ rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (mode == DImode)
	{
	  if (arm_arch3m
	      && GET_CODE (XEXP (x, 0)) == MULT
	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->mult[1].extend_add;
	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
				    ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
	      return true;
	    }

	  *cost = COSTS_N_INSNS (2);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));

	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
				  speed_p)
			+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;

    case IOR:
      if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.rev;

	  return true;
	}
      /* Fall through.  */
    case AND: case XOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
	  rtx op0 = XEXP (x, 0);
	  rtx shift_op, shift_reg;

	  *cost = COSTS_N_INSNS (1);

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;

	      *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), code, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);

	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.logical;
	      *cost += rtx_cost (op0, code, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  *cost += (rtx_cost (op0, code, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), code, 1, speed_p));
	  return true;
	}

      if (mode == DImode)
	{
	  rtx op0 = XEXP (x, 0);
	  enum rtx_code subcode = GET_CODE (op0);

	  *cost = COSTS_N_INSNS (2);

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  if (GET_CODE (op0) == ZERO_EXTEND)
	    {
	      if (speed_p)
		*cost += 2 * extra_cost->alu.logical;

	      *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), code, 0, speed_p));
	      return true;
	    }
	  else if (GET_CODE (op0) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;

	      *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), code, 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical;

	  return true;
	}
      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;

    case MULT:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  rtx op0 = XEXP (x, 0);

	  *cost = COSTS_N_INSNS (1);

	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].mult;

	  *cost += (rtx_cost (op0, MULT, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      if (mode == SImode)
	{
	  *cost = COSTS_N_INSNS (1);
	  if (TARGET_DSP_MULTIPLY
	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			      && (INTVAL (XEXP (XEXP (x, 1), 1))
				  == 16))))))
	    {
	      /* SMUL[TB][TB].  */
	      if (speed_p)
		*cost += extra_cost->mult[0].extend;
	      *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
	      return true;
	    }
	  if (speed_p)
	    *cost += extra_cost->mult[0].simple;
	  return false;
	}
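      /* Illustrative only: the DSP pattern above matches RTL such as
	 (mult (sign_extend:SI (reg:HI))
	       (ashiftrt:SI (reg:SI) (const_int 16))),
	 i.e. a 16x16->32 multiply taking either the bottom (sign_extend)
	 or top (ashiftrt by 16) half of each operand, which maps onto a
	 single SMULBB/SMULBT/SMULTB/SMULTT instruction.  */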
      if (mode == DImode)
	{
	  if (arm_arch3m
	      && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->mult[1].extend;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0),
				    ZERO_EXTEND, 0, speed_p));
	      return true;
	    }

	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;

    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == ABS)
	    {
	      *cost = COSTS_N_INSNS (2);
	      /* Assume the non-flag-changing variant.  */
	      if (speed_p)
		*cost += (extra_cost->alu.log_shift
			  + extra_cost->alu.arith_shift);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
	      return true;
	    }

	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	    {
	      *cost = COSTS_N_INSNS (2);
	      /* No extra cost for MOV imm and MVN imm.  */
	      /* If the comparison op is using the flags, there's no further
		 cost, otherwise we need to add the cost of the comparison.  */
	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
		{
		  *cost += (COSTS_N_INSNS (1)
			    + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
					speed_p)
			    + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
					speed_p));
		  if (speed_p)
		    *cost += extra_cost->alu.arith;
		}
	      return true;
	    }

	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  /* Slightly disparage, as we might need an extend operation.  */
	  *cost = 1 + COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (mode == DImode)
	{
	  *cost = COSTS_N_INSNS (2);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;

    case NOT:
      if (mode == SImode)
	{
	  rtx shift_op;
	  rtx shift_reg = NULL;

	  *cost = COSTS_N_INSNS (1);
	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

	  if (shift_op)
	    {
	      if (shift_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;
	      *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  return false;
	}
      if (mode == DImode)
	{
	  *cost = COSTS_N_INSNS (2);
	  return false;
	}

      /* Vector mode?  */

      *cost += LIBCALL_COST (1);
      return false;

    case IF_THEN_ELSE:
      {
	if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	  {
	    *cost = COSTS_N_INSNS (4);
	    return true;
	  }

	int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
	int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);

	*cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
	/* Assume that if one arm of the if_then_else is a register,
	   that it will be tied with the result and eliminate the
	   conditional insn.  */
	if (REG_P (XEXP (x, 1)))
	  *cost += op2cost;
	else if (REG_P (XEXP (x, 2)))
	  *cost += op1cost;
	else
	  {
	    if (speed_p)
	      {
		if (extra_cost->alu.non_exec_costs_exec)
		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
		else
		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
	      }
	    else
	      *cost += op1cost + op2cost;
	  }
      }
      return true;

    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
	*cost = 0;
      else
	{
	  machine_mode op0mode;
	  /* We'll mostly assume that the cost of a compare is the cost of the
	     LHS.  However, there are some notable exceptions.  */

	  /* Floating point compares are never done as side-effects.  */
	  op0mode = GET_MODE (XEXP (x, 0));
	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[op0mode != SFmode].compare;

	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
		{
		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
	    {
	      *cost = LIBCALL_COST (2);
	      return false;
	    }

	  /* DImode compares normally take two insns.  */
	  if (op0mode == DImode)
	    {
	      *cost = COSTS_N_INSNS (2);
	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;
	      return false;
	    }

	  if (op0mode == SImode)
	    {
	      rtx shift_op;
	      rtx shift_reg;

	      if (XEXP (x, 1) == const0_rtx
		  && !(REG_P (XEXP (x, 0))
		       || (GET_CODE (XEXP (x, 0)) == SUBREG
			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
		{
		  *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);

		  /* Multiply operations that set the flags are often
		     significantly more expensive.  */
		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;

		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
							    0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;
		  return true;
		}

	      shift_reg = NULL;
	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	      if (shift_op != NULL)
		{
		  *cost = COSTS_N_INSNS (1);
		  if (shift_reg != NULL)
		    {
		      *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
		      if (speed_p)
			*cost += extra_cost->alu.arith_shift_reg;
		    }
		  else if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
			    + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
		  return true;
		}

	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
		  return true;
		}
	      return false;
	    }

	  /* Vector mode?  */

	  *cost = LIBCALL_COST (2);
	  return false;
	}
      return true;

    case EQ:
    case NE:
    case LT:
    case LE:
    case GT:
    case GE:
    case LTU:
    case LEU:
    case GEU:
    case GTU:
    case ORDERED:
    case UNORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if (outer_code == SET)
	{
	  /* Is it a store-flag operation?  */
	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	      && XEXP (x, 1) == const0_rtx)
	    {
	      /* Thumb also needs an IT insn.  */
	      *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
	      return true;
	    }
	  if (XEXP (x, 1) == const0_rtx)
	    {
	      switch (code)
		{
		case LT:
		  /* LSR Rd, Rn, #31.  */
		  *cost = COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		case EQ:
		case NE:
		  /* A compare-negate pair; two insns.  */
		  *cost = COSTS_N_INSNS (2);
		  break;

		case LE:
		  /* RSBS T1, Rn, Rn, LSR #31
		     ADC  Rd, Rn, T1.  */
		  *cost = COSTS_N_INSNS (2);
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  break;

		case GT:
		  /* RSB  Rd, Rn, Rn, ASR #1
		     LSR  Rd, Rd, #31.  */
		  *cost = COSTS_N_INSNS (2);
		  if (speed_p)
		    *cost += (extra_cost->alu.arith_shift
			      + extra_cost->alu.shift);
		  break;

		case GE:
		  /* A shift followed by an add; two insns.  */
		  *cost = COSTS_N_INSNS (2);
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		default:
		  /* Remaining cases are either meaningless or would take
		     three insns anyway.  */
		  *cost = COSTS_N_INSNS (3);
		  break;
		}
	      *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	      return true;
	    }
	  else
	    {
	      *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	}
      /* Not directly inside a set.  If it involves the condition code
	 register it must be the condition for a branch, cond_exec or
	 I_T_E operation.  Since the comparison is performed elsewhere
	 this is just the control part which has no additional
	 cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	       && XEXP (x, 1) == const0_rtx)
	{
	  *cost = 0;
	  return true;
	}
      return false;

    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
	  return false;
	}
      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;

    case SIGN_EXTEND:
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);

	  if (!speed_p)
	    return true;

	  if (GET_MODE (XEXP (x, 0)) == SImode)
	    *cost += extra_cost->ldst.load;
	  else
	    *cost += extra_cost->ldst.load_sign_extend;

	  if (mode == DImode)
	    *cost += extra_cost->alu.shift;

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have SXTB/SXTH.  */
	  *cost = COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.shift;
	}

      return true;

    case ZERO_EXTEND:
      if ((arm_arch4
	   || GET_MODE (XEXP (x, 0)) == SImode
	   || GET_MODE (XEXP (x, 0)) == QImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
	{
	  /* UXTB can be a shorter instruction in Thumb2, but it might
	     be slower than the AND Rd, Rn, #255 alternative.  When
	     optimizing for speed it should never be slower to use
	     AND, and we don't really model 16-bit vs 32-bit insns
	     here.  */
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have UXTB/UXTH.  */
	  *cost = COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  It's marginally preferable to use
	     shifts rather than two BIC instructions as the second
	     shift may merge with a subsequent insn as a shifter
	     op.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}
      else /* GET_MODE (XEXP (x, 0)) == SImode.  */
	*cost = COSTS_N_INSNS (1);

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */
	}

      return true;

    case CONST_INT:
      /* CONST_INT has no mode, so we cannot tell for sure how many
	 insns are really going to be needed.  The best we can do is
	 look at the value passed.  If it fits in SImode, then assume
	 that's the mode it will be used for.  Otherwise assume it
	 will be used in DImode.  */
      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
	mode = SImode;
      else
	mode = DImode;
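      /* Example (illustrative only): with a 64-bit HOST_WIDE_INT,
	 0x12345678 round-trips through trunc_int_for_mode (..., SImode)
	 unchanged and is costed as an SImode constant, whereas
	 0x123456789 does not and is costed below as two SImode
	 syntheses, one for each 32-bit half.  */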
      /* Avoid blowing up in arm_gen_constant ().  */
      if (!(outer_code == PLUS
	    || outer_code == AND
	    || outer_code == IOR
	    || outer_code == XOR
	    || outer_code == MINUS))
	outer_code = SET;

    const_int_cost:
      if (mode == SImode)
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
						    INTVAL (x), NULL, NULL,
						    0, 0));
	  /* Extra costs?  */
	}
      else
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant
				  (outer_code, SImode, NULL,
				   trunc_int_for_mode (INTVAL (x), SImode),
				   NULL, NULL, 0, 0)
				  + arm_gen_constant (outer_code, SImode, NULL,
						      INTVAL (x) >> 32,
						      NULL, NULL, 0, 0));
	  /* Extra costs?  */
	}

      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (speed_p)
	{
	  if (arm_arch_thumb2 && !flag_pic)
	    *cost = COSTS_N_INSNS (2);
	  else
	    *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
	}
      else
	*cost = COSTS_N_INSNS (2);

      if (flag_pic)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	}

      return true;

    case CONST_FIXED:
      *cost = COSTS_N_INSNS (4);
      return true;

    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (vfp3_const_double_rtx (x))
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].fpconst;
	      return true;
	    }

	  if (speed_p)
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (mode == DFmode)
		*cost += extra_cost->ldst.loadd;
	      else
		*cost += extra_cost->ldst.loadf;
	    }
	  else
	    *cost = COSTS_N_INSNS (2 + (mode == DFmode));

	  return true;
	}
      *cost = COSTS_N_INSNS (4);
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*cost = COSTS_N_INSNS (1);
      else
	*cost = COSTS_N_INSNS (4);
      return true;

    case HIGH:
    case LO_SUM:
      *cost = COSTS_N_INSNS (1);
      /* When optimizing for size, we prefer constant pool entries to
	 MOVW/MOVT pairs, so bump the cost of these slightly.  */
      if (!speed_p)
	*cost += 1;
      return true;

    case CLZ:
      *cost = COSTS_N_INSNS (1);
      if (speed_p)
	*cost += extra_cost->alu.clz;
      return false;

    case SMIN:
      if (XEXP (x, 1) == const0_rtx)
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift;
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  return true;
	}
      /* Fall through.  */
    case SMAX:
    case UMIN:
    case UMAX:
      *cost = COSTS_N_INSNS (2);
      return false;

    case TRUNCATE:
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
		      == ZERO_EXTEND))))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->mult[1].extend;
	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
			      speed_p)
		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
				0, speed_p));
	  return true;
	}
      *cost = LIBCALL_COST (1);
      return false;

    case UNSPEC_VOLATILE:
    case UNSPEC:
      return arm_unspec_cost (x, outer_code, speed_p, cost);

    case PC:
      /* Reading the PC is like reading any other register.  Writing it
	 is more expensive, but we take that into account elsewhere.  */
      *cost = 0;
      return true;

    case ZERO_EXTRACT:
      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
    case SIGN_EXTRACT:
      if (arm_arch6
	  && mode == SImode
	  && CONST_INT_P (XEXP (x, 1))
	  && CONST_INT_P (XEXP (x, 2)))
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.bfx;
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  return true;
	}

      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost = COSTS_N_INSNS (2);
      if (speed_p)
	*cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
      return true;

    case FLOAT_EXTEND:
      if (TARGET_HARD_FLOAT)
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].widen;
	  if (!TARGET_FPU_ARMV8
	      && GET_MODE (XEXP (x, 0)) == HFmode)
	    {
	      /* Pre v8, widening HF->DF is a two-step process, first
		 widening to SFmode.  */
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[0].widen;
	    }
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  return true;
	}

      *cost = LIBCALL_COST (1);
      return false;

    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
	{
	  *cost = COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].narrow;
	  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
	  return true;
	  /* Vector modes?  */
	}
      *cost = LIBCALL_COST (1);
      return false;

    case FMA:
      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  rtx op2 = XEXP (x, 2);

	  *cost = COSTS_N_INSNS (1);

	  /* vfms or vfnma.  */
	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);

	  /* vfnms or vfnma.  */
	  if (GET_CODE (op2) == NEG)
	    op2 = XEXP (op2, 0);

	  *cost += rtx_cost (op0, FMA, 0, speed_p);
	  *cost += rtx_cost (op1, FMA, 1, speed_p);
	  *cost += rtx_cost (op2, FMA, 2, speed_p);

	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].fma;

	  return true;
	}

      *cost = LIBCALL_COST (3);
      return false;

    case FIX:
    case UNSIGNED_FIX:
      if (TARGET_HARD_FLOAT)
	{
	  if (GET_MODE_CLASS (mode) == MODE_INT)
	    {
	      *cost = COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
	      /* Strip off the 'cost' of rounding towards zero.  */
	      if (GET_CODE (XEXP (x, 0)) == FIX)
		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
	      else
		*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);

	      /* ??? Increase the cost to deal with transferring from
11003 FP -> CORE registers? */
11006 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11007 && TARGET_FPU_ARMV8
)
11009 *cost
= COSTS_N_INSNS (1);
11011 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
11014 /* Vector costs? */
11016 *cost
= LIBCALL_COST (1);
11020 case UNSIGNED_FLOAT
:
11021 if (TARGET_HARD_FLOAT
)
11023 /* ??? Increase the cost to deal with transferring from CORE
11024 -> FP registers? */
11025 *cost
= COSTS_N_INSNS (1);
11027 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
11030 *cost
= LIBCALL_COST (1);
11034 *cost
= COSTS_N_INSNS (1);
11039 /* Just a guess. Guess number of instructions in the asm
11040 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11041 though (see PR60663). */
11042 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
11043 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
11045 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
11049 if (mode
!= VOIDmode
)
11050 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
11052 *cost
= COSTS_N_INSNS (4); /* Who knows? */
#undef HANDLE_NARROW_SHIFT_ARITH

/* RTX costs when optimizing for size.  */
static bool
arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
               int *total, bool speed)
{
  bool result;

  if (TARGET_OLD_RTX_COSTS
      || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
    {
      /* Old way.  (Deprecated.)  */
      if (!speed)
        result = arm_size_rtx_costs (x, (enum rtx_code) code,
                                     (enum rtx_code) outer_code, total);
      else
        result = current_tune->rtx_costs (x, (enum rtx_code) code,
                                          (enum rtx_code) outer_code, total,
                                          speed);
    }
  else
    {
      /* New way.  */
      if (current_tune->insn_extra_cost)
        result = arm_new_rtx_costs (x, (enum rtx_code) code,
                                    (enum rtx_code) outer_code,
                                    current_tune->insn_extra_cost,
                                    total, speed);
      /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
         && current_tune->insn_extra_cost != NULL  */
      else
        result = arm_new_rtx_costs (x, (enum rtx_code) code,
                                    (enum rtx_code) outer_code,
                                    &generic_extra_costs, total, speed);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
               *total, result ? "final" : "partial");
    }

  return result;
}
/* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
   supported on any "slowmul" cores, so it can be ignored.  */

static bool
arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                       int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT
          || mode == DImode)
        {
          *total = COSTS_N_INSNS (20);
          return false;
        }

      if (CONST_INT_P (XEXP (x, 1)))
        {
          unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
                                      & (unsigned HOST_WIDE_INT) 0xffffffff);
          int cost, const_ok = const_ok_for_arm (i);
          int j, booth_unit_size;

          /* Tune as appropriate.  */
          cost = const_ok ? 4 : 8;
          booth_unit_size = 2;
          for (j = 0; i && j < 32; j += booth_unit_size)
            {
              i >>= booth_unit_size;
              cost++;
            }

          *total = COSTS_N_INSNS (cost);
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* RTX cost for cores with a fast multiply unit (M variants).  */

static bool
arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                       int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  /* ??? should thumb2 use different costs?  */
  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (CONST_INT_P (XEXP (x, 1)))
        {
          unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
                                      & (unsigned HOST_WIDE_INT) 0xffffffff);
          int cost, const_ok = const_ok_for_arm (i);
          int j, booth_unit_size;

          /* Tune as appropriate.  */
          cost = const_ok ? 4 : 8;
          booth_unit_size = 8;
          for (j = 0; i && j < 32; j += booth_unit_size)
            {
              i >>= booth_unit_size;
              cost++;
            }

          *total = COSTS_N_INSNS (cost);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (4);
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
        }

      /* Requires a lib call */
      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
   so it can be ignored.  */

static bool
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                      int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) != MULT)
        return arm_rtx_costs_1 (x, outer_code, total, speed);

      /* A COMPARE of a MULT is slow on XScale; the muls instruction
         will stall until the multiplication is complete.  */
      *total = COSTS_N_INSNS (3);
      return false;

    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (CONST_INT_P (XEXP (x, 1)))
        {
          /* If operand 1 is a constant we can more accurately
             calculate the cost of the multiply.  The multiplier can
             retire 15 bits on the first cycle and a further 12 on the
             second.  We do, of course, have to load the constant into
             a register first.  */
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
          /* There's a general overhead of one cycle.  */
          int cost = 1;
          unsigned HOST_WIDE_INT masked_const;

          if (i & 0x80000000)
            i = ~i;

          i &= (unsigned HOST_WIDE_INT) 0xffffffff;

          masked_const = i & 0xffff8000;
          if (masked_const != 0)
            {
              cost++;
              masked_const = i & 0xf8000000;
              if (masked_const != 0)
                cost++;
            }
          *total = COSTS_N_INSNS (cost);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (3);
          return false;
        }

      /* Requires a lib call */
      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* RTX costs for 9e (and later) cores.  */

static bool
arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                  int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      switch (code)
        {
        case MULT:
          /* Small multiply: 32 cycles for an integer multiply inst.  */
          if (arm_arch6m && arm_m_profile_small_mul)
            *total = COSTS_N_INSNS (32);
          else
            *total = COSTS_N_INSNS (3);
          return true;

        default:
          *total = thumb1_rtx_costs (x, code, outer_code);
          return true;
        }
    }

  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
        }

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (CONST_INT_P (XEXP (x, 1)))
        return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
        return 3;

      return 4;
    }

  return 6;
}

static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
                  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
         operand for INSN.  If we have a shifted input operand and the
         instruction we depend on is another ALU instruction, then we may
         have to account for an additional stall.  */
      if (shift_opnum != 0
          && (attr_type == TYPE_ALU_SHIFT_IMM
              || attr_type == TYPE_ALUS_SHIFT_IMM
              || attr_type == TYPE_LOGIC_SHIFT_IMM
              || attr_type == TYPE_LOGICS_SHIFT_IMM
              || attr_type == TYPE_ALU_SHIFT_REG
              || attr_type == TYPE_ALUS_SHIFT_REG
              || attr_type == TYPE_LOGIC_SHIFT_REG
              || attr_type == TYPE_LOGICS_SHIFT_REG
              || attr_type == TYPE_MOV_SHIFT
              || attr_type == TYPE_MVN_SHIFT
              || attr_type == TYPE_MOV_SHIFT_REG
              || attr_type == TYPE_MVN_SHIFT_REG))
        {
          rtx shifted_operand;
          int opno;

          /* Get the shifted operand.  */
          extract_insn (insn);
          shifted_operand = recog_data.operand[shift_opnum];

          /* Iterate over all the operands in DEP.  If we write an operand
             that overlaps with SHIFTED_OPERAND, then we have to increase
             the cost of this dependency.  */
          extract_insn (dep);
          preprocess_constraints (dep);
          for (opno = 0; opno < recog_data.n_operands; opno++)
            {
              /* We can ignore strict inputs.  */
              if (recog_data.operand_type[opno] == OP_IN)
                continue;

              if (reg_overlap_mentioned_p (recog_data.operand[opno],
                                           shifted_operand))
                {
                  *cost = 2;
                  return false;
                }
            }
        }
    }
  return true;
}
/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
{
  switch (REG_NOTE_KIND (link))
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
          && recog_memoized (dep) >= 0)
        {
          if (GET_CODE (PATTERN (insn)) == SET)
            {
              if (GET_MODE_CLASS
                  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
                  || GET_MODE_CLASS
                  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
                {
                  enum attr_type attr_type_insn = get_attr_type (insn);
                  enum attr_type attr_type_dep = get_attr_type (dep);

                  /* By default all dependencies of the form
                     s0 = s0 <op> s1
                     s0 = s0 <op> s2
                     have an extra latency of 1 cycle because
                     of the input and output dependency in this
                     case.  However this gets modeled as a true
                     dependency and hence all these checks.  */
                  if (REG_P (SET_DEST (PATTERN (insn)))
                      && REG_P (SET_DEST (PATTERN (dep)))
                      && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
                                                  SET_DEST (PATTERN (dep))))
                    {
                      /* FMACS is a special case where the dependent
                         instruction can be issued 3 cycles before
                         the normal latency in case of an output
                         dependency.  */
                      if ((attr_type_insn == TYPE_FMACS
                           || attr_type_insn == TYPE_FMACD)
                          && (attr_type_dep == TYPE_FMACS
                              || attr_type_dep == TYPE_FMACD))
                        {
                          if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
                            *cost = insn_default_latency (dep) - 3;
                          else
                            *cost = insn_default_latency (dep);
                          return false;
                        }
                      else
                        {
                          if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
                            *cost = insn_default_latency (dep) + 1;
                          else
                            *cost = insn_default_latency (dep);
                        }
                      return false;
                    }
                }
            }
        }
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (REG_NOTE_KIND (link) == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
          && get_attr_type (insn) != TYPE_BRANCH)
        {
          *cost = 3;
          return false;
        }

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
          || get_attr_conds (insn) == CONDS_USE)
        {
          *cost = 0;
          return false;
        }
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                        reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
          || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
        return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
               || (from != IWMMXT_REGS && to == IWMMXT_REGS))
        return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
        return 20;
      else
        return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
        return 4;
      else
        return 2;
    }
}

/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
                      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
        return 8;
      else
        return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                tree vectype,
                                int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return current_tune->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return current_tune->vec_costs->scalar_load_cost;

    case scalar_store:
      return current_tune->vec_costs->scalar_store_cost;

    case vector_stmt:
      return current_tune->vec_costs->vec_stmt_cost;

    case vector_load:
      return current_tune->vec_costs->vec_align_load_cost;

    case vector_store:
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
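
/* Illustrative sketch (not part of the backend, kept out of the build):
   the vec_construct case above charges elements / 2 + 1, reflecting that
   elements can be inserted into a vector register pairwise plus some fixed
   overhead.  For example:  */
#if 0
static int
example_vec_construct_cost (int elements)
{
  /* A 4-element vector (e.g. V4SI) costs 3; an 8-element vector costs 5.  */
  return elements / 2 + 1;
}
#endif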
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                   struct _stmt_vec_info *stmt_info, int misalign,
                   enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
        count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
/* Return true if and only if this insn can dual-issue only as older.  */
static bool
cortexa7_older_only (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADC_REG:
    case TYPE_ADCS_REG:
    case TYPE_ADR:
    case TYPE_BFM:
    case TYPE_REV:
    case TYPE_MVN_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_LOAD1:
    case TYPE_STORE1:
    case TYPE_FFARITHS:
    case TYPE_FADDS:
    case TYPE_FFARITHD:
    case TYPE_FADDD:
    case TYPE_FMOV:
    case TYPE_F_CVT:
    case TYPE_FCMPS:
    case TYPE_FCMPD:
    case TYPE_FCONSTS:
    case TYPE_FCONSTD:
    case TYPE_FMULS:
    case TYPE_FMACS:
    case TYPE_FMULD:
    case TYPE_FMACD:
    case TYPE_FDIVS:
    case TYPE_FDIVD:
    case TYPE_F_MRC:
    case TYPE_F_MRRC:
    case TYPE_F_FLAG:
    case TYPE_F_LOADS:
    case TYPE_F_STORES:
      return true;
    default:
      return false;
    }
}

/* Return true if and only if this insn can dual-issue as younger.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
        fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_IMM:
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_EXTEND:
    case TYPE_MVN_IMM:
    case TYPE_MOV_IMM:
    case TYPE_MOV_REG:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
      return true;
    default:
      return false;
    }
}

/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
                        int *n_readyp, int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 5)
    fprintf (file,
             ";; sched_reorder for cycle %d with %d insns in ready list\n",
             clock,
             *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), and looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
        {
          first_older_only = i;
          if (verbose > 5)
            fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
          break;
        }
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
        first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 5)
        fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 5)
        fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
             INSN_UID (ready[first_older_only]),
             INSN_UID (ready[first_younger]));
  rtx_insn *first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];

  ready[i] = first_older_only_insn;
}
/* Implement TARGET_SCHED_REORDER.  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
                   int clock)
{
  switch (arm_tune)
    {
    case cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
        return cost;
    }

  /* XXX Is this strictly true?  */
  if (REG_NOTE_KIND (link) == REG_DEP_ANTI
      || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (REG_NOTE_KIND (link) == 0
      && CALL_P (insn))
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
         from a cached area.  Assume that loads from the stack, and from the
         constant pool are cached, and that others will miss.  This is a
         hack.  */
      if ((GET_CODE (src_mem) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (src_mem))
          || reg_mentioned_p (stack_pointer_rtx, src_mem)
          || reg_mentioned_p (frame_pointer_rtx, src_mem)
          || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
        return 1;
    }

  return cost;
}
int
arm_max_conditional_execute (void)
{
  return max_insns_skipped;
}

static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
  else
    return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
         : arm_default_branch_cost (speed_p, predictable_p);
}

static int
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  REAL_VALUE_TYPE r;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return 0;

  if (REAL_VALUES_EQUAL (r, value_fp0))
    return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     valid = (-1)^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

   - A (most-significant) is the sign bit.
   - BCD are the exponent (encoded as r XOR 3).
   - EFGH are the mantissa (encoded as n - 16).  */

/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  bool fail;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = ((unsigned HOST_WIDE_INT) 1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}
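
/* Illustrative sketch (not part of the backend, kept out of the build):
   the ABCDEFGH mapping described above, computed directly from integers
   n (16..31) and r (0..7) rather than from a REAL_VALUE_TYPE.  The
   represented value is (-1)^s * n * 2^-r, so for example 1.0 is s = 0,
   n = 16, r = 4, which encodes as ((4 ^ 3) << 4) | 0 = 0x70.  */
#if 0
static int
example_vfp3_encode (int sign, int n, int r)
{
  if (n < 16 || n > 31 || r < 0 || r > 7)
    return -1;
  return (sign << 7) | ((r ^ 3) << 4) | (n - 16);
}
#endif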
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
static int
neon_valid_immediate (rtx op, machine_mode mode, int inverse,
                      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    if (!(TEST))				\
      matches = 0;				\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    {
      n_elts = CONST_VECTOR_NUNITS (op);
      innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
    }
  else
    {
      n_elts = 1;
      if (mode == VOIDmode)
        mode = DImode;
      innersize = GET_MODE_SIZE (mode);
    }

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);
      REAL_VALUE_TYPE r0;

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
        return -1;

      REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);

      for (i = 1; i < n_elts; i++)
        {
          rtx elt = CONST_VECTOR_ELT (op, i);
          REAL_VALUE_TYPE re;

          REAL_VALUE_FROM_CONST_DOUBLE (re, elt);

          if (!REAL_VALUES_EQUAL (r0, re))
            return -1;
        }

      if (modconst)
        *modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
        *elementwidth = 0;

      if (el0 == CONST0_RTX (GET_MODE (el0)))
        return 19;
      else
        return 18;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (CONST_INT_P (el))
        {
          elpart = INTVAL (el);
          parts = 1;
        }
      else if (CONST_DOUBLE_P (el))
        {
          elpart = CONST_DOUBLE_LOW (el);
          parts = 2;
        }
      else
        gcc_unreachable ();

      for (part = 0; part < parts; part++)
        {
          unsigned int byte;
          for (byte = 0; byte < innersize; byte++)
            {
              bytes[idx++] = (elpart & 0xff) ^ invmask;
              elpart >>= BITS_PER_UNIT;
            }
          if (CONST_DOUBLE_P (el))
            elpart = CONST_DOUBLE_HIGH (el);
        }
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
                       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
                       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
                       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
                       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
                       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
                       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
                       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
                       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
                        && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
                        && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
                        && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
                        && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
                        && bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);

  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
        for (i = 0; i < idx; i++)
          bytes[i] ^= invmask;

      if (immtype == 17)
        {
          /* FIXME: Broken on 32-bit H_W_I hosts.  */
          gcc_assert (sizeof (HOST_WIDE_INT) == 8);

          for (i = 0; i < 8; i++)
            imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
                   << (i * BITS_PER_UNIT);

          *modconst = GEN_INT (imm);
        }
      else
        {
          unsigned HOST_WIDE_INT imm = 0;

          for (i = 0; i < elsize / BITS_PER_UNIT; i++)
            imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

          *modconst = GEN_INT (imm);
        }
    }

  return immtype;
#undef CHECK
}
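
/* Illustrative sketch (not part of the backend, kept out of the build):
   every CHECK pattern above tests a byte image of the replicated constant.
   A standalone version of the variant-0 test (vmov.i32 with only the low
   byte of each 32-bit group significant) over such a byte image:  */
#if 0
static int
example_is_vmov_i32_variant0 (const unsigned char *bytes, unsigned int idx)
{
  unsigned int i;
  /* Every 32-bit group must be: low byte replicated, remaining bytes zero.  */
  for (i = 0; i < idx; i += 4)
    if (!(bytes[i] == bytes[0] && bytes[i + 1] == 0
          && bytes[i + 2] == 0 && bytes[i + 3] == 0))
      return 0;
  return 1;
}
#endif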
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */

int
neon_immediate_valid_for_move (rtx op, machine_mode mode,
                               rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}

/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for description of INVERSE.  */

int
neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
                                rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT selects between left and right shift,
   because they have different limitations.  */

int
neon_immediate_valid_for_shift (rtx op, machine_mode mode,
                                rtx *modconst, int *elementwidth,
                                bool isleftshift)
{
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
        elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
        return 0;
      else
        gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
        return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
        return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
        return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
                             int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}

/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
                             machine_mode mode, int quad,
                             bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
                      rtx (*reduc) (rtx, rtx, rtx))
{
  machine_mode inner = GET_MODE_INNER (mode);
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

static rtx
neon_vdup_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  if (!all_same)
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
         CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
         Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
        {
          rtx x = XVECEXP (vals, 0, i);
          if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
            n_const++;
        }
      if (n_const == n_elts)
        const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We can not take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        ++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
        {
          emit_move_insn (target, constant);
          return;
        }
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);

      /* Load constant part of vector, substitute neighboring value for
         varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      switch (mode)
        {
        case V8QImode:
          emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
          break;
        case V16QImode:
          emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
          break;
        case V4HImode:
          emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
          break;
        case V8HImode:
          emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
          break;
        case V2SImode:
          emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
          break;
        case V4SImode:
          emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
          break;
        case V2SFmode:
          emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
          break;
        case V4SFmode:
          emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
          break;
        case V2DImode:
          emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
          break;
        default:
          gcc_unreachable ();
        }
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
                                       i * GET_MODE_SIZE (inner_mode)),
                    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  FIXME: NEON bounds checks occur late in compilation, so
   reported source locations are bogus.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
              const char *err)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error (err);
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "lane out of range");
}

/* Bounds-check constants.  */

void
neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "constant out of range");
}

HOST_WIDE_INT
neon_element_bits (machine_mode mode)
{
  if (mode == DImode)
    return GET_MODE_BITSIZE (mode);
  else
    return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int
arm_coproc_mem_operand (rtx op, bool wb)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
          && (GET_CODE (ind) == PRE_INC
              || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
           (const)).  */
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return !strict;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn.  */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1)))
    return true;

  /* Match:
     (plus (reg)
           (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
         than what the instruction format permits.  We have no such constraint
         on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
          < (VALID_NEON_QREG_MODE (GET_MODE (op)) ? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
                       || REGNO (x) == ARG_POINTER_REGNUM
                       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
                           && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16)
        return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
        return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
          || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
          || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
          && BYTES_BIG_ENDIAN
          && (AGGREGATE_TYPE_P (valtype)
              || TREE_CODE (valtype) == COMPLEX_TYPE
              || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (symbol_mentioned_p (XVECEXP (x, i, j)))
              return 1;
        }
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
        return 1;
    }

  return 0;
}

/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (label_mentioned_p (XVECEXP (x, i, j)))
              return 1;
        }
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
        return 1;
    }

  return 0;
}

int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
        return 1;

      /* Fall through.  */
    default:
      return 0;
    }
}
/* Must not copy any rtx that uses a pc-relative address.  */

static bool
arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
          && (XINT (x, 1) == UNSPEC_PIC_BASE
              || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
        return true;
    }
  return false;
}
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}
/* Match pair of min/max operators that can be implemented via usat/ssat.  */
bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
                        int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
        *mask = log;
      if (signed_sat)
        *signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
        *mask = log + 1;
      if (signed_sat)
        *signed_sat = true;

      return true;
    }

  return false;
}
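/* For example, a bounds pair of (0, 255) matches with *MASK == 8 and
   *SIGNED_SAT == false (the range of "usat #8"), while (-128, 127)
   matches with *MASK == 8 and *SIGNED_SAT == true (the range of
   "ssat #8").  A pair such as (0, 100) fails the power-of-two test
   above and does not match.  */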
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
           && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
          || (GET_CODE (XEXP (b, 0)) == PLUS
              && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
        {
          reg0 = XEXP (XEXP (a, 0), 0);
          val0 = INTVAL (XEXP (XEXP (a, 0), 1));
        }
      else
        reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
        {
          reg1 = XEXP (XEXP (b, 0), 0);
          val1 = INTVAL (XEXP (XEXP (b, 0), 1));
        }
      else
        reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
         instructions to handle, since this would cause the
         arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
        return 0;

      /* Don't allow an eliminable register: register elimination can make
         the offset too large.  */
      if (arm_eliminable_register (reg0))
        return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
        {
          /* If the target has load delay slots, then there's no benefit
             to using an ldm instruction unless the offset is zero and
             we are optimizing for size.  */
          return (optimize_size && (REGNO (reg0) == REGNO (reg1))
                  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
                  && (val_diff == 4 || val_diff == -4));
        }

      return ((REGNO (reg0) == REGNO (reg1))
              && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
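/* For instance, (mem (reg r4)) and (mem (plus (reg r4) (const_int 4)))
   are adjacent in either order (VAL_DIFF is +4 or -4), whereas
   (mem (reg r4)) and (mem (plus (reg r4) (const_int 8))) are not.  */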
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
     where
     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for kth register being loaded,
         REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
                     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
         popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
          || !REG_P (XEXP (SET_SRC (elt), 0))
          || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
          || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
          || INTVAL (XEXP (SET_SRC (elt), 1)) !=
             ((count - 1 - offset_adj) * reg_increment))
        return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is
     loaded, success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
        return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
        return false;

      if (load)
        {
          reg = SET_DEST (elt);
          mem = SET_SRC (elt);
        }
      else
        {
          reg = SET_SRC (elt);
          mem = SET_DEST (elt);
        }

      if (!REG_P (reg)
          || GET_MODE (reg) != mode
          || REGNO (reg) <= regno
          || (consecutive
              && (REGNO (reg) !=
                  (unsigned int) (first_regno + regs_per_val * (i - base))))
          /* Don't allow SP to be loaded unless it is also the base register.  It
             guarantees that SP is reset correctly when an LDM instruction
             is interrupted.  Otherwise, we might end up with a corrupt stack.  */
          || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
          || !MEM_P (mem)
          || GET_MODE (mem) != mode
          || ((GET_CODE (XEXP (mem, 0)) != PLUS
               || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
               || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
               || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
                   offset + (i - base) * reg_increment))
              && (!REG_P (XEXP (mem, 0))
                  || offset + (i - base) * reg_increment != 0)))
        return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
        addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
        return false;

      /* For Thumb-1, address register is always modified - either by write-back
         or by explicit load.  If the pattern does not describe an update,
         then the address register must be in the list of loaded registers.  */
      if (TARGET_THUMB1)
        return update || addr_reg_in_reglist;
    }

  return true;
}
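/* As an illustration, an SImode "ldmia r3, {r4, r5}" corresponds to the
   parallel

     [(set (reg:SI 4) (mem:SI (reg:SI 3)))
      (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 3) (const_int 4))))]

   which this function accepts with LOAD true, CONSECUTIVE false and
   RETURN_PC false: the registers ascend, and each memory offset is the
   previous one plus the register increment.  */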
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
                                 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

        ldr     rd1, [rbase + offset]
        ldr     rd2, [rbase + offset + 4]

     to

        add     rd1, rbase, offset
        ldmia   rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

        NREGS           CYCLES
          1               3
          2               4
          3               5
          4               6

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

        NREGS           CYCLES
          1              1-3
          2              2-6
          3              3-9
          4              4-12

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
                      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
        if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
          {
            /* We must find exactly one offset that is higher than the
               previous one by 4.  */
            if (order[i] != order[i - 1])
              return false;
            order[i] = j;
          }
      if (order[i] == order[i - 1])
        return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
          && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
        return false;
    }
  return true;
}
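/* Worked example: with UNSORTED_OFFSETS == {8, 0, 4} the caller sets
   ORDER[0] = 1 (the index of the lowest offset); the loop then finds
   offset 4 at index 2 and offset 8 at index 0, producing
   ORDER == {1, 2, 0}.  Offsets {0, 4, 12} fail, because no offset is
   exactly 4 above 4.  */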
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
                        int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  rtx base_reg_rtx = NULL;
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
        operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
         looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
        return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
           || (GET_CODE (reg) == SUBREG
               && REG_P (reg = SUBREG_REG (reg))))
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
              && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
                  || (GET_CODE (reg) == SUBREG
                      && REG_P (reg = SUBREG_REG (reg))))
              && (CONST_INT_P (offset
                  = XEXP (XEXP (operands[nops + i], 0), 1)))))
        {
          if (i == 0)
            {
              base_reg = REGNO (reg);
              base_reg_rtx = reg;
              if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
                return 0;
            }
          else if (base_reg != (int) REGNO (reg))
            /* Not addressed from the same base register.  */
            return 0;

          unsorted_regs[i] = (REG_P (operands[i])
                              ? REGNO (operands[i])
                              : REGNO (SUBREG_REG (operands[i])));

          /* If it isn't an integer register, or if it overwrites the
             base register but isn't the last insn in the list, then
             we can't do this.  */
          if (unsorted_regs[i] < 0
              || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
              || unsorted_regs[i] > 14
              || (i != nops - 1 && unsorted_regs[i] == base_reg))
            return 0;

          /* Don't allow SP to be loaded unless it is also the base
             register.  It guarantees that SP is reset correctly when
             an LDM instruction is interrupted.  Otherwise, we might
             end up with a corrupt stack.  */
          if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
            return 0;

          unsorted_offsets[i] = INTVAL (offset);
          if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
            order[0] = i;
        }
      else
        /* Not a suitable memory address.  */
        return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
                             check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
        regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
           || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
                                        ldm_case == 5
                                        ? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
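/* For example, two loads from [r2] and [r2, #4] give ldm_case 1 (ldmia);
   from [r2, #4] and [r2, #8] give case 2 (ldmib); from [r2, #-4] and
   [r2] give case 3 (ldmda) on ARM; and any other common base offset that
   is a valid add/sub immediate falls into case 5, which requires a
   preparatory add before the ldm.  */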
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
                         int *regs, rtx *reg_rtxs, int *saved_order, int *base,
                         HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
        operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
         looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
        return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
           || (GET_CODE (reg) == SUBREG
               && REG_P (reg = SUBREG_REG (reg))))
          || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
              && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
                  || (GET_CODE (reg) == SUBREG
                      && REG_P (reg = SUBREG_REG (reg))))
              && (CONST_INT_P (offset
                  = XEXP (XEXP (operands[nops + i], 0), 1)))))
        {
          unsorted_reg_rtxs[i] = (REG_P (operands[i])
                                  ? operands[i] : SUBREG_REG (operands[i]));
          unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

          if (i == 0)
            {
              base_reg = REGNO (reg);
              base_reg_rtx = reg;
              if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
                return 0;
            }
          else if (base_reg != (int) REGNO (reg))
            /* Not addressed from the same base register.  */
            return 0;

          /* If it isn't an integer register, then we can't do this.  */
          if (unsorted_regs[i] < 0
              || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
              /* The effects are unpredictable if the base register is
                 both updated and stored.  */
              || (base_writeback && unsorted_regs[i] == base_reg)
              || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
              || unsorted_regs[i] > 14)
            return 0;

          unsorted_offsets[i] = INTVAL (offset);
          if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
            order[0] = i;
        }
      else
        /* Not a suitable memory address.  */
        return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
                             check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
        {
          regs[i] = unsorted_regs[check_regs ? order[i] : i];
          if (reg_rtxs)
            reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
        }

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
                         HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
        emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
        emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
                             rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
        = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
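/* E.g. COUNT == 2, REGS == {4, 5}, BASEREG == r3 and WBACK_OFFSET == 8
   (with MEMS holding the matching addresses) would build the parallel

     [(set (reg:SI 3) (plus:SI (reg:SI 3) (const_int 8)))
      (set (reg:SI 4) (mem:SI (reg:SI 3)))
      (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 3) (const_int 4))))]

   i.e. an "ldmia r3!, {r4, r5}".  */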
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
                          HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
        emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
        emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
                             rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
        = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */

static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
                     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
                                    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
                                     write_back ? 4 * count : 0);
}

rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
                       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
                              offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
                        rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
                              offsetp);
}
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */

bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
                                     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
        if (regs[i] > regs[j])
          {
            int t = regs[i];
            regs[i] = regs[j];
            regs[j] = t;
          }
  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
      gcc_assert (ldm_case == 1 || ldm_case == 5);
      write_back = TRUE;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
        {
          base_reg = regs[0];
          base_reg_rtx = newbase;
        }
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
                                      write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */

bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
                                      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
                                       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */

bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
                                      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
        if (regs[i] == regs[j])
          {
            rtx t = peep2_find_free_register (0, nops * 2,
                                              TARGET_THUMB1 ? "l" : "r",
                                              SImode, &allocated);
            if (t == NULL_RTX)
              return false;
            reg_rtxs[i] = t;
            regs[i] = REGNO (t);
          }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
        if (regs[j] > regs[reg_order[i - 1]]
            && (this_order == reg_order[i - 1]
                || regs[j] < regs[this_order]))
          this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
           || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
          && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
        return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
                                              SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
                                       write_back ? offset + i * 4 : 0));
  return true;
}
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */

static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
                                   HOST_WIDE_INT length,
                                   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = i;

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
                                            TRUE, srcbase, &srcoffset));
          src_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
                                                 - src_autoinc));
              mem = adjust_automodify_address (srcbase, SImode, addr,
                                               srcoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_loadsi (regs[j], mem));
            }
          srcoffset += block_size_bytes;
        }

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
        {
          emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
                                             TRUE, dstbase, &dstoffset));
          dst_autoinc += UNITS_PER_WORD * interleave_factor;
        }
      else
        {
          for (j = 0; j < interleave_factor; j++)
            {
              addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
                                                 - dst_autoinc));
              mem = adjust_automodify_address (dstbase, SImode, addr,
                                               dstoffset + j * UNITS_PER_WORD);
              emit_insn (gen_unaligned_storesi (mem, regs[j]));
            }
          dstoffset += block_size_bytes;
        }

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
                                        &srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, src,
                                srcoffset + j * UNITS_PER_WORD - src_autoinc);
          mem = adjust_automodify_address (srcbase, SImode, addr,
                                           srcoffset + j * UNITS_PER_WORD);
          emit_insn (gen_unaligned_loadsi (regs[j], mem));
        }
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
                                         &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
        {
          addr = plus_constant (Pmode, dst,
                                dstoffset + j * UNITS_PER_WORD - dst_autoinc);
          mem = adjust_automodify_address (dstbase, SImode, addr,
                                           dstoffset + j * UNITS_PER_WORD);
          emit_insn (gen_unaligned_storesi (mem, regs[j]));
        }
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
         byte, depending on interleave factor.  */
      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
          emit_insn (gen_unaligned_storehi (mem,
                       gen_lowpart (HImode, halfword_tmp)));
          halfword_tmp = NULL;
          dstoffset += 2;
        }

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
        {
          addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
          mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
          byte_tmp = NULL;
          dstoffset++;
        }

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
                   gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
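/* By way of example, a 6-byte copy with INTERLEAVE_FACTOR == 1 and both
   buffers unaligned emits roughly

     ldr  rA, [src]          @ one whole word
     str  rA, [dst]
     ldrh rB, [src, #4]      @ trailing halfword
     strh rB, [dst, #4]

   where rA/rB stand for whatever pseudos get allocated.  */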
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
                      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
                               unsigned int interleave_factor,
                               HOST_WIDE_INT bytes_per_iter)
{
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
                                   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
                                     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_movmemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
         size of code if optimizing for size.  We'll use ldm/stm if src_aligned
         or dst_aligned though: allow more interleaving in those cases since the
         resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
        arm_block_move_unaligned_loop (operands[0], operands[1], length,
                                       interleave_factor, bytes_per_iter);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length,
                                           interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
         subject to loop unrolling, which makes tuning this condition a little
         redundant.  */
      if (length > 32)
        arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
        arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_movmemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  int i;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_movmemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  for (i = 0; in_words_to_go >= 2; i += 4)
    {
      if (in_words_to_go > 4)
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
                                          TRUE, srcbase, &srcoffset));
      else
        emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
                                          src, FALSE, srcbase,
                                          &srcoffset));

      if (out_words_to_go)
        {
          if (out_words_to_go > 4)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
                                               TRUE, dstbase, &dstoffset));
          else if (out_words_to_go != 1)
            emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
                                               out_words_to_go, dst,
                                               (last_bytes == 0
                                                ? FALSE : TRUE),
                                               dstbase, &dstoffset));
          else
            {
              mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
              emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
              if (last_bytes != 0)
                {
                  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
                  dstoffset += 4;
                }
            }
        }

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);     /* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
                              GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode,
                                           plus_constant (Pmode, dst,
                                                          last_bytes - 1),
                                           dstoffset + last_bytes - 1);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

          if (--last_bytes)
            {
              tmp = gen_reg_rtx (SImode);
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
              part_bytes_reg = tmp;
            }
        }
    }
  else
    {
      if (last_bytes > 1)
        {
          mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
          last_bytes -= 2;
          if (last_bytes)
            {
              rtx tmp = gen_reg_rtx (SImode);
              emit_insn (gen_addsi3 (dst, dst, const2_rtx));
              emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
              part_bytes_reg = tmp;
              dstoffset += 2;
            }
        }

      if (last_bytes)
        {
          mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
          emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
        }
    }

  return 1;
}
/* Helper for gen_movmem_ldrd_strd.  Increase the address of memory rtx
   by mode size.  */
inline static rtx
next_consecutive_mem (rtx mem)
{
  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
bool
gen_movmem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_movmemqi (operands);

  src = adjust_address (src, DImode, 0);
  dst = adjust_address (dst, DImode, 0);
  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      if (src_aligned)
        emit_move_insn (reg0, src);
      else
        emit_insn (gen_unaligned_loaddi (reg0, src));

      if (dst_aligned)
        emit_move_insn (dst, reg0);
      else
        emit_insn (gen_unaligned_storedi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
        emit_move_insn (reg0, src);
      else
        emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
        emit_move_insn (dst, reg0);
      else
        emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  if (len == 0)
    return true;

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
        emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
        emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
        emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
      else
        emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 2;
    }

  if (len)
    {
      dst = adjust_address (dst, QImode, 0);
      src = adjust_address (src, QImode, 0);
      reg0 = gen_reg_rtx (QImode);
      emit_move_insn (reg0, src);
      emit_move_insn (dst, reg0);
    }

  return true;
}
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */

machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
          != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    std::swap (cond1, cond2);

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DEQmode;

      switch (cond2)
        {
        case EQ: return CC_DEQmode;
        case LE: return CC_DLEmode;
        case LEU: return CC_DLEUmode;
        case GE: return CC_DGEmode;
        case GEU: return CC_DGEUmode;
        default: gcc_unreachable ();
        }

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTmode;

      switch (cond2)
        {
        case LT:
          return CC_DLTmode;
        case LE:
          return CC_DLEmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTmode;

      switch (cond2)
        {
        case GT:
          return CC_DGTmode;
        case GE:
          return CC_DGEmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DLTUmode;

      switch (cond2)
        {
        case LTU:
          return CC_DLTUmode;
        case LEU:
          return CC_DLEUmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
        return CC_DGTUmode;

      switch (cond2)
        {
        case GTU:
          return CC_DGTUmode;
        case GEU:
          return CC_DGEUmode;
        case NE:
          return CC_DNEmode;
        default:
          gcc_unreachable ();
        }

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
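/* For example, for (ne (ior (lt x (const_int 0)) (lt y (const_int 0)))
   (const_int 0)) both sub-comparisons are LT, so DOM_CC_X_OR_Y yields
   CC_DLTmode, which lets the test be emitted as one compare followed by
   one conditional compare instead of two separate compare-and-branch
   sequences.  */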
machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
        {
        case EQ:
        case NE:
        case UNORDERED:
        case ORDERED:
        case UNLT:
        case UNLE:
        case UNGT:
        case UNGE:
        case UNEQ:
        case LTGT:
          return CCFPmode;

        case LT:
        case LE:
        case GT:
        case GE:
          return CCFPEmode;

        default:
          gcc_unreachable ();
        }
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
          || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
          || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
          || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
                                         DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
          || GET_CODE (x) == AND || GET_CODE (x) == IOR
          || GET_CODE (x) == XOR || GET_CODE (x) == MULT
          || GET_CODE (x) == NOT || GET_CODE (x) == NEG
          || GET_CODE (x) == LSHIFTRT
          || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == ROTATERT
          || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      switch (op)
        {
        case EQ:
        case NE:
          /* A DImode comparison against zero can be implemented by
             or'ing the two halves together.  */
          if (y == const0_rtx)
            return CC_Zmode;

          /* We can do an equality test in three Thumb instructions.  */
          if (!TARGET_32BIT)
            return CC_Zmode;

          /* FALLTHROUGH */

        case LTU:
        case LEU:
        case GTU:
        case GEU:
          /* DImode unsigned comparisons can be implemented by cmp +
             cmpeq without a scratch register.  Not worth doing in
             Thumb-2.  */
          if (TARGET_ARM)
            return CC_CZmode;

          /* FALLTHROUGH */

        case LT:
        case LE:
        case GT:
        case GE:
          /* DImode signed and unsigned comparisons can be implemented
             by cmp + sbcs with a scratch register, but that does not
             set the Z flag - we must reverse GT/LE/GTU/LEU.  */
          gcc_assert (op != EQ && op != NE);
          return CC_NCVmode;

        default:
          gcc_unreachable ();
        }
    }

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
         then compare against zero.  Not used for ARM mode; there
         CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
        {
          gcc_assert (!reload_completed);
          x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
          y = const0_rtx;
        }

      /* A scratch register is required.  */
      if (reload_completed)
        gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
        scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
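/* A worked example of the XOR rewrite above (illustrative only, not
   from the original source): comparing x = 0x0000000100000002 against
   y = 0x0000000100000003 for EQ in Thumb-2 first computes
   x ^ y = 0x0000000000000001 and then compares that result with zero,
   so only the Z flag is needed (CC_Zmode) and no scratch-using
   cmp/cmpeq sequence has to be emitted.  */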
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}

void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
        {
          ref = reg_equiv_mem (REGNO (ref));
          base = find_replacement (&XEXP (ref, 0));
        }
      else
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);
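      /* A worked example of the split above (illustrative only): for
         offset = 4097 (0x1001), lo = 1 and hi = 0x1000, so the address
         is formed with one add of 0x1000 followed by byte loads at #1
         and #2.  For offset = -4097, lo = -1 and hi = -4096.  In the
         corner case offset = 4095, lo is first reduced to 2047 (0x7ff)
         so that the "offset + 1" of the second byte load cannot step
         outside the 12-bit range; hi then absorbs the remaining 2048.
         The XOR/subtract dance sign-extends bit 31 of (offset - lo) so
         that hi is correct even when HOST_WIDE_INT is wider than 32
         bits.  */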
      if (hi != 0)
        {
          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

          /* Get the base address; addsi3 knows how to handle constants
             that require more than one insn.  */
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
          base = base_plus;
          offset = lo;
        }
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
                                   gen_rtx_MEM (QImode,
                                                plus_constant (Pmode, base,
                                                               offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
                                   gen_rtx_MEM (QImode,
                                                plus_constant (Pmode, base,
                                                               offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT
                                (SImode,
                                 gen_rtx_SUBREG (SImode, operands[0], 0),
                                 GEN_INT (8)),
                                scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT (SImode, scratch,
                                                GEN_INT (8)),
                                gen_rtx_SUBREG (SImode, operands[0], 0)));
}
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
        {
          ref = reg_equiv_mem (REGNO (ref));
          base = find_replacement (&XEXP (ref, 0));
        }
      else
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
        {
          /* Updating base_plus might destroy outval, see if we can
             swap the scratch and base_plus.  */
          if (!reg_overlap_mentioned_p (scratch, outval))
            std::swap (scratch, base_plus);
          else
            {
              rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

              /* Be conservative and copy OUTVAL into the scratch now,
                 this should only be necessary if outval is a subreg
                 of something larger than a word.  */
              /* XXX Might this clobber base?  I can't see how it can,
                 since scratch is known to overlap with OUTVAL, and
                 must be wider than a word.  */
              emit_insn (gen_movhi (scratch_hi, outval));
              outval = scratch_hi;
            }
        }

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
        {
          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

          /* Be careful not to destroy OUTVAL.  */
          if (reg_overlap_mentioned_p (base_plus, outval))
            {
              /* Updating base_plus might destroy outval, see if we
                 can swap the scratch and base_plus.  */
              if (!reg_overlap_mentioned_p (scratch, outval))
                std::swap (scratch, base_plus);
              else
                {
                  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

                  /* Be conservative and copy outval into scratch now,
                     this should only be necessary if outval is a
                     subreg of something larger than a word.  */
                  /* XXX Might this clobber base?  I can't see how it
                     can, since scratch is known to overlap with
                     outval.  */
                  emit_insn (gen_movhi (scratch_hi, outval));
                  outval = scratch_hi;
                }
            }

          /* Get the base address; addsi3 knows how to handle constants
             that require more than one insn.  */
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
          base = base_plus;
          offset = lo;
        }
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (Pmode, base,
                                                        offset + 1)),
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
                              GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
                                                                offset)),
                            gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
                                                                offset)),
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
                              GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (Pmode, base,
                                                        offset + 1)),
                            gen_lowpart (QImode, scratch)));
    }
}
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */
bool
arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
bool
arm_pad_reg_upward (machine_mode mode,
                    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
         and small complex types are always padded upwards.  */
      if (type)
        {
          if ((AGGREGATE_TYPE_P (type)
               || TREE_CODE (type) == COMPLEX_TYPE
               || FIXED_POINT_TYPE_P (type))
              && int_size_in_bytes (type) <= 4)
            return true;
        }
      else
        {
          if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
              && GET_MODE_SIZE (mode) <= 4)
            return true;
        }
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
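/* For example (illustrative only): under big-endian AAPCS a 3-byte
   struct held in a register is padded upwards, i.e. its bytes occupy
   the least significant end of the register, whereas a plain HImode
   integer falls through to the default and, being big-endian, is not
   padded upwards (!BYTES_BIG_ENDIAN is false).  */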
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;

  if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
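/* Examples (illustrative only): in Thumb-2 state offsets such as 0, 4
   and -1020 are accepted while 2 is rejected (not a multiple of 4) and
   1024 is rejected (out of range); in ARM state the limit is the
   LDRD/STRD +/-255 immediate range, so -255 is accepted and 256 is
   rejected.  */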
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  Pattern
   guarantees that both memory accesses use the same base register,
   the offsets are constants within the range, and the gap between the offsets is 4.
   If preload complete then check that registers are legal.  WBACK indicates whether
   address is updated.  LOAD indicates whether memory access is load or store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
                       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
          || (t == SP_REGNUM)
          || (t == PC_REGNUM)
          || (t2 == SP_REGNUM)
          || (t2 == PC_REGNUM)
          || (!load && (n == PC_REGNUM))
          || (load && (t == t2))
          /* Triggers Cortex-M3 LDRD errata.  */
          || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
          || (t2 == PC_REGNUM)
          || (t % 2 != 0)   /* First destination register is not even.  */
          || (t2 != t + 1)
          /* PC can be used as base register (for offset addressing only),
             but it is deprecated.  */
          || (n == PC_REGNUM)))
    return false;

  return true;
}
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE and
   OFFSET accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (GET_CODE (mem) == SUBREG)
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
                                 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
                        bool const_store, bool commute)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset;
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  int i, gap;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops + i], &cur_base, &cur_offset))
        return false;

      if (i == 0)
        base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
        return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
        {
          tmp = SUBREG_REG (operands[i]);
          gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
          operands[i] = tmp;
        }
    }

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
        mov r0, 0
        str r0, [r2]
        mov r0, 1
        str r0, [r2, #4]
     can be transformed into
        mov r1, 0
        mov r0, 1
        strd r1, r0, [r2]
     in Thumb mode assuming that r1 is free.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))
    {
      if (TARGET_THUMB2)
        {
          CLEAR_HARD_REG_SET (regset);
          tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
          if (tmp == NULL_RTX)
            return false;

          /* Use the new register in the first load to ensure that
             if the original input register is not dead after peephole,
             then it will have the correct constant value.  */
          operands[0] = tmp;
        }
      else if (TARGET_ARM)
        {
          int regno = REGNO (operands[0]);
          if (!peep2_reg_dead_p (4, operands[0]))
            {
              /* When the input register is even and is not dead after the
                 pattern, it has to hold the second constant but we cannot
                 form a legal STRD in ARM mode with this register as the second
                 register.  */
              if (regno % 2 == 0)
                return false;

              /* Is regno-1 free? */
              SET_HARD_REG_SET (regset);
              CLEAR_HARD_REG_BIT (regset, regno - 1);
              tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
              if (tmp == NULL_RTX)
                return false;

              operands[0] = tmp;
            }
          else
            {
              /* Find a DImode register.  */
              CLEAR_HARD_REG_SET (regset);
              tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
              if (tmp != NULL_RTX)
                {
                  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
                  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
                }
              else
                {
                  /* Can we use the input register to form a DI register?  */
                  SET_HARD_REG_SET (regset);
                  CLEAR_HARD_REG_BIT (regset,
                                      regno % 2 == 0 ? regno + 1 : regno - 1);
                  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
                  if (tmp == NULL_RTX)
                    return false;
                  operands[regno % 2 == 1 ? 0 : 1] = tmp;
                }
            }

          gcc_assert (operands[0] != NULL_RTX);
          gcc_assert (operands[1] != NULL_RTX);
          gcc_assert (REGNO (operands[0]) % 2 == 0);
          gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
        }
    }

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
    {
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);
      if (const_store)
        std::swap (operands[4], operands[5]);
    }
  else
    {
      gap = offsets[1] - offsets[0];
      offset = offsets[0];
    }

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != 4)
    return false;

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
                             false, load))
    return true;

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */
  if (TARGET_THUMB2)
    return false;

  if (load && commute)
    {
      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
                                 false, load))
        return true;
    }

  if (const_store)
    {
      /* If input registers are dead after this pattern, they can be
         reordered or replaced by other registers that are free in the
         current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
          || !peep2_reg_dead_p (4, operands[1]))
        return false;

      /* Try to reorder the input registers.  */
      /* For example, the code
           mov r0, 0
           mov r1, 1
           str r1, [r2]
           str r0, [r2, #4]
         can be transformed into
           mov r1, 0
           mov r0, 1
           strd r0, [r2]
      */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
                                 false, false))
        {
          std::swap (operands[0], operands[1]);
          return true;
        }

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
      while (true)
        {
          tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
          if (tmp == NULL_RTX)
            return false;

          /* DREG must be an even-numbered register in DImode.
             Split it into SI registers.  */
          operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
          operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
          gcc_assert (operands[0] != NULL_RTX);
          gcc_assert (operands[1] != NULL_RTX);
          gcc_assert (REGNO (operands[0]) % 2 == 0);
          gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

          return (operands_ok_ldrd_strd (operands[0], operands[1],
                                         base, offset,
                                         false, load));
        }
    }

  return false;
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
        int i;

        fprintf (f, "<");
        for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
          {
            fprintf (f, HOST_WIDE_INT_PRINT_HEX,
                     INTVAL (CONST_VECTOR_ELT (x, i)));
            if (i < (CONST_VECTOR_NUNITS (x) - 1))
              fputc (',', f);
          }
        fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

        ldr     rn, L1
        ...
        b       L2
        align
        L1:     .long value
        L2:
        ...

        ldr     rn, L3
        ...
        b       L4
        align
        L3:     .long value
        L4:
        ...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */
/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */

struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *            next;
  rtx_insn *        insn;
  HOST_WIDE_INT     address;
  rtx *             loc;
  machine_mode      mode;
  int               fix_size;
  rtx               value;
  Mnode *           minipool;
  HOST_WIDE_INT     forwards;
  HOST_WIDE_INT     backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
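/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   both evaluate to 4 (sub-word values are padded to a full word), while
   MINIPOOL_FIX_SIZE (DImode) evaluates to 8.  */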
static Mnode *  minipool_vector_head;
static Mnode *  minipool_vector_tail;
static rtx_code_label   *minipool_vector_label;
static int      minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *          minipool_fix_head;
Mfix *          minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *          minipool_barrier;

#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
        {
        case 1:
          /* Round up size of TBB table to a halfword boundary.  */
          size = (size + 1) & ~(HOST_WIDE_INT)1;
          break;
        case 2:
          /* No padding necessary for TBH.  */
          break;
        case 4:
          /* Add two bytes for alignment on Thumb.  */
          if (TARGET_THUMB)
            size += 2;
          break;
        default:
          gcc_unreachable ();
        }
      return size;
    }

  return 0;
}
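/* Worked example (illustrative only): a TBB-style ADDR_DIFF_VEC in
   QImode with 7 entries occupies 7 bytes, rounded up to 8 for the
   halfword boundary; an SImode table with 7 entries on Thumb takes
   7 * 4 + 2 = 30 bytes including the alignment padding.  */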
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */
static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
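/* For example, a label aligned to 8 bytes can be preceded by at most
   8 - 2 = 6 bytes of padding on Thumb (minimum insn size 2), or
   8 - 4 = 4 bytes on ARM.  */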
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
                               HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
        mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
        mp->max_address = max_mp->max_address - mp->fix_size;
      else
        mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
         mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
        mp->next->prev = mp->prev;
      else
        minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
        mp->prev->next = mp;
      else
        minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
         && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     can not be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
          && fix->mode == mp->mode
          && (!LABEL_P (fix->value)
              || (CODE_LABEL_NUMBER (fix->value)
                  == CODE_LABEL_NUMBER (mp->value)))
          && rtx_equal_p (fix->value, mp->value))
        {
          /* More than one fix references this entry.  */
          mp->refcount++;
          return move_minipool_fix_forward_ref (mp, max_mp, max_address);
        }

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
          && mp->max_address > max_address)
        max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
         we have not already found an insertion point, then
         make sure that all such 8-byte aligned quantities are
         placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
          && max_mp == NULL
          && fix->fix_size >= 8
          && mp->fix_size < 8)
        {
          max_mp = mp;
          max_address = mp->max_address;
        }
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
        {
          minipool_vector_head = mp;
          minipool_vector_label = gen_label_rtx ();
        }
      else
        mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
        mp->max_address = max_mp->max_address - mp->fix_size;
      else
        mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
        mp->prev->next = mp;
      else
        minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
         && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
                                HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
        mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
         mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
        mp->prev->next = mp->next;
      else
        minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
        mp->next->prev = mp;
      else
        minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
        offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
        mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
          >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
          && fix->mode == mp->mode
          && (!LABEL_P (fix->value)
              || (CODE_LABEL_NUMBER (fix->value)
                  == CODE_LABEL_NUMBER (mp->value)))
          && rtx_equal_p (fix->value, mp->value)
          /* Check that there is enough slack to move this entry to the
             end of the table (this is conservative).  */
          && (mp->max_address
              > (minipool_barrier->address
                 + minipool_vector_tail->offset
                 + minipool_vector_tail->fix_size)))
        {
          mp->refcount++;
          return move_minipool_fix_backward_ref (mp, min_mp, min_address);
        }

      if (min_mp != NULL)
        mp->min_address += fix->fix_size;
      else
        {
          /* Note the insertion point if necessary.  */
          if (mp->min_address < min_address)
            {
              /* For now, we do not allow the insertion of 8-byte alignment
                 requiring nodes anywhere but at the start of the pool.  */
              if (ARM_DOUBLEWORD_ALIGN
                  && fix->fix_size >= 8 && mp->fix_size < 8)
                return NULL;
              else
                min_mp = mp;
            }
          else if (mp->max_address
                   < minipool_barrier->address + mp->offset + fix->fix_size)
            {
              /* Inserting before this entry would push the fix beyond
                 its maximum address (which can happen if we have
                 re-located a forwards fix); force the new fix to come
                 after it.  */
              if (ARM_DOUBLEWORD_ALIGN
                  && fix->fix_size >= 8 && mp->fix_size < 8)
                return NULL;
              else
                {
                  min_mp = mp;
                  min_address = mp->min_address + fix->fix_size;
                }
            }
          /* Do not insert a non-8-byte aligned quantity before 8-byte
             aligned quantities.  */
          else if (ARM_DOUBLEWORD_ALIGN
                   && fix->fix_size < 8
                   && mp->fix_size >= 8)
            {
              min_mp = mp;
              min_address = mp->min_address + fix->fix_size;
            }
        }
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
        {
          minipool_vector_tail = mp;
          minipool_vector_label = gen_label_rtx ();
        }
      else
        mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
        mp->next->prev = mp;
      else
        minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
        mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
        mp->next->offset = mp->offset + mp->fix_size;
      else
        mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
        offset += mp->fix_size;
    }
}
/* Output the literal table */
static void
dump_minipool (rtx_insn *scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
        {
          align64 = 1;
          break;
        }

  if (dump_file)
    fprintf (dump_file,
             ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
             INSN_UID (scan), (unsigned long) minipool_barrier->address,
             align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
        {
          if (dump_file)
            {
              fprintf (dump_file,
                       ";;  Offset %u, min %ld, max %ld ",
                       (unsigned) mp->offset, (unsigned long) mp->min_address,
                       (unsigned long) mp->max_address);
              arm_print_value (dump_file, mp->value);
              fputc ('\n', dump_file);
            }

          switch (GET_MODE_SIZE (mp->mode))
            {
#ifdef HAVE_consttable_1
            case 1:
              scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_2
            case 2:
              scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_4
            case 4:
              scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_8
            case 8:
              scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_16
            case 16:
              scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
              break;
#endif
            default:
              gcc_unreachable ();
            }
        }

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx_insn *insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx_insn *next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
         than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx_jump_table_data *tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
         within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
         code that pushes minipool fixes.  */
      if (LABEL_P (from))
        count += get_label_padding (from);
      else
        count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
        {
          count += get_jump_table_size (tmp);

          /* Jump tables aren't in a basic block, so base the cost on
             the dispatch insn.  If we select this location, we will
             still put the pool after the table.  */
          new_cost = arm_barrier_cost (from);

          if (count < max_count
              && (!selected || new_cost <= selected_cost))
            {
              selected = tmp;
              selected_cost = new_cost;
              selected_address = fix->address + count;
            }

          /* Continue after the dispatch table.  */
          from = NEXT_INSN (tmp);
          continue;
        }

      new_cost = arm_barrier_cost (from);

      if (count < max_count
          && (!selected || new_cost <= selected_cost))
        {
          selected = from;
          selected_cost = new_cost;
          selected_address = fix->address + count;
        }

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Make sure we do not split a call and its corresponding
     CALL_ARG_LOCATION note.  */
  if (CALL_P (selected))
    {
      rtx_insn *next = NEXT_INSN (selected);
      if (next && NOTE_P (next)
          && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
        selected = next;
    }

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
                   machine_mode mode, rtx value)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
               ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
               GET_MODE_NAME (mode),
               INSN_UID (insn), (unsigned long) address,
               -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we always want to synthesize
   the value.  */
int
arm_max_const_double_inline_cost ()
{
  /* Let the value get synthesized to avoid the use of literal pools.  */
  if (arm_disable_literal_pool)
    return 99;

  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
                            NULL_RTX, NULL_RTX, 0, 0)
          + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
                              NULL_RTX, NULL_RTX, 0, 0));
}
/* Cost of loading a SImode constant.  */
static inline int
arm_const_inline_cost (enum rtx_code code, rtx val)
{
  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
                           NULL_RTX, NULL_RTX, 1, 0);
}
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
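/* For example (illustrative only): 0x000000ff000000ff is accepted,
   since each 32-bit half is 0xff, a valid ARM immediate; but a value
   whose high or low word cannot be encoded as a rotated 8-bit
   immediate (say one half equal to 0x12345678) makes this return
   false, and the constant must be loaded or synthesized some other
   way.  */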
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
        continue;

      /* If this alternative is a memory reference, then any mention
         of constants in this alternative is really to fool reload
         into allowing us to accept one there.  We need to fix them up
         now so that we output the right code.  */
      if (op_alt[opno].memory_ok)
        {
          rtx op = recog_data.operand[opno];

          if (CONSTANT_P (op))
            {
              if (do_pushes)
                push_minipool_fix (insn, address, recog_data.operand_loc[opno],
                                   recog_data.operand_mode[opno], op);
            }
          else if (MEM_P (op)
                   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
                   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
            {
              if (do_pushes)
                {
                  rtx cop = avoid_constant_pool_reference (op);

                  /* Casting the address of something to a mode narrower
                     than a word can cause avoid_constant_pool_reference()
                     to return the pool reference itself.  That's no good to
                     us here.  Let's just hope that we can use the
                     constant pool value directly.  */
                  if (op == cop)
                    cop = get_pool_constant (XEXP (op, 0));

                  push_minipool_fix (insn, address,
                                     recog_data.operand_loc[opno],
                                     recog_data.operand_mode[opno], cop);
                }
            }
        }
    }

  return;
}
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */

static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx pat, op0, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
        insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
          || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
        continue;

      /* Get the register with which we are comparing.  */
      pat = PATTERN (insn);
      op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
           (!insn_clobbered
            && prev != BB_HEAD (bb)
            && (NOTE_P (prev)
                || DEBUG_INSN_P (prev)
                || ((set = single_set (prev)) != NULL
                    && get_attr_conds (prev) == CONDS_NOCOND)));
           prev = PREV_INSN (prev))
        {
          if (reg_set_p (op0, prev))
            insn_clobbered = true;
        }

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)
        continue;

      if (!set)
        continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
          || !low_register_operand (src, SImode))
        continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
         in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
        {
          dest = copy_rtx (dest);
          src = copy_rtx (src);
          src = gen_rtx_MINUS (SImode, src, const0_rtx);
          PATTERN (prev) = gen_rtx_SET (dest, src);
          INSN_CODE (prev) = -1;
          /* Set test register in INSN to dest.  */
          XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
          INSN_CODE (insn) = -1;
        }
    }
}
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */

static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  enum Convert_Action {SKIP, CONV, SWAP_CONV};

  FOR_EACH_BB_FN (bb, cfun)
    {
      if ((current_tune->disparage_flag_setting_t16_encodings
           == tune_params::DISPARAGE_FLAGS_ALL)
          && optimize_bb_for_speed_p (bb))
        continue;

      rtx_insn *insn;
      Convert_Action action = SKIP;
      Convert_Action action_for_partial_flag_setting
        = ((current_tune->disparage_flag_setting_t16_encodings
            != tune_params::DISPARAGE_FLAGS_NEITHER)
           && optimize_bb_for_speed_p (bb))
          ? SKIP : CONV;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
        {
          if (NONJUMP_INSN_P (insn)
              && !REGNO_REG_SET_P (&live, CC_REGNUM)
              && GET_CODE (PATTERN (insn)) == SET)
            {
              action = SKIP;
              rtx pat = PATTERN (insn);
              rtx dst = XEXP (pat, 0);
              rtx src = XEXP (pat, 1);
              rtx op0 = NULL_RTX, op1 = NULL_RTX;

              if (UNARY_P (src) || BINARY_P (src))
                op0 = XEXP (src, 0);

              if (BINARY_P (src))
                op1 = XEXP (src, 1);

              if (low_register_operand (dst, SImode))
                {
                  switch (GET_CODE (src))
                    {
                    case PLUS:
                      /* Adding two registers and storing the result
                         in the first source is already a 16-bit
                         operation.  */
                      if (rtx_equal_p (dst, op0)
                          && register_operand (op1, SImode))
                        break;

                      if (low_register_operand (op0, SImode))
                        {
                          /* ADDS <Rd>,<Rn>,<Rm>  */
                          if (low_register_operand (op1, SImode))
                            action = CONV;
                          /* ADDS <Rdn>,#<imm8>  */
                          /* SUBS <Rdn>,#<imm8>  */
                          else if (rtx_equal_p (dst, op0)
                                   && CONST_INT_P (op1)
                                   && IN_RANGE (INTVAL (op1), -255, 255))
                            action = CONV;
                          /* ADDS <Rd>,<Rn>,#<imm3>  */
                          /* SUBS <Rd>,<Rn>,#<imm3>  */
                          else if (CONST_INT_P (op1)
                                   && IN_RANGE (INTVAL (op1), -7, 7))
                            action = CONV;
                        }
                      /* ADCS <Rd>, <Rn>  */
                      else if (GET_CODE (XEXP (src, 0)) == PLUS
                               && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
                               && low_register_operand (XEXP (XEXP (src, 0), 1),
                                                        SImode)
                               && COMPARISON_P (op1)
                               && cc_register (XEXP (op1, 0), VOIDmode)
                               && maybe_get_arm_condition_code (op1) == ARM_CS
                               && XEXP (op1, 1) == const0_rtx)
                        action = CONV;
                      break;

                    case MINUS:
                      /* RSBS <Rd>,<Rn>,#0
                         Not handled here: see NEG below.  */
                      /* SUBS <Rd>,<Rn>,#<imm3>
                         SUBS <Rdn>,#<imm8>
                         Not handled here: see PLUS above.  */
                      /* SUBS <Rd>,<Rn>,<Rm>  */
                      if (low_register_operand (op0, SImode)
                          && low_register_operand (op1, SImode))
                        action = CONV;
                      break;

                    case MULT:
                      /* MULS <Rdm>,<Rn>,<Rdm>
                         As an exception to the rule, this is only used
                         when optimizing for size since MULS is slow on all
                         known implementations.  We do not even want to use
                         MULS in cold code, if optimizing for speed, so we
                         test the global flag here.  */
                      if (!optimize_size)
                        break;
                      /* else fall through.  */
                    case AND:
                    case IOR:
                    case XOR:
                      /* ANDS <Rdn>,<Rm>  */
                      if (rtx_equal_p (dst, op0)
                          && low_register_operand (op1, SImode))
                        action = action_for_partial_flag_setting;
                      else if (rtx_equal_p (dst, op1)
                               && low_register_operand (op0, SImode))
                        action = action_for_partial_flag_setting == SKIP
                                 ? SKIP : SWAP_CONV;
                      break;

                    case ASHIFTRT:
                    case ASHIFT:
                    case LSHIFTRT:
                      /* ASRS <Rdn>,<Rm> */
                      /* LSRS <Rdn>,<Rm> */
                      /* LSLS <Rdn>,<Rm> */
                      if (rtx_equal_p (dst, op0)
                          && low_register_operand (op1, SImode))
                        action = action_for_partial_flag_setting;
                      /* ASRS <Rd>,<Rm>,#<imm5> */
                      /* LSRS <Rd>,<Rm>,#<imm5> */
                      /* LSLS <Rd>,<Rm>,#<imm5> */
                      else if (low_register_operand (op0, SImode)
                               && CONST_INT_P (op1)
                               && IN_RANGE (INTVAL (op1), 0, 31))
                        action = action_for_partial_flag_setting;
                      break;

                    case ROTATERT:
                      /* RORS <Rdn>,<Rm>  */
                      if (rtx_equal_p (dst, op0)
                          && low_register_operand (op1, SImode))
                        action = action_for_partial_flag_setting;
                      break;

                    case NOT:
                      /* MVNS <Rd>,<Rm>  */
                      if (low_register_operand (op0, SImode))
                        action = action_for_partial_flag_setting;
                      break;

                    case NEG:
                      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
                      if (low_register_operand (op0, SImode))
                        action = CONV;
                      break;

                    case CONST_INT:
                      /* MOVS <Rd>,#<imm8>  */
                      if (CONST_INT_P (src)
                          && IN_RANGE (INTVAL (src), 0, 255))
                        action = action_for_partial_flag_setting;
                      break;

                    case REG:
                      /* MOVS and MOV<c> with registers have different
                         encodings, so are not relevant here.  */
                      break;

                    default:
                      break;
                    }
                }

              if (action != SKIP)
                {
                  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
                  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
                  rtvec vec;

                  if (action == SWAP_CONV)
                    {
                      src = copy_rtx (src);
                      XEXP (src, 0) = op1;
                      XEXP (src, 1) = op0;
                      pat = gen_rtx_SET (dst, src);
                      vec = gen_rtvec (2, pat, clobber);
                    }
                  else /* action == CONV */
                    vec = gen_rtvec (2, pat, clobber);

                  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
                  INSN_CODE (insn) = -1;
                }
            }

          if (NONDEBUG_INSN_P (insn))
            df_simulate_one_insn_backwards (bb, insn, &live);
        }
    }

  CLEAR_REG_SET (&live);
}
/* GCC puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx_insn *insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (TARGET_THUMB1)
    thumb1_reorg ();
  else if (TARGET_THUMB2)
    thumb2_reorg ();

  /* Ensure all insns that must be split have been split at this point.
     Otherwise, the pool placement code below may compute incorrect
     insn lengths.  Note that when optimizing, all insns have already
     been split at this point.  */
  if (!optimize)
    split_all_insns_noflow ();

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (NOTE_P (insn));

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx_jump_table_data *table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if (tablejump_p (insn, NULL, &table))
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
      else if (LABEL_P (insn))
	/* Add the worst-case padding due to alignment.  We don't add
	   the _current_ padding because the minipool insertions
	   themselves might change it.  */
	address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix * last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (BARRIER_P (ftmp->insn))
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is the first fix that we can't fit into this pool and
	     there are no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (!BARRIER_P (ftmp->insn)
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (!BARRIER_P (this_fix->insn))
	  {
	    rtx addr
	      = plus_constant (Pmode,
			       gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  cfun->machine->after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}

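/* Illustrative sketch (hypothetical helper, not referenced elsewhere) of
   the clamp applied above when no natural barrier exists: the forced
   barrier must be placed no later than the first fix that no longer fits
   the current pool.  */
static inline HOST_WIDE_INT
forced_barrier_max_address (HOST_WIDE_INT pool_max_address,
			    HOST_WIDE_INT first_unfit_fix_address)
{
  /* Mirrors: if (ftmp->address < max_address)
		max_address = ftmp->address + 1;  */
  return (first_unfit_fix_address < pool_max_address
	  ? first_unfit_fix_address + 1
	  : pool_max_address);
}
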
/* Routines to output assembly language.  */

/* Return string representation of passed in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
  return "0";
}

/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
			 bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);

  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
	gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
    {
      /* Output pop (not stmfd) because it has a shorter encoding.  */
      gcc_assert (update);
      sprintf (pattern, "pop%s\t{", conditional);
    }
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
	 It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
	sprintf (pattern, "ldm%sfd\t", conditional);
      else if (TARGET_UNIFIED_ASM)
	sprintf (pattern, "ldmia%s\t", conditional);
      else
	sprintf (pattern, "ldm%sia\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
	strcat (pattern, "!, {");
      else
	strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
	  reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
	      reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}

/* Output the assembly for a store multiple.  */

const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
		   ? XEXP (operands[0], 0)
		   : XEXP (XEXP (operands[0], 0), 0);
  bool push_p =  REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }
  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}

/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(Pmode, stack_pointer_rtx,
					 - (count * 8)))
		    ),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (gen_frame_mem (DFmode,
					plus_constant (Pmode,
						       stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}

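/* Minimal sketch (hypothetical helper) of the splitting rule above: FSTMD
   stores at most 16 register pairs, so COUNT pairs need ceil (COUNT / 16)
   instructions -- at most two in practice, per the comment above.  */
static inline int
fstmd_insn_count (int count)
{
  return (count + 15) / 16;
}
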
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */
void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
	 linker.  We need to add an IP clobber to allow setting
	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
	 is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}

/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}

/* Output a 'call' insn that is a reference in memory.  This is
   disabled for ARMv5 and we prefer a blx instead because otherwise
   there's a significant performance overhead.  */
const char *
output_call_mem (rtx *operands)
{
  gcc_assert (!arm_arch5);
  if (TARGET_INTERWORK)
    {
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("bx%?\t%|ip", operands);
    }
  else if (regno_use_in (LR_REGNUM, operands[0]))
    {
      /* LR is used in the memory address.  We load the address in the
	 first instruction.  It's safe to use IP as the target of the
	 load since the call will kill it anyway.  */
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      if (arm_arch4t)
	output_asm_insn ("bx%?\t%|ip", operands);
      else
	output_asm_insn ("mov%?\t%|pc, %|ip", operands);
    }
  else
    {
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("ldr%?\t%|pc, %0", operands);
    }

  return "";
}

/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}

void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					     GEN_INT (16)),
		       GEN_INT ((val >> 16) & 0x0000ffff));
      return;
    }
  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
}

/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
const char *
output_move_double (rtx *operands, bool emit, int *count)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];
  if (count)
    *count = 1;

  /* The only case when this might happen is when
     you are looking at the length of a DImode instruction
     that has an invalid constant in it.  */
  if (code0 == REG && code1 != MEM)
    {
      gcc_assert (!emit);
      *count = 2;
      return "";
    }

  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (TARGET_LDRD
		  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
		output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
	      else
		output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
	      else
		output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
	      else
		output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  /* Autoincrement addressing modes should never have overlapping
	     base and destination registers, and overlapping index registers
	     are already prohibited, so this doesn't need to worry about
	     fix_cm3_ldrd.  */
	  otherops[0] = operands[0];
	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
	    {
	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
		{
		  /* Registers overlap so split out the increment.  */
		  if (emit)
		    {
		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
		      output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  /* Use a single insn if we can.
		     FIXME: IWMMXT allows offsets larger than ldrd can
		     handle, fix these up with a pair of ldr.  */
		  if (TARGET_THUMB2
		      || !CONST_INT_P (otherops[2])
		      || (INTVAL (otherops[2]) > -256
			  && INTVAL (otherops[2]) < 256))
		    {
		      if (emit)
			output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
		    }
		  else
		    {
		      if (emit)
			{
			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
			}
		      if (count)
			*count = 2;
		    }
		}
	    }
	  else
	    {
	      /* Use a single insn if we can.
		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
		 fix these up with a pair of ldr.  */
	      if (TARGET_THUMB2
		  || !CONST_INT_P (otherops[2])
		  || (INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256))
		{
		  if (emit)
		    output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  break;

	case LABEL_REF:
	case CONST:
	  /* We might be able to use ldrd %0, %1 here.  However the range is
	     different to ldr/adr, and it is broken on some ARMv7-M
	     implementations.  */
	  /* Use the second register of the pair to avoid problematic
	     conditionals.  */
	  otherops[1] = operands[1];
	  if (emit)
	    output_asm_insn ("adr%?\t%0, %1", otherops);
	  operands[1] = otherops[0];
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
	      else
		output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
	    }

	  if (count)
	    *count = 2;
	  break;

	  /* ??? This needs checking for thumb2.  */
	default:
	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
	    {
	      otherops[0] = operands[0];
	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);

	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
		{
		  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
		    {
		      switch ((int) INTVAL (otherops[2]))
			{
			case -8:
			  if (emit)
			    output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
			  return "";
			case -4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
			  return "";
			case 4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
			  return "";
			}
		    }
		  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
		  operands[1] = otherops[0];
		  if (TARGET_LDRD
		      && (REG_P (otherops[2])
			  || TARGET_THUMB2
			  || (CONST_INT_P (otherops[2])
			      && INTVAL (otherops[2]) > -256
			      && INTVAL (otherops[2]) < 256)))
		    {
		      if (reg_overlap_mentioned_p (operands[0],
						   otherops[2]))
			{
			  /* Swap base and index registers over to
			     avoid a conflict.  */
			  std::swap (otherops[1], otherops[2]);
			}
		      /* If both registers conflict, it will usually
			 have been fixed by a splitter.  */
		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
			{
			  if (emit)
			    {
			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
			      output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
			    }
			  if (count)
			    *count = 2;
			}
		      else
			{
			  otherops[0] = operands[0];
			  if (emit)
			    output_asm_insn ("ldr%(d%)\t%0, [%1, %2]",
					     otherops);
			}
		      return "";
		    }

		  if (CONST_INT_P (otherops[2]))
		    {
		      if (emit)
			{
			  if (!(const_ok_for_arm (INTVAL (otherops[2]))))
			    output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
			  else
			    output_asm_insn ("add%?\t%0, %1, %2", otherops);
			}
		    }
		  else if (emit)
		    output_asm_insn ("add%?\t%0, %1, %2", otherops);
		}
	      else if (emit)
		output_asm_insn ("sub%?\t%0, %1, %2", otherops);

	      if (TARGET_LDRD)
		return "ldr%(d%)\t%0, [%1]";

	      return "ldm%(ia%)\t%1, %M0";
	    }
	  else
	    {
	      otherops[1] = adjust_address (operands[1], SImode, 4);
	      /* Take care of overlapping base/data reg.  */
	      if (reg_mentioned_p (operands[0], operands[1]))
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	}
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
		  || (TARGET_ARM && TARGET_LDRD));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
	      else
		output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
	      else
		output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
	      else
		output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  otherops[0] = operands[1];
	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

	  /* IWMMXT allows offsets larger than ldrd can handle,
	     fix these up with a pair of ldr.  */
	  if (!TARGET_THUMB2
	      && CONST_INT_P (otherops[2])
	      && (INTVAL(otherops[2]) <= -256
		  || INTVAL(otherops[2]) >= 256))
	    {
	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
	    {
	      if (emit)
		output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
	    }
	  else
	    {
	      if (emit)
		output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
	    }
	  break;

	case PLUS:
	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
	  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
	    {
	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
		{
		case -8:
		  if (emit)
		    output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
		  return "";

		case -4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
		  return "";

		case 4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
		  return "";
		}
	    }
	  if (TARGET_LDRD
	      && (REG_P (otherops[2])
		  || TARGET_THUMB2
		  || (CONST_INT_P (otherops[2])
		      && INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256)))
	    {
	      otherops[0] = operands[1];
	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
	      if (emit)
		output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
	      return "";
	    }
	  /* Fall through */

	default:
	  otherops[0] = adjust_address (operands[0], SImode, 4);
	  otherops[1] = operands[1];
	  if (emit)
	    {
	      output_asm_insn ("str%?\t%1, %0", operands);
	      output_asm_insn ("str%?\t%H1, %0", otherops);
	    }
	  if (count)
	    *count = 2;
	}
    }

  return "";
}

/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */

      if (MEM_P (operands[1]))
	{
	  switch (GET_CODE (XEXP (operands[1], 0)))
	    {
	    case REG:
	      output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
	      break;

	    case LABEL_REF:
	    case CONST:
	      output_asm_insn ("adr%?\t%0, %1", operands);
	      output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else
	{
	  rtx ops[2];
	  int dest, src, i;

	  gcc_assert (REG_P (operands[1]));

	  dest = REGNO (operands[0]);
	  src = REGNO (operands[1]);

	  /* This seems pretty dumb, but hopefully GCC won't try to do it
	     very often.  */
	  if (dest < src)
	    for (i = 0; i < 4; i++)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	  else
	    for (i = 3; i >= 0; i--)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	}
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return "";
}

/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert (mode == SFmode
	      || mode == DFmode
	      || mode == SImode
	      || mode == DImode
	      || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "v%sr%%?.%s\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
	   load ? "ld" : "st",
	   dp ? "64" : "32",
	   dp ? "P" : "",
	   integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}

/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM, as required by the EABI.
   GCC RTL defines element ordering based on in-memory order.
   This can be different from the architectural ordering of elements
   within a NEON register.  The intrinsics defined in arm_neon.h use the
   NEON register element ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */
const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = HARD_REGNO_NREGS (regno, mode) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
	      || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
	      || VALID_NEON_QREG_MODE (mode)
	      || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4)
	{
	  templ = "v%smia%%?\t%%0!, %%h1";
	  ops[0] = XEXP (addr, 0);
	}
      else
	{
	  templ = "v%s1.64\t%%h1, %%A0";
	  ops[0] = mem;
	}
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
	 pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case REG:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 1)
	{
	  if (nregs > 4)
	    templ = "v%smia%%?\t%%m0, %%h1";
	  else
	    templ = "v%s1.64\t%%h1, %%A0";

	  ops[0] = mem;
	  ops[1] = reg;
	  break;
	}
      /* Fall through.  */
    case LABEL_REF:
    case PLUS:
      {
	int i;
	int overlap = -1;
	for (i = 0; i < nregs; i++)
	  {
	    /* We're only using DImode here because it's a convenient
	       size.  */
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
	    ops[1] = adjust_address (mem, DImode, 8 * i);
	    if (reg_overlap_mentioned_p (ops[0], mem))
	      {
		gcc_assert (overlap == -1);
		overlap = i;
	      }
	    else
	      {
		sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
		output_asm_insn (buff, ops);
	      }
	  }
	if (overlap != -1)
	  {
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
	    sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
	    output_asm_insn (buff, ops);
	  }

	return "";
      }

    default:
      gcc_unreachable ();
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}

/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx_insn *insn)
{
  rtx reg, mem, addr;
  int load;
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case EImode:
	case OImode:
	  return 8;
	case CImode:
	  return 12;
	case XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  mode = GET_MODE (reg);
  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
      return insns * 4;
    }
  else
    return 4;
}

/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx_insn *insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
	  && REG_P (XEXP (addr, 0))
	  && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}

/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}

/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
	{
	  if (n & (3 << i))
	    {
	      operands[immed_op] = GEN_INT (n & (255 << i));
	      output_asm_insn (instr, operands);
	      instr = instr2;
	      i += 6;
	    }
	}
    }

  return "";
}

/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem (enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      abort ();
    }
}

/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem (GET_CODE (op));

    default:
      gcc_unreachable ();
    }
}

/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem (code);
      if (CONST_INT_P (XEXP (op, 1)))
	{
	  *amountp = INTVAL (XEXP (op, 1));
	}
      else if (REG_P (XEXP (op, 1)))
	{
	  *amountp = -1;
	  return mnem;
	}
      else
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
	 power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = int_log2 (*amountp);
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
	mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}

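/* Host-side sketch (hypothetical, not used by the compiler) of the MULT
   case above: multiplying by a power of two m == 1 << k is the same as
   shifting left by k, which is why shift_op returns ARM_LSL_NAME with
   *amountp set to int_log2 (m).  */
static inline unsigned int
mult_by_power_of_two_as_lsl (unsigned int x, unsigned int m)
{
  /* Assumes m is a non-zero power of two, as asserted above.  */
  unsigned int k = 0;
  while ((m & 1) == 0)
    {
      m >>= 1;
      k++;
    }
  return x << k;	/* Equal to x times the original m, modulo 2^32.  */
}
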
/* Obtain the shift from the POWER of two.  */

static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT power)
{
  HOST_WIDE_INT shift = 0;

  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
    {
      gcc_assert (shift <= 31);
      shift++;
    }

  return shift;
}

/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}

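/* Illustrative transcription of the escaping rules above (hypothetical
   helper): printable characters pass through, with backslash and double
   quote costing one extra escape byte; everything else becomes a
   four-byte "\ooo" octal escape, as printed by the fprintf above.  */
static inline int
ascii_char_cost (int c)
{
  if (ISPRINT (c))
    return (c == '\\' || c == '\"') ? 2 : 1;
  return 4;
}
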
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_regs[reg] \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))

/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_reg_mask.  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
	 even call clobbered ones.  If this is a leaf function
	 we can just examine the registers used by the RTL, but
	 otherwise we have to assume that whatever function is
	 called might clobber anything, and so we have to save
	 all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
	/* FIQ handlers have registers r8 - r12 banked, so
	   we only need to check r0 - r7, Normal ISRs only
	   bank r14 and r15, so we must check up to r12.
	   r13 is the stack pointer which is always preserved,
	   so we do not need to consider it here.  */
	max_reg = 7;
      else
	max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
	if (df_regs_ever_live_p (reg)
	    || (! crtl->is_leaf && call_used_regs[reg]))
	  save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (flag_pic
	  && !TARGET_SINGLE_PIC_BASE
	  && arm_pic_register != INVALID_REGNUM
	  && crtl->uses_pic_offset_table)
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE(func_type))
    {
      /* For noreturn functions we historically omitted register saves
	 altogether.  However this really messes up debugging.  As a
	 compromise save just the frame pointers.  Combined with the link
	 register saved elsewhere this should be sufficient to get
	 a backtrace.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
	 which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
	if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
	  save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
	 don't stack it even though it may be live.  */
      if (flag_pic
	  && !TARGET_SINGLE_PIC_BASE
	  && arm_pic_register != INVALID_REGNUM
	  && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
	      || crtl->uses_pic_offset_table))
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
	save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
	{
	  reg = EH_RETURN_DATA_REGNO (i);
	  if (reg == INVALID_REGNUM)
	    break;
	  save_reg_mask |= 1 << reg;
	}
    }

  return save_reg_mask;
}

/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}

/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* See the defining assertion in arm_expand_prologue.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
      && IS_NESTED (arm_current_func_type ())
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}

/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.
   This is used by arm_get_frame_offsets, which may add extra registers.  */

static unsigned long
arm_compute_save_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
	  && optimize_size
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->tail_call_emit
	  && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
			   arm_compute_static_chain_stack_bytes())
	   ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
	 onto the stack is odd.  We need to ensure that the stack
	 is 64-bit aligned before we start to save iWMMXt registers,
	 and also before we start to create locals.  (A local variable
	 might be a double or long long which we will load/store using
	 an iWMMXt instruction).  Therefore we need to push another
	 ARM register, so that the stack will be 64-bit aligned.  We
	 try to avoid using the arg registers (r0 -r3) as they might be
	 used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
	if ((save_reg_mask & (1 << reg)) == 0)
	  break;

      if (reg <= 12)
	save_reg_mask |= (1 << reg);
      else
	{
	  cfun->machine->sibcall_blocked = 1;
	  save_reg_mask |= (1 << 3);
	}
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
	save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}

/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg ++)
    if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
      mask |= 1 << reg;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
	 we will use.  If the register is live then we will
	 have to push it.  Use LAST_LO_REGNUM as our fallback
	 choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
	 not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
	reg = LAST_LO_REGNUM;

      if (callee_saved_reg_p (reg))
	mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
	 determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
	if (mask & (1 << reg))
	  break;

      if (reg > LAST_LO_REGNUM)
	{
	  /* Make sure we have a register available for stack decrement.  */
	  mask |= 1 << LAST_LO_REGNUM;
	}
    }

  return mask;
}

/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
	   regno < LAST_VFP_REGNUM;
	   regno += 2)
	{
	  if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
	      && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
	    {
	      if (count > 0)
		{
		  /* Workaround ARM10 VFPr1 bug.  */
		  if (count == 2 && !arm_arch6)
		    count++;
		  saved += count * 8;
		}
	      count = 0;
	    }
	  else
	    count++;
	}
      if (count > 0)
	{
	  if (count == 2 && !arm_arch6)
	    count++;
	  saved += count * 8;
	}
    }
  return saved;
}

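/* Sketch of the per-block size rule used above (hypothetical helper): each
   contiguous block of COUNT saved D-register pairs costs COUNT * 8 bytes,
   padded to three pairs when exactly two would be stored on a pre-v6 core
   (the ARM10 VFPr1 workaround, matching vfp_emit_fstmd).  */
static inline int
vfp_block_saved_bytes (int count, int have_arm_arch6)
{
  if (count == 2 && !have_arm_arch6)
    count++;
  return count * 8;
}
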
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
			   bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
	 found a tail call, then we have to trust that the called
	 function won't return.  */
      if (really_return)
	{
	  rtx ops[2];

	  /* Otherwise, trap an attempted return by aborting.  */
	  ops[0] = operand;
	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
				       : "abort");
	  assemble_external_libcall (ops[1]);
	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
	}

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
	 (e.g. interworking) then we can load the return address
	 directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
	return_reg = reg_names[PC_REGNUM];
      else
	return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
	{
	  /* There are three possible reasons for the IP register
	     being saved.  1) a stack frame was created, in which case
	     IP contains the old stack pointer, or 2) an ISR routine
	     corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     restore IP.  */
	  if (frame_pointer_needed)
	    {
	      live_regs_mask &= ~ (1 << IP_REGNUM);
	      live_regs_mask |=   (1 << SP_REGNUM);
	    }
	  else
	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
	}

      /* On some ARM architectures it is faster to use LDR rather than
	 LDM to load a single register.  On other architectures, the
	 cost is the same.  In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
	if (live_regs_mask == (1U << reg))
	  break;

      if (reg <= LAST_ARM_REGNUM
	  && (reg != LR_REGNUM
	      || ! really_return
	      || ! IS_INTERRUPT (func_type)))
	{
	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
	}
      else
	{
	  char *p;
	  int first = 1;

	  /* Generate the load multiple instruction to restore the
	     registers.  Note we can get here, even if
	     frame_pointer_needed is true, but only if sp already
	     points to the base of the saved core registers.  */
	  if (live_regs_mask & (1 << SP_REGNUM))
	    {
	      unsigned HOST_WIDE_INT stack_adjust;

	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);

	      if (stack_adjust && arm_arch5 && TARGET_ARM)
		if (TARGET_UNIFIED_ASM)
		  sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
		else
		  sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
	      else
		{
		  /* If we can't use ldmib (SA110 bug),
		     then try to pop r3 instead.  */
		  if (stack_adjust)
		    live_regs_mask |= 1 << 3;

		  if (TARGET_UNIFIED_ASM)
		    sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
		  else
		    sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
		}
	    }
	  else
	    {
	      if (TARGET_UNIFIED_ASM)
		sprintf (instr, "pop%s\t{", conditional);
	      else
		sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
	    }

	  p = instr + strlen (instr);

	  for (reg = 0; reg <= SP_REGNUM; reg++)
	    if (live_regs_mask & (1 << reg))
	      {
		int l = strlen (reg_names[reg]);

		if (first)
		  first = 0;
		else
		  {
		    memcpy (p, ", ", 2);
		    p += 2;
		  }

		memcpy (p, "%|", 2);
		memcpy (p + 2, reg_names[reg], l);
		p += l + 2;
	      }

	  if (live_regs_mask & (1 << LR_REGNUM))
	    {
	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
	      /* If returning from an interrupt, restore the CPSR.  */
	      if (IS_INTERRUPT (func_type))
		strcat (p, "^");
	    }
	  else
	    strcpy (p, "}");
	}

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
	 perform the actual function return.  */
      if (really_return
	  && func_type != ARM_FT_INTERWORKED
	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
	{
	  /* The return has already been handled
	     by loading the LR into the PC.  */
	  return "";
	}
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
	{
	case ARM_FT_ISR:
	case ARM_FT_FIQ:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
	  break;

	case ARM_FT_INTERWORKED:
	  sprintf (instr, "bx%s\t%%|lr", conditional);
	  break;

	case ARM_FT_EXCEPTION:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
	  break;

	default:
	  /* Use bx if it's available.  */
	  if (arm_arch5 || arm_arch4t)
	    sprintf (instr, "bx%s\t%%|lr", conditional);
	  else
	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
	  break;
	}

      output_asm_insn (instr, & operand);
    }

  return "";
}

/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0x00ffffff).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}

/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
{
  unsigned long func_type;

  /* ??? Do we want to print some of the below anyway?  */
  if (TARGET_THUMB1)
    return;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
	       crtl->args.size,
	       crtl->args.pretend_args_size, frame_size);

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
	       frame_pointer_needed,
	       cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}

static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			      HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
	 of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
	{
	  rtx label = cfun->machine->call_via[regno];

	  if (label != NULL)
	    {
	      switch_to_section (function_section (current_function_decl));
	      targetm.asm_out.internal_label (asm_out_file, "L",
					      CODE_LABEL_NUMBER (label));
	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	    }
	}

      /* ??? Probably not safe to set this here, since it assumes that a
	 function will be emitted as assembly immediately after we generate
	 RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
		  || (cfun->machine->return_used_this_function != 0)
		  || offsets->saved_regs == offsets->outgoing_args
		  || frame_pointer_needed);
    }
}

19694 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19695 STR and STRD. If an even number of registers are being pushed, one
19696 or more STRD patterns are created for each register pair. If an
19697 odd number of registers are pushed, emit an initial STR followed by
19698 as many STRD instructions as are needed. This works best when the
19699 stack is initially 64-bit aligned (the normal case), since it
19700 ensures that each STRD is also 64-bit aligned. */
19702 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19707 rtx par
= NULL_RTX
;
19708 rtx dwarf
= NULL_RTX
;
19712 num_regs
    = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  i = 0;

  /* If there's an odd number of registers to push, start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
        mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
                                                     stack_pointer_rtx));
      else
        mem = gen_frame_mem (Pmode,
                             gen_rtx_PRE_MODIFY
                             (Pmode, stack_pointer_rtx,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             -4 * num_regs)));

      tmp = gen_rtx_SET (mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
        rtx reg1, reg2, mem1, mem2;
        rtx tmp0, tmp1, tmp2;
        int regno2;

        /* Find the register to pair with this one.  */
        for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
             regno2++)
          ;

        reg1 = gen_rtx_REG (SImode, regno);
        reg2 = gen_rtx_REG (SImode, regno2);

        if (i == 0)
          {
            rtx insn;

            /* The first pair of stores also allocates the whole area.  */
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * num_regs));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        -4 * (num_regs - 1)));
            tmp0 = gen_rtx_SET (stack_pointer_rtx,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               -4 * num_regs));
            tmp1 = gen_rtx_SET (mem1, reg1);
            tmp2 = gen_rtx_SET (mem2, reg2);
            RTX_FRAME_RELATED_P (tmp0) = 1;
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
            XVECEXP (par, 0, 0) = tmp0;
            XVECEXP (par, 0, 1) = tmp1;
            XVECEXP (par, 0, 2) = tmp2;
            insn = emit_insn (par);
            RTX_FRAME_RELATED_P (insn) = 1;
            add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
          }
        else
          {
            /* Subsequent pairs store at positive offsets from the
               already-adjusted stack pointer.  */
            mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * i));
            mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
                                                        stack_pointer_rtx,
                                                        4 * (i + 1)));
            tmp1 = gen_rtx_SET (mem1, reg1);
            tmp2 = gen_rtx_SET (mem2, reg2);
            RTX_FRAME_RELATED_P (tmp1) = 1;
            RTX_FRAME_RELATED_P (tmp2) = 1;
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            XVECEXP (par, 0, 0) = tmp1;
            XVECEXP (par, 0, 1) = tmp2;
            emit_insn (par);
          }

        /* Create unwind information.  This is an approximation.  */
        tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * i)),
                            reg1);
        tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
                                           plus_constant (Pmode,
                                                          stack_pointer_rtx,
                                                          4 * (i + 1))),
                            reg2);
        RTX_FRAME_RELATED_P (tmp1) = 1;
        RTX_FRAME_RELATED_P (tmp2) = 1;
        XVECEXP (dwarf, 0, i + 1) = tmp1;
        XVECEXP (dwarf, 0, i + 2) = tmp2;
        i += 2;
        regno = regno2 + 1;
      }
    else
      regno++;

  return;
}
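
/* For illustration only (this sequence is an assumption, not taken from
   the code above; exact output depends on target options): for a mask of
   {r4, r5, r6} -- three registers, so the odd-count path runs first --
   the emitted Thumb-2 sequence would look something like

	str	r4, [sp, #-12]!	@ allocate all 12 bytes, store first reg
	strd	r5, r6, [sp, #4]

   while the attached REG_FRAME_RELATED_EXPR note describes one 12-byte
   stack decrement plus three individual word stores.  */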
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   pre-indexed addressing.  All other stores use offset addressing.  If no
   STRD can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more
   scheduling freedom and can be turned into an STM by peephole
   optimizations.  */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: More efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2 == 0)
            && (saved_regs_mask & (1 << (j + 1))))
          {
            /* Current register and previous register form register pair for
               which STRD can be generated.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (DImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset + 4));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 8;
            j += 2;
          }
        else
          {
            /* Emit a single word store.  */
            if (offset < 0)
              {
                /* Allocate stack space for all saved registers.  */
                tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
                tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
                mem = gen_frame_mem (SImode, tmp);
                offset = 0;
              }
            else if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            tmp = emit_insn (tmp);

            /* Record the first store insn.  */
            if (dwarf_index == 1)
              insn = tmp;

            /* Generate dwarf info.  */
            mem = gen_frame_mem (SImode,
                                 plus_constant (Pmode,
                                                stack_pointer_rtx,
                                                offset));
            tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
            RTX_FRAME_RELATED_P (tmp) = 1;
            XVECEXP (dwarf, 0, dwarf_index++) = tmp;

            offset += 4;
            j += 1;
          }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
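
/* For illustration only (assumed output, not taken from the code above):
   for a mask of {r4, r5, r6} the loop above would emit roughly

	strd	r4, r5, [sp, #-12]!	@ pre-modify allocates the area
	str	r6, [sp, #8]

   with the explicit SP update and the per-word stores recorded only in
   the DWARF sequence attached to the first store.  */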
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        num_regs++;

      if (dwarf_regs_mask & (1 << i))
        num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
           (set (mem:BLK (pre_modify:SI (reg:SI sp)
                                        (const_int:SI <num>)))
                (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI XX))
           (use (reg:SI YY))
           ...
        ])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
           ...
        ])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, 0)
            = gen_rtx_SET (gen_frame_mem
                           (BLKmode,
                            gen_rtx_PRE_MODIFY (Pmode,
                                                stack_pointer_rtx,
                                                plus_constant
                                                (Pmode, stack_pointer_rtx,
                                                 -4 * num_regs))),
                           gen_rtx_UNSPEC (BLKmode,
                                           gen_rtvec (1, reg),
                                           UNSPEC_PUSH_MULT));

          if (dwarf_regs_mask & (1 << i))
            {
              tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
                                 reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          break;
        }
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

          if (dwarf_regs_mask & (1 << i))
            {
              tmp
                = gen_rtx_SET (gen_frame_mem
                               (SImode,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               4 * j)),
                               reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          j++;
        }
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
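
/* For illustration only (assumed output, not taken from the code above):
   a MASK of {r4, r5, lr} is emitted as the single insn

	push	{r4, r5, lr}

   while the REG_FRAME_RELATED_EXPR note expands to the explicit SP -= 12
   adjustment plus three word stores shown schematically above, which is
   the form the unwinder wants to see.  */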
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
  int offset_adj;
  int emit_update;

  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
                          rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    XVECEXP (par, 0, 0) = ret_rtx;

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
         num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
        reg = gen_rtx_REG (SImode, i);
        if ((num_regs == 1) && emit_update && !return_in_pc)
          {
            /* Emit single load with writeback.  */
            tmp = gen_frame_mem (SImode,
                                 gen_rtx_POST_INC (Pmode,
                                                   stack_pointer_rtx));
            tmp = emit_insn (gen_rtx_SET (reg, tmp));
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
            return;
          }

        tmp = gen_rtx_SET (reg,
                           gen_frame_mem
                           (SImode,
                            plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
        RTX_FRAME_RELATED_P (tmp) = 1;
        XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

        /* We need to maintain a sequence for DWARF info too.  As dwarf info
           should not have PC, skip PC.  */
        if (i != PC_REGNUM)
          dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
                                 stack_pointer_rtx, stack_pointer_rtx);
}
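
/* For illustration only (assumed output, not taken from the code above):
   a SAVED_REGS_MASK of {r4, r5, pc} becomes

	pop	{r4, r5, pc}

   emitted as a jump insn carrying ret_rtx, whereas a single-register mask
   such as {r4} takes the writeback path above and becomes

	ldr	r4, [sp], #4  */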
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
        first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (reg,
                         gen_frame_mem
                         (DFmode,
                          plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
                                 base_reg, base_reg);
}
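
/* For illustration only (assumed register numbering and output, not taken
   from the code above): restoring four D-registers starting at d8 with
   BASE_REG == sp would produce roughly

	vldm	sp!, {d8-d11}

   together with one REG_CFA_RESTORE note per D register and a
   REG_CFA_ADJUST_CFA note for the 32-byte adjustment.  */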
/* Generate and emit a pattern that will be recognized as an LDRD pattern.
   If an even number of registers is being popped, multiple LDRD patterns
   are created for all register pairs.  If an odd number of registers is
   popped, the last register is loaded by using an LDR pattern.  */
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
        /* Create RTX for memory load.  */
        reg = gen_rtx_REG (SImode, j);
        tmp = gen_rtx_SET (reg,
                           gen_frame_mem (SImode,
                               plus_constant (Pmode,
                                              stack_pointer_rtx, 4 * i)));
        RTX_FRAME_RELATED_P (tmp) = 1;

        if (i % 2 == 0)
          {
            /* When saved-register index (i) is even, the RTX to be emitted is
               yet to be created.  Hence create it first.  The LDRD pattern we
               are generating is :
                 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
                   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
               where target registers need not be consecutive.  */
            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
            dwarf = NULL_RTX;
          }

        /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
           added as 0th element and if i is odd, reg_i is added as 1st element
           of LDRD pattern shown above.  */
        XVECEXP (par, 0, (i % 2)) = tmp;
        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

        if ((i % 2) == 1)
          {
            /* When saved-register index (i) is odd, RTXs for both the
               registers to be loaded are generated in above given LDRD
               pattern, and the pattern can be emitted now.  */
            par = emit_insn (par);
            REG_NOTES (par) = dwarf;
            RTX_FRAME_RELATED_P (par) = 1;
          }

        i++;
      }

  /* If the number of registers pushed is odd AND return_in_pc is false OR
     number of registers are even AND return_in_pc is true, last register is
     popped using LDR.  It can be PC as well.  Hence, adjust the stack first
     and then LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
                     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    {
      arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
         register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
                          gen_rtx_POST_INC (SImode,
                                            stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
        {
          /* If return_in_pc, j must be PC_REGNUM.  */
          gcc_assert (j == PC_REGNUM);
          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
          XVECEXP (par, 0, 0) = ret_rtx;
          XVECEXP (par, 0, 1) = tmp;
          par = emit_jump_insn (par);
        }
      else
        {
          par = emit_insn (tmp);
          REG_NOTES (par) = dwarf;
          arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                       stack_pointer_rtx, stack_pointer_rtx);
        }
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
         pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }

  return;
}
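
/* A sketch of the output (illustrative only; not taken from the code
   above): for {r4, r5, r6} the loops above emit roughly

	ldrd	r4, r5, [sp]	@ T32 LDRD targets need not be consecutive
	add	sp, sp, #8
	ldr	r6, [sp], #4

   and for {r4, r5, pc} the trailing LDR becomes a load into PC wrapped
   in a PARALLEL with ret_rtx.  */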
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It
   uses offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.
   TODO: Add a peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
        if ((j % 2) == 0
            && (saved_regs_mask & (1 << (j + 1)))
            && (j + 1) != PC_REGNUM)
          {
            /* Current register and next register form register pair for which
               LDRD can be generated.  PC is always the last register popped,
               and we handle it separately.  */
            if (offset > 0)
              mem = gen_frame_mem (DImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (DImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */

            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j),
                                    NULL_RTX);
            dwarf = alloc_reg_note (REG_CFA_RESTORE,
                                    gen_rtx_REG (SImode, j + 1),
                                    dwarf);

            REG_NOTES (tmp) = dwarf;

            offset += 8;
            j += 2;
          }
        else if (j != PC_REGNUM)
          {
            /* Emit a single word load.  */
            if (offset > 0)
              mem = gen_frame_mem (SImode,
                                   plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  offset));
            else
              mem = gen_frame_mem (SImode, stack_pointer_rtx);

            tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
            tmp = emit_insn (tmp);
            RTX_FRAME_RELATED_P (tmp) = 1;

            /* Generate dwarf info.  */
            REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
                                              gen_rtx_REG (SImode, j),
                                              NULL_RTX);

            offset += 4;
            j += 1;
          }
        else /* j == PC_REGNUM */
          j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (stack_pointer_rtx,
                         plus_constant (Pmode,
                                        stack_pointer_rtx,
                                        offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
                                   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
                         gen_frame_mem (SImode,
                                        gen_rtx_POST_INC (SImode,
                                                          stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
                              gen_rtx_REG (SImode, PC_REGNUM),
                              NULL_RTX);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
                                   stack_pointer_rtx, stack_pointer_rtx);
    }
}
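
/* For illustration only (assumed output, not taken from the code above):
   {r4, r5, r6} would be restored as

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12

   the single deferred SP update being what buys the extra scheduling
   freedom described above.  */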
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
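
/* Illustrative note (assumption, not from the code above): a function
   returning a 16-byte structure in r0-r3 yields GET_MODE_SIZE == 16 here;
   the r3-padding heuristic in arm_get_frame_offsets below compares this
   value against 12.  */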
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
         && (!leaf_function_p ()
             || thumb_far_jump_used_p ()
             || df_regs_ever_live_p (LR_REGNUM));
}
/* Return true if CALL is an indirect tail call; in that particular case we
   do not know whether r3 will be available.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
        rtx call = BB_END (e->src);
        if (!CALL_P (call))
          call = prev_nonnote_nondebug_insn (call);
        gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
        if (find_regno_fusage (call, USE, 3)
            || is_indirect_tailcall_p (call))
          return true;
      }
  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue() and arm_compute_save_reg_mask().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.

     Fortunately, we only need
     to know about leaf functions once reload has completed, and the
     frame size cannot be changed after that time, so we can safely
     use the cached value.  */

  if (reload_completed)
    return offsets;

  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of preceding
     data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  leaf = leaf_function_p ();

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
         preserve that condition at any subroutine call.  We also require the
         soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
        {
          /* Check for the call-saved iWMMXt registers.  */
          for (regno = FIRST_IWMMXT_REGNUM;
               regno <= LAST_IWMMXT_REGNUM;
               regno++)
            if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
              saved += 8;
        }

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
          && TARGET_HARD_FLOAT && TARGET_VFP)
        saved += arm_get_vfp_saved_size ();
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
        saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
         block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
         when there is a stack frame as the alignment will be rolled into
         the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
        {
          int reg = -1;

          /* Register r3 is caller-saved.  Normally it does not need to be
             saved on entry by the prologue.  However if we choose to save
             it for padding then we may confuse the compiler into thinking
             a prologue sequence is required when in fact it is not.  This
             will occur when shrink-wrapping if r3 is used as a scratch
             register and there are no other callee-saved writes.

             This situation can be avoided when other callee-saved registers
             are available and r3 is not mandatory if we choose a callee-saved
             register for padding.  */
          bool prefer_callee_reg_p = false;

          /* If it is safe to use r3, then do so.  This sometimes
             generates better code on Thumb-2 by avoiding the need to
             use 32-bit push/pop instructions.  */
          if (! any_sibcall_could_use_r3 ()
              && arm_size_return_regs () <= 12
              && (offsets->saved_regs_mask & (1 << 3)) == 0
              && (TARGET_THUMB2
                  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
            {
              reg = 3;
              if (!TARGET_THUMB2)
                prefer_callee_reg_p = true;
            }
          if (reg == -1
              || prefer_callee_reg_p)
            {
              for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
                {
                  /* Avoid fixed registers; they may be changed at
                     arbitrary times so it's unsafe to restore them
                     during the epilogue.  */
                  if (!fixed_regs[i]
                      && (offsets->saved_regs_mask & (1 << i)) == 0)
                    {
                      reg = i;
                      break;
                    }
                }
            }

          if (reg != -1)
            {
              offsets->saved_regs += 4;
              offsets->saved_regs_mask |= (1 << reg);
            }
        }
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
                            + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
        offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}
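
/* A worked example (illustrative figures only, assuming no interworking
   slot and no static chain): a non-leaf function with 8 bytes of locals
   saving {r4, r5, lr} gets saved_args = 0, saved_regs = 12 and
   soft_frame = 12, rounded up to 16 for doubleword alignment (possibly
   saving one extra register to pay for the pad); then locals_base = 24
   and outgoing_args = 24, which is already 8-byte aligned.  */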
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case FRAME_POINTER_REGNUM:
          /* This is the reverse of the soft frame pointer
             to hard frame pointer elimination below.  */
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* This is only non-zero in the case where the static chain register
             is stored above the frame.  */
          return offsets->frame - offsets->saved_args - 4;

        case STACK_POINTER_REGNUM:
          /* If nothing has been pushed on the stack at all
             then this will return -4.  This *is* correct!  */
          return offsets->outgoing_args - (offsets->saved_args + 4);

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* The hard frame pointer points to the top entry in the
             stack frame.  The soft frame pointer to the bottom entry
             in the stack frame.  If there is no stack frame at all,
             then they are identical.  */

          return offsets->frame - offsets->soft_frame;

        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
         In theory you could eliminate from the hard frame
         pointer to the stack pointer, but this will never
         happen, since if a stack frame is not needed the
         hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */
bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
          (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
          (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
          (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
           true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs (void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
        insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
        insn = gen_rtx_MEM (V2SImode, insn);
        insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
        RTX_FRAME_RELATED_P (insn) = 1;
        saved_size += 8;
      }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
        {
          if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
              && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
            {
              if (start_reg != reg)
                saved_size += vfp_emit_fstmd (start_reg,
                                              (reg - start_reg) / 2);
              start_reg = reg + 2;
            }
        }
      if (start_reg != reg)
        saved_size += vfp_emit_fstmd (start_reg,
                                      (reg - start_reg) / 2);
    }
  return saved_size;
}
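
/* Illustration (assumed output, not taken from the code above): if d8-d10
   are the only live call-saved VFP registers, the loop above finds that
   contiguous run and makes one vfp_emit_fstmd call, emitting roughly

	vpush	{d8-d10}

   and contributing 24 bytes to the returned size (vfp_emit_fstmd itself
   may pad odd-length runs for errata reasons).  */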
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
         expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
        {
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx,
                                        hard_frame_pointer_rtx));
        }
      else
        {
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        hard_frame_pointer_rtx,
                                        stack_pointer_rtx));
        }
      dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
                           plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which registers we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

          mov r0, sp
          bic r1, r0, #7
          mov sp, r1
          <save and restore r0 in normal prologue/epilogue>
          mov sp, r0
          bx lr

         The unwinder doesn't need to know about the stack realignment.
         Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, R0_REGNUM);
      r1 = gen_rtx_REG (SImode, R1_REGNUM);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
         has entered a different function.  That said, the unwind info is
         correct, individually, before and after this instruction because
         we've described the save of SP, which will override the default
         handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* For APCS frames, if the IP register is clobbered
     when creating the frame, save that register in a special
     way.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
        {
          /* Interrupt functions must not corrupt any registers.
             Creating a frame pointer however, corrupts the IP
             register, so we must push it first.  */
          emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

          /* Do not set RTX_FRAME_RELATED_P on this insn.
             The dwarf stack unwinding code only wants to see one
             stack decrement per function, and this is not it.  If
             this instruction is labeled as being part of the frame
             creation sequence then dwarf2out_frame_debug_expr will
             die when it encounters the assignment of IP to FP
             later on, since the use of SP here establishes SP as
             the CFA register and not IP.

             Anyway this instruction is not really part of the stack
             frame creation although it is part of the prologue.  */
        }
      else if (IS_NESTED (func_type))
        {
          /* The static chain register is the same as the IP register
             used as a scratch register during stack frame creation.
             To get around this need to find somewhere to store IP
             whilst the frame is being created.  We try the following
             places in order:

               1. The last argument register r3 if it is available.
               2. A slot on the stack above the frame if there are no
                  arguments to push onto the stack.
               3. Register r3 again, after pushing the argument registers
                  onto the stack, if this is a varargs function.
               4. The last slot on the stack created for the arguments to
                  push, if this isn't a varargs function.

             Note - we only need to tell the dwarf2 backend about the SP
             adjustment in the second variant; the static chain register
             doesn't need to be unwound, as it doesn't contain a value
             inherited from the caller.  */

          if (!arm_r3_live_at_start_p ())
            insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
          else if (args_to_push == 0)
            {
              rtx addr, dwarf;

              gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
              saved_regs += 4;

              addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
              insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
              fp_offset = 4;

              /* Just tell the dwarf backend that we adjusted SP.  */
              dwarf = gen_rtx_SET (stack_pointer_rtx,
                                   plus_constant (Pmode, stack_pointer_rtx,
                                                  -fp_offset));
              RTX_FRAME_RELATED_P (insn) = 1;
              add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
            }
          else
            {
              /* Store the args on the stack.  */
              if (cfun->machine->uses_anonymous_args)
                {
                  insn
                    = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
                                           (0xf0 >> (args_to_push / 4)) & 0xf);
                  emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
                  saved_pretend_args = 1;
                }
              else
                {
                  rtx addr, dwarf;

                  if (args_to_push == 4)
                    addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
                  else
                    addr
                      = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
                                            plus_constant (Pmode,
                                                           stack_pointer_rtx,
                                                           -args_to_push));

                  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

                  /* Just tell the dwarf backend that we adjusted SP.  */
                  dwarf
                    = gen_rtx_SET (stack_pointer_rtx,
                                   plus_constant (Pmode, stack_pointer_rtx,
                                                  -args_to_push));
                  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
                }

              RTX_FRAME_RELATED_P (insn) = 1;
              fp_offset = args_to_push;
              args_to_push = 0;
            }
        }

      insn = emit_set_insn (ip_rtx,
                            plus_constant (Pmode, stack_pointer_rtx,
                                           fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
        insn = emit_multi_reg_push
          ((0xf0 >> (args_to_push / 4)) & 0xf,
           (0xf0 >> (args_to_push / 4)) & 0xf);
      else
        insn = emit_insn
          (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra
     push of IP (needed when a frame is needed and the frame layout is APCS),
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
          && saved_regs == offsets->saved_regs - offsets->saved_args)
        {
          /* If no coprocessor registers are being pushed and we don't have
             to worry about a frame pointer then push extra registers to
             create the stack frame.  This is done in a way that does not
             alter the frame layout, so is independent of the epilogue.  */
          int n;
          int frame;
          n = 0;
          while (n < 8 && (live_regs_mask & (1 << n)) == 0)
            n++;
          frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
          if (frame && n * 4 >= frame)
            {
              n = frame / 4;
              live_regs_mask |= (1 << n) - 1;
              saved_regs += frame;
            }
        }

      if (TARGET_LDRD
          && current_tune->prefer_ldrd_strd
          && !optimize_function_for_size_p (cfun))
        {
          gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
          if (TARGET_THUMB2)
            thumb2_emit_strd_push (live_regs_mask);
          else if (TARGET_ARM
                   && !TARGET_APCS_FRAME
                   && !IS_INTERRUPT (func_type))
            arm_emit_strd_push (live_regs_mask);
          else
            {
              insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }
      else
        {
          insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
        {
          insn = GEN_INT (-(4 + args_to_push + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;

          if (IS_NESTED (func_type))
            {
              /* Recover the static chain register.  */
              if (!arm_r3_live_at_start_p () || saved_pretend_args)
                insn = gen_rtx_REG (SImode, 3);
              else
                {
                  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
                  insn = gen_frame_mem (SImode, insn);
                }
              emit_set_insn (ip_rtx, insn);
              /* Add a USE to stop propagate_one_insn() from barfing.  */
              emit_insn (gen_force_register_use (ip_rtx));
            }
        }
      else
        {
          insn = GEN_INT (saved_regs - 4);
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (flag_stack_usage_info)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
         need to get tricky.  */
      rtx_insn *last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
                        - offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                    amount));
      do
        {
          last = last ? NEXT_INSN (last) : get_insns ();
          RTX_FRAME_RELATED_P (last) = 1;
        }
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
         will prevent the scheduler from moving stores to the frame
         before the stack adjustment.  */
      if (frame_pointer_needed)
        insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
                                         hard_frame_pointer_rtx));
    }

  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
        mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
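
/* As a concrete illustration (assumed output, not taken from the code
   above): a nested function compiled with an APCS frame typically begins

	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4

   which is the IP-shuffling arrangement the nested-function comments
   above describe.  */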
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }
      if (current_insn_predicate != NULL)
        {
          output_operand_lossage
            ("predicated instruction in conditional sequence");
          return;
        }

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
21436 /* Globally reserved letters: acln
21437 Puncutation letters currently used: @_|?().!#
21438 Lower case letters currently used: bcdefhimpqtvwxyz
21439 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21440 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21442 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21444 If CODE is 'd', then the X is a condition operand and the instruction
21445 should only be executed if the condition is true.
21446 if CODE is 'D', then the X is a condition operand and the instruction
21447 should only be executed if the condition is false: however, if the mode
21448 of the comparison is CCFPEmode, then always execute the instruction -- we
21449 do this because in these circumstances !GE does not necessarily imply LT;
21450 in these cases the instruction pattern will take care to make sure that
21451 an instruction containing %d will follow, thereby undoing the effects of
21452 doing this instruction unconditionally.
21453 If CODE is 'N' then X is a floating point operand that must be negated
21455 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21456 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21458 arm_print_operand (FILE *stream
, rtx x
, int code
)
21463 fputs (ASM_COMMENT_START
, stream
);
21467 fputs (user_label_prefix
, stream
);
21471 fputs (REGISTER_PREFIX
, stream
);
21475 arm_print_condition (stream
);
21479 /* Nothing in unified syntax, otherwise the current condition code. */
21480 if (!TARGET_UNIFIED_ASM
)
21481 arm_print_condition (stream
);
21485 /* The current condition code in unified syntax, otherwise nothing. */
21486 if (TARGET_UNIFIED_ASM
)
21487 arm_print_condition (stream
);
21491 /* The current condition code for a condition code setting instruction.
21492 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21493 if (TARGET_UNIFIED_ASM
)
21495 fputc('s', stream
);
21496 arm_print_condition (stream
);
21500 arm_print_condition (stream
);
21501 fputc('s', stream
);
21506 /* If the instruction is conditionally executed then print
21507 the current condition code, otherwise print 's'. */
21508 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
21509 if (current_insn_predicate
)
21510 arm_print_condition (stream
);
21512 fputc('s', stream
);
21515 /* %# is a "break" sequence. It doesn't output anything, but is used to
21516 separate e.g. operand numbers from following text, if that text consists
21517 of further digits which we don't want to be part of the operand
21525 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
21526 r
= real_value_negate (&r
);
21527 fprintf (stream
, "%s", fp_const_from_val (&r
));
21531 /* An integer or symbol address without a preceding # sign. */
21533 switch (GET_CODE (x
))
21536 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21540 output_addr_const (stream
, x
);
21544 if (GET_CODE (XEXP (x
, 0)) == PLUS
21545 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21547 output_addr_const (stream
, x
);
21550 /* Fall through. */
21553 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21557 /* An integer that we want to print in HEX. */
21559 switch (GET_CODE (x
))
21562 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21566 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21571 if (CONST_INT_P (x
))
21574 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21575 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21579 putc ('~', stream
);
21580 output_addr_const (stream
, x
);
21585 /* Print the log2 of a CONST_INT. */
21589 if (!CONST_INT_P (x
)
21590 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
21591 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21593 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21598 /* The low 16 bits of an immediate constant. */
21599 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
21603 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
21607 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
21615 shift
= shift_op (x
, &val
);
21619 fprintf (stream
, ", %s ", shift
);
21621 arm_print_operand (stream
, XEXP (x
, 1), 0);
21623 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21628 /* An explanation of the 'Q', 'R' and 'H' register operands:
21630 In a pair of registers containing a DI or DF value the 'Q'
21631 operand returns the register number of the register containing
21632 the least significant part of the value. The 'R' operand returns
21633 the register number of the register containing the most
21634 significant part of the value.
21636 The 'H' operand returns the higher of the two register numbers.
21637 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21638 same as the 'Q' operand, since the most significant part of the
21639 value is held in the lower number register. The reverse is true
21640 on systems where WORDS_BIG_ENDIAN is false.
21642 The purpose of these operands is to distinguish between cases
21643 where the endian-ness of the values is important (for example
21644 when they are added together), and cases where the endian-ness
21645 is irrelevant, but the order of register operations is important.
21646 For example when loading a value from memory into a register
21647 pair, the endian-ness does not matter. Provided that the value
21648 from the lower memory address is put into the lower numbered
21649 register, and the value from the higher address is put into the
21650 higher numbered register, the load will work regardless of whether
21651 the value being loaded is big-wordian or little-wordian. The
21652 order of the two register loads can matter however, if the address
21653 of the memory location is actually held in one of the registers
21654 being overwritten by the load.
21656 The 'Q' and 'R' constraints are also available for 64-bit
21659 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21661 rtx part
= gen_lowpart (SImode
, x
);
21662 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21666 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21668 output_operand_lossage ("invalid operand for code '%c'", code
);
21672 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
21676 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21678 machine_mode mode
= GET_MODE (x
);
21681 if (mode
== VOIDmode
)
21683 part
= gen_highpart_mode (SImode
, mode
, x
);
21684 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21688 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21690 output_operand_lossage ("invalid operand for code '%c'", code
);
21694 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
21698 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21700 output_operand_lossage ("invalid operand for code '%c'", code
);
21704 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
21708 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21710 output_operand_lossage ("invalid operand for code '%c'", code
);
21714 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
21718 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21720 output_operand_lossage ("invalid operand for code '%c'", code
);
21724 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
21728 asm_fprintf (stream
, "%r",
21729 REG_P (XEXP (x
, 0))
21730 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
21734 asm_fprintf (stream
, "{%r-%r}",
21736 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
21739 /* Like 'M', but writing doubleword vector registers, for use by Neon
21743 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
21744 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
21746 asm_fprintf (stream
, "{d%d}", regno
);
21748 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
21753 /* CONST_TRUE_RTX means always -- that's the default. */
21754 if (x
== const_true_rtx
)
21757 if (!COMPARISON_P (x
))
21759 output_operand_lossage ("invalid operand for code '%c'", code
);
21763 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
21768 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21769 want to do that. */
21770 if (x
== const_true_rtx
)
21772 output_operand_lossage ("instruction never executed");
21775 if (!COMPARISON_P (x
))
21777 output_operand_lossage ("invalid operand for code '%c'", code
);
21781 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
21782 (get_arm_condition_code (x
))],
21792 /* Former Maverick support, removed after GCC-4.7. */
21793 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
21798 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
21799 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
21800 /* Bad value for wCG register number. */
21802 output_operand_lossage ("invalid operand for code '%c'", code
);
21807 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
21810 /* Print an iWMMXt control register name. */
21812 if (!CONST_INT_P (x
)
21814 || INTVAL (x
) >= 16)
21815 /* Bad value for wC register number. */
21817 output_operand_lossage ("invalid operand for code '%c'", code
);
21823 static const char * wc_reg_names
[16] =
21825 "wCID", "wCon", "wCSSF", "wCASF",
21826 "wC4", "wC5", "wC6", "wC7",
21827 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21828 "wC12", "wC13", "wC14", "wC15"
21831 fputs (wc_reg_names
[INTVAL (x
)], stream
);
21835 /* Print the high single-precision register of a VFP double-precision
21839 machine_mode mode
= GET_MODE (x
);
21842 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
21844 output_operand_lossage ("invalid operand for code '%c'", code
);
21849 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
21851 output_operand_lossage ("invalid operand for code '%c'", code
);
21855 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
21859 /* Print a VFP/Neon double precision or quad precision register name. */
21863 machine_mode mode
= GET_MODE (x
);
21864 int is_quad
= (code
== 'q');
21867 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
21869 output_operand_lossage ("invalid operand for code '%c'", code
);
21874 || !IS_VFP_REGNUM (REGNO (x
)))
21876 output_operand_lossage ("invalid operand for code '%c'", code
);
21881 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
21882 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
21884 output_operand_lossage ("invalid operand for code '%c'", code
);
21888 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
21889 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
21893 /* These two codes print the low/high doubleword register of a Neon quad
21894 register, respectively. For pair-structure types, can also print
21895 low/high quadword registers. */
21899 machine_mode mode
= GET_MODE (x
);
21902 if ((GET_MODE_SIZE (mode
) != 16
21903 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
21905 output_operand_lossage ("invalid operand for code '%c'", code
);
21910 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
21912 output_operand_lossage ("invalid operand for code '%c'", code
);
21916 if (GET_MODE_SIZE (mode
) == 16)
21917 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
21918 + (code
== 'f' ? 1 : 0));
21920 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
21921 + (code
== 'f' ? 1 : 0));
21925 /* Print a VFPv3 floating-point constant, represented as an integer
21929 int index
= vfp3_const_double_index (x
);
21930 gcc_assert (index
!= -1);
21931 fprintf (stream
, "%d", index
);
      /* Print bits representing opcode features for Neon.

         Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
         and polynomials as unsigned.

         Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

         Bit 2 is 1 for rounding functions, 0 otherwise.  */

      /* Identify the type as 's', 'u', 'p' or 'f'.  */
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("uspf"[bits & 3], stream);
      }
      return;

      /* Likewise, but signed and unsigned integers are both 'i'.  */
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("iipf"[bits & 3], stream);
      }
      return;

      /* As for 'T', but emit 'u' instead of 'p'.  */
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("usuf"[bits & 3], stream);
      }
      return;

      /* Bit 2: rounding (vs none).  */
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;
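      /* Worked example (illustrative only): an operand of (const_int 3) has
         bit 0 set (signed) and bit 1 set (float), so the type codes above
         print 'f'; (const_int 7) additionally has bit 2 set, so the rounding
         code prints "r" as well.  (const_int 0) prints 'u' (or 'i') and no
         rounding suffix.  */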
21976 /* Memory operand for vld1/vst1 instruction. */
21980 bool postinc
= FALSE
;
21981 rtx postinc_reg
= NULL
;
21982 unsigned align
, memsize
, align_bits
;
21984 gcc_assert (MEM_P (x
));
21985 addr
= XEXP (x
, 0);
21986 if (GET_CODE (addr
) == POST_INC
)
21989 addr
= XEXP (addr
, 0);
21991 if (GET_CODE (addr
) == POST_MODIFY
)
21993 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
21994 addr
= XEXP (addr
, 0);
21996 asm_fprintf (stream
, "[%r", REGNO (addr
));
21998 /* We know the alignment of this access, so we can emit a hint in the
21999 instruction (for some alignments) as an aid to the memory subsystem
22001 align
= MEM_ALIGN (x
) >> 3;
22002 memsize
= MEM_SIZE (x
);
22004 /* Only certain alignment specifiers are supported by the hardware. */
22005 if (memsize
== 32 && (align
% 32) == 0)
22007 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
22009 else if (memsize
>= 8 && (align
% 8) == 0)
22014 if (align_bits
!= 0)
22015 asm_fprintf (stream
, ":%d", align_bits
);
22017 asm_fprintf (stream
, "]");
22020 fputs("!", stream
);
22022 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
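      /* For instance (illustrative; the exact hint values depend on the
         reconstructed assignments above), a 16-byte access whose known
         alignment is a multiple of 16 bytes prints as "[r0:128]", a
         post-increment form appends "!", and a POST_MODIFY form appends
         ", rN" naming the increment register.  */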
      {
        rtx addr;

        gcc_assert (MEM_P (x));
        addr = XEXP (x, 0);
        gcc_assert (REG_P (addr));
        asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;
      /* Translate an S register number into a D register number and element
         index.  */
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;
      gcc_assert (CONST_DOUBLE_P (x));
      {
        int result;
        result = vfp3_const_double_for_fract_bits (x);
        if (result == 0)
          result = vfp3_const_double_for_bits (x);
        fprintf (stream, "#%d", result);
      }
      return;
      /* Register specifier for vld1.16/vst1.16.  Translate the S register
         number into a D register number and element index.  */
      {
        machine_mode mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
      }
      return;
    default:
      if (x == 0)
        {
          output_operand_lossage ("missing operand");
          return;
        }

      switch (GET_CODE (x))
        {
        case REG:
          asm_fprintf (stream, "%r", REGNO (x));
          break;

        case MEM:
          output_memory_reference_mode = GET_MODE (x);
          output_address (XEXP (x, 0));
          break;

        case CONST_DOUBLE:
          {
            char fpstr[20];
            real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
                             sizeof (fpstr), 0, 1);
            fprintf (stream, "#%s", fpstr);
          }
          break;

        default:
          gcc_assert (GET_CODE (x) != NEG);
          fputc ('#', stream);
          if (GET_CODE (x) == HIGH)
            {
              fputs (":lower16:", stream);
              x = XEXP (x, 0);
            }

          output_addr_const (stream, x);
          break;
        }
    }
}
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
        asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
        {
          rtx base = XEXP (x, 0);
          rtx index = XEXP (x, 1);
          HOST_WIDE_INT offset = 0;
          if (!REG_P (base)
              || (REG_P (index) && REGNO (index) == SP_REGNUM))
            {
              /* Ensure that BASE is a register.  */
              /* (one of them must be).  */
              /* Also ensure the SP is not used as an index register.  */
              std::swap (base, index);
            }
          switch (GET_CODE (index))
            {
            case CONST_INT:
              offset = INTVAL (index);
              if (is_minus)
                offset = -offset;
              asm_fprintf (stream, "[%r, #%wd]",
                           REGNO (base), offset);
              break;

            case REG:
              asm_fprintf (stream, "[%r, %s%r]",
                           REGNO (base), is_minus ? "-" : "",
                           REGNO (index));
              break;

            case MULT:
            case ASHIFTRT:
            case LSHIFTRT:
            case ASHIFT:
            case ROTATERT:
              asm_fprintf (stream, "[%r, %s%r",
                           REGNO (base), is_minus ? "-" : "",
                           REGNO (XEXP (index, 0)));
              arm_print_operand (stream, index, 'S');
              fputs ("]", stream);
              break;

            default:
              gcc_unreachable ();
            }
        }
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
               || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
        {
          extern machine_mode output_memory_reference_mode;

          gcc_assert (REG_P (XEXP (x, 0)));

          if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
            asm_fprintf (stream, "[%r, #%s%d]!",
                         REGNO (XEXP (x, 0)),
                         GET_CODE (x) == PRE_DEC ? "-" : "",
                         GET_MODE_SIZE (output_memory_reference_mode));
          else
            asm_fprintf (stream, "[%r], #%s%d",
                         REGNO (XEXP (x, 0)),
                         GET_CODE (x) == POST_DEC ? "-" : "",
                         GET_MODE_SIZE (output_memory_reference_mode));
        }
      else if (GET_CODE (x) == PRE_MODIFY)
        {
          asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
            asm_fprintf (stream, "#%wd]!",
                         INTVAL (XEXP (XEXP (x, 1), 1)));
          else
            asm_fprintf (stream, "%r]!",
                         REGNO (XEXP (XEXP (x, 1), 1)));
        }
      else if (GET_CODE (x) == POST_MODIFY)
        {
          asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
            asm_fprintf (stream, "#%wd",
                         INTVAL (XEXP (XEXP (x, 1), 1)));
          else
            asm_fprintf (stream, "%r",
                         REGNO (XEXP (XEXP (x, 1), 1)));
        }
      else
        output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
        asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
        asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
        {
          gcc_assert (REG_P (XEXP (x, 0)));
          if (CONST_INT_P (XEXP (x, 1)))
            asm_fprintf (stream, "[%r, #%wd]",
                         REGNO (XEXP (x, 0)),
                         INTVAL (XEXP (x, 1)));
          else
            asm_fprintf (stream, "[%r, %r]",
                         REGNO (XEXP (x, 0)),
                         REGNO (XEXP (x, 1)));
        }
      else
        output_addr_const (stream, x);
    }
}
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
          || code == '(' || code == ')' || code == '#'
          || (TARGET_32BIT && (code == '?'))
          || (TARGET_THUMB2 && (code == '!'))
          || (TARGET_THUMB && (code == '_')));
}
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
         .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
          (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
        {
          /* See legitimize_pic_address for an explanation of the
             TARGET_VXWORKS_RTP check.  */
          if (!arm_pic_data_is_text_relative
              || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
            fputs ("(GOT)", asm_out_file);
          else
            fputs ("(GOTOFF)", asm_out_file);
        }
      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_SIZE (GET_MODE_INNER (mode));

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            assemble_integer
              (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
          }
      else
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            REAL_VALUE_TYPE rval;

            REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);

            assemble_real
              (rval, GET_MODE_INNER (mode),
               i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
          }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
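/* For example (illustrative): a word-sized LABEL_REF emitted into a PIC
   constant table produces "\t.word\t.L3(GOTOFF)\n", while a non-local
   SYMBOL_REF gets the "(GOT)" suffix instead, per the logic above.  */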
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
               is_ctor ? ".init_array" : ".fini_array",
               priority);
      s = get_section (buf, SECTION_WRITE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}

/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes is:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */
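/* As an illustration (not from the original sources), a short forward
   branch such as

        cmp     r0, #0
        beq     .L1
        add     r1, r1, #1
   .L1:

   can be output as

        cmp     r0, #0
        addne   r1, r1, #1

   by suppressing the branch and conditionalising the skipped insn.  */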
/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
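/* Illustrative example: two COND_EXEC insns predicated on EQ and NE can
   be merged into one IT block.  With arm_condexec_masklen == 2 and
   arm_condexec_mask == 0x1 ('t' then 'e'), arm_asm_output_opcode emits

        ite     eq
        moveq   r0, #1
        movne   r0, #0
   */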
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */

static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
                           XEXP (comparison, 1));

  switch (mode)
    {
    case CC_DNEmode: code = ARM_NE; goto dominance;
    case CC_DEQmode: code = ARM_EQ; goto dominance;
    case CC_DGEmode: code = ARM_GE; goto dominance;
    case CC_DGTmode: code = ARM_GT; goto dominance;
    case CC_DLEmode: code = ARM_LE; goto dominance;
    case CC_DLTmode: code = ARM_LT; goto dominance;
    case CC_DGEUmode: code = ARM_CS; goto dominance;
    case CC_DGTUmode: code = ARM_HI; goto dominance;
    case CC_DLEUmode: code = ARM_LS; goto dominance;
    case CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
        return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
        return code;
      return ARM_NV;

    case CC_NOOVmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_PL;
        case LT: return ARM_MI;
        default: return ARM_NV;
        }

    case CC_Zmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        default: return ARM_NV;
        }

    case CC_Nmode:
      switch (comp_code)
        {
        case NE: return ARM_MI;
        case EQ: return ARM_PL;
        default: return ARM_NV;
        }

    case CCFPEmode:
    case CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
        {
        case GE: return ARM_GE;
        case GT: return ARM_GT;
        case LE: return ARM_LS;
        case LT: return ARM_MI;
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case ORDERED: return ARM_VC;
        case UNORDERED: return ARM_VS;
        case UNLT: return ARM_LT;
        case UNLE: return ARM_LE;
        case UNGT: return ARM_HI;
        case UNGE: return ARM_PL;
        /* UNEQ and LTGT do not have a representation.  */
        case UNEQ: /* Fall through.  */
        case LTGT: /* Fall through.  */
        default: return ARM_NV;
        }

    case CC_SWPmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_LE;
        case GT: return ARM_LT;
        case LE: return ARM_GE;
        case LT: return ARM_GT;
        case GEU: return ARM_LS;
        case GTU: return ARM_CC;
        case LEU: return ARM_CS;
        case LTU: return ARM_HI;
        default: return ARM_NV;
        }

    case CC_Cmode:
      switch (comp_code)
        {
        case LTU: return ARM_CS;
        case GEU: return ARM_CC;
        default: return ARM_NV;
        }

    case CC_CZmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GEU: return ARM_CS;
        case GTU: return ARM_HI;
        case LEU: return ARM_LS;
        case LTU: return ARM_CC;
        default: return ARM_NV;
        }

    case CC_NCVmode:
      switch (comp_code)
        {
        case GE: return ARM_GE;
        case LT: return ARM_LT;
        case GEU: return ARM_CS;
        case LTU: return ARM_CC;
        default: return ARM_NV;
        }

    case CCmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_GE;
        case GT: return ARM_GT;
        case LE: return ARM_LE;
        case LT: return ARM_LT;
        case GEU: return ARM_CS;
        case GTU: return ARM_HI;
        case LEU: return ARM_LS;
        case LTU: return ARM_CC;
        default: return ARM_NV;
        }

    default: gcc_unreachable ();
    }
}
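/* For example, (ne (reg:CC_DNE ...) (const_int 0)) selects code = ARM_NE
   through the dominance cases above, while (eq ...) on the same mode
   returns ARM_INVERSE_CONDITION_CODE (ARM_NE), i.e. ARM_EQ.  */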
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
static void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
         barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
        break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
          || GET_CODE (body) == CLOBBER)
        continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
        break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
        break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
        arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
        break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
        break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
        {
          arm_target_insn = NULL;
          arm_ccfsm_state = 0;
        }
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
        {
          start_insn = next_nonnote_insn (start_insn);
          if (BARRIER_P (start_insn))
            {
              /* XXX Isn't this always a barrier?  */
              start_insn = next_nonnote_insn (start_insn);
            }
          if (LABEL_P (start_insn)
              && CODE_LABEL_NUMBER (start_insn) == arm_target_label
              && LABEL_NUSES (start_insn) == 1)
            reverse = TRUE;
          else
            return;
        }
      else if (ANY_RETURN_P (body))
        {
          start_insn = next_nonnote_insn (start_insn);
          if (BARRIER_P (start_insn))
            start_insn = next_nonnote_insn (start_insn);
          if (LABEL_P (start_insn)
              && CODE_LABEL_NUMBER (start_insn) == arm_target_label
              && LABEL_NUSES (start_insn) == 1)
            {
              reverse = TRUE;
              seeking_return = 1;
              return_code = GET_CODE (body);
            }
          else
            return;
        }
      else
        return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
          && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      rtx label = 0;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;
      rtx scanbody;

      /* Register the insn jumped to.  */
      if (reverse)
        {
          if (!seeking_return)
            label = XEXP (SET_SRC (body), 0);
        }
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
        label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
        {
          label = XEXP (XEXP (SET_SRC (body), 2), 0);
          then_not_else = FALSE;
        }
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
        {
          seeking_return = 1;
          return_code = GET_CODE (XEXP (SET_SRC (body), 1));
        }
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
        {
          seeking_return = 1;
          return_code = GET_CODE (XEXP (SET_SRC (body), 2));
          then_not_else = FALSE;
        }
      else
        gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
         insns are okay, and the label or unconditional branch to the same
         label is not too far away, succeed.  */
      for (insns_skipped = 0;
           !fail && !succeed && insns_skipped++ < max_insns_skipped;)
        {
          this_insn = next_nonnote_insn (this_insn);
          if (!this_insn)
            break;

          switch (GET_CODE (this_insn))
            {
            case CODE_LABEL:
              /* Succeed if it is the target label, otherwise fail since
                 control falls in from somewhere else.  */
              if (this_insn == label)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case BARRIER:
              /* Succeed if the following insn is the target label.
                 Otherwise fail.
                 If return insns are used then the last insn in a function
                 will be a barrier.  */
              this_insn = next_nonnote_insn (this_insn);
              if (this_insn && this_insn == label)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case CALL_INSN:
              /* The AAPCS says that conditional calls should not be
                 used since they make interworking inefficient (the
                 linker can't transform BL<cond> into BLX).  That's
                 only a problem if the machine has BLX.  */
              if (arm_arch5)
                {
                  fail = TRUE;
                  break;
                }

              /* Succeed if the following insn is the target label, or
                 if the following two insns are a barrier and the
                 target label.  */
              this_insn = next_nonnote_insn (this_insn);
              if (this_insn && BARRIER_P (this_insn))
                this_insn = next_nonnote_insn (this_insn);

              if (this_insn && this_insn == label
                  && insns_skipped < max_insns_skipped)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case JUMP_INSN:
              /* If this is an unconditional branch to the same label, succeed.
                 If it is to another label, do nothing.  If it is conditional,
                 fail.  */
              /* XXX Probably, the tests for SET and the PC are
                 unnecessary.  */

              scanbody = PATTERN (this_insn);
              if (GET_CODE (scanbody) == SET
                  && GET_CODE (SET_DEST (scanbody)) == PC)
                {
                  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
                      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
                    {
                      arm_ccfsm_state = 2;
                      succeed = TRUE;
                    }
                  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
                    fail = TRUE;
                }
              /* Fail if a conditional return is undesirable (e.g. on a
                 StrongARM), but still allow this if optimizing for size.  */
              else if (GET_CODE (scanbody) == return_code
                       && !use_return_insn (TRUE, NULL)
                       && !optimize_size)
                fail = TRUE;
              else if (GET_CODE (scanbody) == return_code)
                {
                  arm_ccfsm_state = 2;
                  succeed = TRUE;
                }
              else if (GET_CODE (scanbody) == PARALLEL)
                {
                  switch (get_attr_conds (this_insn))
                    {
                    case CONDS_NOCOND:
                      break;
                    default:
                      fail = TRUE;
                      break;
                    }
                }
              else
                fail = TRUE;  /* Unrecognized jump (e.g. epilogue).  */

              break;

            case INSN:
              /* Instructions using or affecting the condition codes make it
                 fail.  */
              scanbody = PATTERN (this_insn);
              if (!(GET_CODE (scanbody) == SET
                    || GET_CODE (scanbody) == PARALLEL)
                  || get_attr_conds (this_insn) != CONDS_NOCOND)
                fail = TRUE;
              break;

            default:
              break;
            }
        }
      if (succeed)
        {
          if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
            arm_target_label = CODE_LABEL_NUMBER (label);
          else
            {
              gcc_assert (seeking_return || arm_ccfsm_state == 2);

              while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
                this_insn = next_nonnote_insn (this_insn);
              gcc_assert (!this_insn
                          || (!BARRIER_P (this_insn)
                              && !LABEL_P (this_insn)));

              if (!this_insn)
                {
                  /* Oh dear!  We ran off the end; give up.  */
                  extract_constrain_insn_cached (insn);
                  arm_ccfsm_state = 0;
                  arm_target_insn = NULL;
                  return;
                }
              arm_target_insn = this_insn;
            }

          /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
             what it was.  */
          if (!reverse)
            arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

          if (reverse || then_not_else)
            arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
        }

      /* Restore recog_data (getting the attributes of other insns can
         destroy this array, but final.c assumes that it remains intact
         across this call).  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
        buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf (stream, "i%s\t%s\n\t", buff,
                   arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
/* Returns true if REGNO is a valid register
   for holding a quantity of type MODE.  */
int
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
            || (TARGET_HARD_FLOAT && TARGET_VFP
                && regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return FALSE;

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && TARGET_VFP
      && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
        return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
        return VFP_REGNO_OK_FOR_DOUBLE (regno);

      /* VFP registers can hold HFmode values, but there is no point in
         putting them there unless we have hardware conversion insns.  */
      if (mode == HFmode)
        return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
        return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
               || (VALID_NEON_QREG_MODE (mode)
                   && NEON_REGNO_OK_FOR_QUAD (regno))
               || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
               || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
               || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
               || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
               || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return FALSE;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
        return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
        return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  Do not allow very large Neon structure
     opaque modes in general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
        return FALSE;

      if (TARGET_THUMB2)
        return TRUE;

      return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return FALSE;
}
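/* For example, in ARM state with LDRD enabled a DImode value may not
   start in an odd core register: ARM_NUM_REGS (DImode) == 2 and the
   final test above rejects (regno & 1) != 0, so that LDRD/STRD can use
   an even/odd register pair.  */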
/* Implement MODES_TIEABLE_P.  */

bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode1)
          || VALID_NEON_QREG_MODE (mode1)
          || VALID_NEON_STRUCT_MODE (mode1))
      && (VALID_NEON_DREG_MODE (mode2)
          || VALID_NEON_QREG_MODE (mode2)
          || VALID_NEON_STRUCT_MODE (mode2)))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */

enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
        return STACK_REG;
      if (regno == CC_REGNUM)
        return CC_REG;
      if (regno < 8)
        return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
        return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
        return VFP_LO_REGS;
      else
        return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return FP_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

           double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn)
          && GET_CODE (PATTERN (insn)) == SET
          && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
          && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
          && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
          && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
          && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1)))
        {
          value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
          break;
        }
    }

  if (value == 0)
    {
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}

/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}

/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions
   between __fp16 and double to go through an intermediate conversion to
   float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
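/* For example, a C conversion (double) x where x has type __fp16 is
   rewritten here as (double) (float) x, matching the half-precision
   semantics described above.  */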
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Emit code to reinterpret one Neon type as another, without altering bits.  */
void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}

/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
        {
          operands[2 * i] = dest[i];
          operands[2 * i + 1] = src[i];
        }
    }
  else
    {
      for (i = 0; i < count; i++)
        {
          operands[2 * i] = dest[count - i - 1];
          operands[2 * i + 1] = src[count - i - 1];
        }
    }
}
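/* For instance (illustrative), decomposing a copy of {d1,d2} into {d2,d3}
   must be emitted high part first (d3 <- d2, then d2 <- d1); the reversed
   ordering above ensures no move clobbers a source still needed by a
   later one.  */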
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
                               GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (destlo, operands[1]);
      rtx y = gen_rtx_SET (desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
         is in the right place already.  */
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
    }
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
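/* E.g. number_of_first_bit_set (0x14) == 2, since bit 2 is the lowest
   bit set in 10100b.  */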
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */

static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
        tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
        tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
         thumb_exit().  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
        {
          asm_fprintf (f, "%r", regno);

          if ((lo_mask & ~1) != 0)
            fprintf (f, ", ");

          pushed_words++;
        }
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
          || crtl->calls_eh_return)
        {
          /* The PC is never popped directly, instead
             it is popped into r3 and then BX is used.  */
          fprintf (f, "}\n");

          thumb_exit (f, -1);

          return;
        }
      else
        {
          if (mask & 0xFF)
            fprintf (f, ", ");

          asm_fprintf (f, "%r", PC_REGNUM);
        }
    }

  fprintf (f, "}\n");
}
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
        asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
           && !TARGET_BACKTRACE
           && !is_called_in_ARM_mode (current_function_decl)
           && !crtl->calls_eh_return)
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
         return value.  This is more reliable than examining
         df_regs_ever_live_p () because that will be set if the register is
         ever used in the function, not just if the register is used
         to hold a return value.  */

      if (crtl->return_rtx != 0)
        mode = GET_MODE (crtl->return_rtx);
      else
        mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
        {
          /* In a void function we can use any argument register.
             In a function that returns a structure on the stack
             we can use the second and third argument registers.  */
          if (mode == VOIDmode)
            regs_available_for_popping =
              (1 << ARG_REGISTER (1))
              | (1 << ARG_REGISTER (2))
              | (1 << ARG_REGISTER (3));
          else
            regs_available_for_popping =
              (1 << ARG_REGISTER (2))
              | (1 << ARG_REGISTER (3));
        }
      else if (size <= 4)
        regs_available_for_popping =
          (1 << ARG_REGISTER (2))
          | (1 << ARG_REGISTER (3));
      else if (size <= 8)
        regs_available_for_popping =
          (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
         reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
          && reg_containing_return_addr == LAST_ARG_REGNUM)
        {
          asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
          reg_containing_return_addr = LR_REGNUM;
        }
      else if (size > 12)
        {
          /* Register a4 is being used to hold part of the return value,
             but we have dire need of a free, low register.  */
          restore_a4 = TRUE;

          asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
        }

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
        {
          /* The fourth argument register is available.  */
          regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

          --pops_needed;
        }
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
        number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
         the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
                   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
        {
          int stack_pointer;

          /* We popped the stack pointer as well,
             find the register that contains it.  */
          stack_pointer = number_of_first_bit_set (regs_available_for_popping);

          /* Move it into the stack register.  */
          asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

          /* At this point we have popped all necessary registers, so
             do not worry about restoring regs_available_for_popping
             to its correct value:

             assert (pops_needed == 0)
             assert (regs_available_for_popping == (1 << frame_pointer))
             assert (regs_to_pop == (1 << STACK_POINTER))  */
        }
      else
        {
          /* Since we have just moved the popped value into the frame
             pointer, the popping register is available for reuse, and
             we know that we still have the stack pointer left to pop.  */
          regs_available_for_popping |= (1 << frame_pointer);
        }
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
                   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
         Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
        assert (regs_to_pop == (1 << STACK_POINTER))
        assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
        {
          asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
          reg_containing_return_addr = LR_REGNUM;
        }

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx_insn *insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
                 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
        {
          if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
              || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
            CC_STATUS_INIT;
        }
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
        {
          rtx set = single_set (insn);
          cfun->machine->thumb1_cc_insn = insn;
          cfun->machine->thumb1_cc_op0 = SET_DEST (set);
          cfun->machine->thumb1_cc_op1 = const0_rtx;
          cfun->machine->thumb1_cc_mode = CC_NOOVmode;
          if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
            {
              rtx src1 = XEXP (SET_SRC (set), 1);
              if (src1 == const0_rtx)
                cfun->machine->thumb1_cc_mode = CCmode;
            }
          else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
            {
              /* Record the src register operand instead of dest because
                 cprop_hardreg pass propagates src.  */
              cfun->machine->thumb1_cc_op0 = SET_SRC (set);
            }
        }
      else if (conds != CONDS_NOCOND)
        cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error ("Unexpected thumb1 far jump");
}
int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
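/* For example, 0x3fc00 (0xff << 10) is accepted, while 0x101 is not,
   since no 8-bit value shifted left by 0..24 bits can produce it.  */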
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx_insn *insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* This test is only important for leaf functions.  */
  /* assert (!leaf_function_p ()); */

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
         of the arg pointer register.  If that register is not being used,
         then there are no arguments on the stack, and we do not have to
         worry that a far jump might force the prologue to push the link
         register, changing the stack offsets.  In this case we can just
         return false, since the presence of far jumps in the function will
         not affect stack offsets.

         If the arg pointer is live (or if it was live, but has now been
         eliminated and so set to dead) then we do have to test to see if
         the function might contain a far jump.  This test can lead to some
         false negatives, since before reload is completed, the length of
         branch instructions is not known, so gcc defaults to returning their
         longest length, which in turn sets the far jump attribute to true.

         A false negative will not result in bad code being generated, but it
         will result in a needless push and pop of the link register.  We
         hope that this does not occur too often.

         If we need doubleword stack alignment this could affect the other
         elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
        cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
        return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
        far_jump = true;

      func_size += get_attr_length (insn);
    }

  /* Attribute far_jump will always be true for thumb1 before
     shorten_branch pass.  So checking far_jump attribute before
     shorten_branch isn't very useful.

     The following heuristic tries to estimate more accurately if a far jump
     may finally be used.  The heuristic is very conservative as there is
     no chance to roll back the decision not to use a far jump.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
  if (far_jump)
    {
      if ((func_size * 3) >= 2048)
        {
          /* Record the fact that we have decided that
             the function does use far jumps.  */
          cfun->machine->far_jump_used = 1;
          return 1;
        }
    }

  return 0;
}
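/* Under this heuristic a function of about 683 bytes or more (683 * 3 =
   2049 >= 2048) that contains a candidate branch is assumed to need far
   jumps.  */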
/* Return nonzero if FUNC must be entered in ARM mode.  */
static bool
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;

#ifdef ARM_PE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return false;
#endif
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instructions.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
          || TARGET_BACKTRACE
          || (live_regs_mask & 1 << LR_REGNUM) == 0
          || TARGET_INTERWORK
          || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
          || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
         && (for_prologue || call_used_regs[reg_base + n_free]))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
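/* Worked example: for a 512-byte frame with three suitable free low
   registers, amount == 512 and n_free == 3, so (512 - 508) / 4 == 1
   extra register is pushed and the remaining SP adjustment drops to
   508, which fits in a single Thumb-1 SUB immediate.  */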
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
         returning.  */
      if (size <= 12)
	mask |= 1 << 3;
      if (size <= 8)
	mask |= 1 << 2;

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
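/* Illustrative example (not taken from a real testcase): if r8 and r9
   were saved and r2/r3 are free, the loop above emits

	pop	{r2, r3}
	mov	r8, r2
	mov	r9, r3

   using whichever low registers remain once the return value's
   registers have been excluded from the mask.  */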
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Check that FUNC is called with a different mode.  */

bool
arm_change_mode_p (tree func)
{
  if (TREE_CODE (func) != FUNCTION_DECL)
    return false;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  int flags = callee_opts->x_target_flags;

  return (TARGET_THUMB_P (flags) != TARGET_THUMB);
}
/* Like arm_compute_initial_elimination_offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
/* Generate the function's prologue.  */

void
thumb1_expand_prologue (void)
{
  rtx_insn *insn;

  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
	{
	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
	  unsigned long mask;

	  mask = 1ul << (LAST_ARG_REGNUM + 1);
	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

	  insn = thumb1_emit_multi_reg_push (mask, 0);
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, x));
	}
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
         The code looks like this:

	 0   .align 2
	 0   func:
	 0     sub   SP, #16         Reserve space for 4 registers.
	 2     push  {R7}            Push low registers.
	 4     add   R7, SP, #20     Get the stack pointer before the push.
	 6     str   R7, [SP, #8]    Store the stack pointer
					(before reserving the space).
	 8     mov   R7, PC          Get hold of the start of this code + 12.
	10     str   R7, [SP, #16]   Store it.
	12     mov   R7, FP          Get hold of the current frame pointer.
	14     str   R7, [SP, #4]    Store it.
	16     mov   R7, LR          Get hold of the current return address.
	18     str   R7, [SP, #12]   Store it.
	20     add   R7, SP, #16     Point at the start of the
					backtrace structure.
	22     mov   FP, R7          Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
	{
	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  offset = bit_count (l_mask) * UNITS_PER_WORD;
	}

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
	 to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
	 prevent the scheduler from doing anything weird.  Failing that
	 we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
	{
	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}
      else
	{
	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization:  If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && l_mask))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
						 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      /* Here we need to mask out registers used for passing arguments
	 even if they can be pushed.  This is to avoid using them to
	 stash the high registers; such stashing would clobber the use
	 of the arguments.  */
      pushable_regs = l_mask & (~arg_regs_mask) & 0xff;

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;

	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  emit_move_insn (gen_rtx_REG (SImode, regno),
				  gen_rtx_REG (SImode, next_hi_reg));

		  high_regs_pushed--;
		  real_regs_mask |= (1 << next_hi_reg);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg--)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    {
		      pushable_regs &= ~((1 << regno) - 1);
		      break;
		    }
		}
	    }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (l_mask == (1 << LR_REGNUM))
	    {
	      pushable_regs |= l_mask;
	      real_regs_mask |= l_mask;
	      l_mask = 0;
	    }

	  insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
		    stack_pointer_rtx);

  if (flag_stack_usage_info)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
					GEN_INT (- amount)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  rtx reg, dwarf;

	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load into a register.  (Also the
	     ARM debugger really likes to see only one stack decrement per
	     function).  So instead we look for a scratch register into which
	     we can load the decrement, and then we subtract this from the
	     stack pointer.  Unfortunately on the thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
	    if (live_regs_mask & (1 << regno))
	      break;

	  gcc_assert (regno <= LAST_LO_REGNUM);

	  reg = gen_rtx_REG (SImode, regno);

	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));

	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, reg));

	  dwarf = gen_rtx_SET (stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -amount));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
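/* Worked example for the anonymous-args push above (illustrative): with
   pretend_args_size == 8, num_pushes is 2, so the mask is
   (1ul << 4) - (1ul << 2) == 0x0c, i.e. "push {r2, r3}"; LAST_ARG_REGNUM
   is r3 on ARM.  */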
/* Generate pattern *pop_multiple_with_stack_update_and_return if a single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is whether a single register or
   multiple registers are to be returned.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      if (num_regs == 1)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
	  rtx addr = gen_rtx_MEM (SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
	  RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
	  emit_jump_insn (par);
	}
      else
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  arm_emit_multi_reg_pop (saved_regs_mask);
	}
    }
  else
    {
      emit_jump_insn (simple_return_rtx);
    }
}
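/* Illustrative examples of what the expansion above becomes: when only
   LR was saved, the two-element PARALLEL is a pop-and-return of the PC
   alone (an "ldr pc, [sp], #4" style sequence); when several registers
   were saved, LR is replaced by PC in the mask and one multi-register
   pop such as "pop {r4, r5, pc}" is emitted.  The exact mnemonic
   depends on the output pattern selected.  */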
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
	  /* r3 is always free in the epilogue.  */
	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));
}
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers are saved and how far away the floats
     are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
	num_regs++;
	floats_from_frame += 4;
      }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
	{
	  rtx_insn *insn;
	  floats_from_frame += saved_size;
	  insn = emit_insn (gen_addsi3 (ip_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (-floats_from_frame)));
	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
				       ip_rtx, hard_frame_pointer_rtx);
	}

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    if (start_reg != i)
	      arm_emit_vfp_multi_reg_pop (start_reg,
					  (i - start_reg) / 2,
					  gen_rtx_REG (SImode,
						       IP_REGNUM));
	    start_reg = i + 2;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (start_reg != i)
	arm_emit_vfp_multi_reg_pop (start_reg,
				    (i - start_reg) / 2,
				    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
	 it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx_insn *insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
	if (df_regs_ever_live_p (i) && !call_used_regs[i])
	  {
	    rtx addr = gen_frame_mem (V2SImode,
				      plus_constant (Pmode,
						     hard_frame_pointer_rtx,
						     - lrm_count * 4));
	    insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					       gen_rtx_REG (V2SImode, i),
					       NULL_RTX);
	    lrm_count += 2;
	  }
    }

  /* saved_regs_mask should contain IP which contains old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent registers,
     we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx_insn *insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    hard_frame_pointer_rtx,
				    GEN_INT (- 4 * num_regs)));

      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
				   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
	 IP onto the stack, so restore it now.  */
      rtx_insn *insn;
      rtx addr = gen_rtx_MEM (SImode,
			      gen_rtx_POST_INC (SImode,
						stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					 gen_rtx_REG (SImode, IP_REGNUM),
					 NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogues.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
	emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      rtx_insn *insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
	{
	  /* In ARM mode, frame pointer points to first saved register.
	     Restore stack pointer to last saved register.  */
	  amount = offsets->frame - offsets->saved_regs;

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (insn, amount,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);

	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
      else
	{
	  /* In Thumb-2 mode, the frame pointer points to the last saved
	     register.  */
	  amount = offsets->locals_base - offsets->saved_regs;
	  if (amount)
	    {
	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					    hard_frame_pointer_rtx,
					    GEN_INT (amount)));
	      arm_add_cfa_adjust_cfa_note (insn, amount,
					   hard_frame_pointer_rtx,
					   hard_frame_pointer_rtx);
	    }

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
				       hard_frame_pointer_rtx));
	  arm_add_cfa_adjust_cfa_note (insn, 0,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
	 last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
	{
	  rtx_insn *tmp;
	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (tmp, amount,
				       stack_pointer_rtx, stack_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
	 any groupings made in the prologue and generate matching
	 vldm operations.  The need to match groups is because,
	 unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    /* Restore the regs discovered so far (from reg+2 to
	       end_reg).  */
	    if (end_reg > i + 2)
	      arm_emit_vfp_multi_reg_pop (i + 2,
					  (end_reg - (i + 2)) / 2,
					  stack_pointer_rtx);
	    end_reg = i;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (end_reg > i + 2)
	arm_emit_vfp_multi_reg_pop (i + 2,
				    (end_reg - (i + 2)) / 2,
				    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (df_regs_ever_live_p (i) && !call_used_regs[i])
	{
	  rtx_insn *insn;
	  rtx addr = gen_rtx_MEM (V2SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	  REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					     gen_rtx_REG (V2SImode, i),
					     NULL_RTX);
	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}

  if (saved_regs_mask)
    {
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
	  && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !crtl->calls_eh_return)
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  return_in_pc = true;
	}

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
	{
	  for (i = 0; i <= LAST_ARM_REGNUM; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx addr = gen_rtx_MEM (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx));
		set_mem_alias_set (addr, get_frame_alias_set ());

		if (i == PC_REGNUM)
		  {
		    rtx insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
		    XVECEXP (insn, 0, 0) = ret_rtx;
		    XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
							addr);
		    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
		    emit_jump_insn (insn);
		  }
		else
		  {
		    rtx_insn *insn
		      = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
					      addr));
		    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
						       gen_rtx_REG (SImode, i),
						       NULL_RTX);
		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		  }
	      }
	}
      else
	{
	  if (TARGET_LDRD
	      && current_tune->prefer_ldrd_strd
	      && !optimize_function_for_size_p (cfun))
	    {
	      if (TARGET_THUMB2)
		thumb2_emit_ldrd_pop (saved_regs_mask);
	      else if (TARGET_ARM && !IS_INTERRUPT (func_type))
		arm_emit_ldrd_pop (saved_regs_mask);
	      else
		arm_emit_multi_reg_pop (saved_regs_mask);
	    }
	  else
	    arm_emit_multi_reg_pop (saved_regs_mask);
	}

      if (return_in_pc)
	return;
    }

  if (crtl->args.pretend_args_size)
    {
      int i, j;
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp
	= emit_insn (gen_addsi3 (stack_pointer_rtx,
				 stack_pointer_rtx,
				 GEN_INT (crtl->args.pretend_args_size)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
	{
	  /* Restore pretend args.  Refer to arm_expand_prologue for how
	     pretend_args are saved on the stack.  */
	  int num_regs = crtl->args.pretend_args_size / 4;
	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
	  for (j = 0, i = 0; j < num_regs; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx reg = gen_rtx_REG (SImode, i);
		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
		j++;
	      }
	  REG_NOTES (tmp) = dwarf;
	}
      arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));

  emit_jump_insn (simple_return_rtx);
}
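/* Illustrative example: a normal ARM-state function that saved
   {r4, r5, lr} and does a real return takes the LR-to-PC rewrite above
   and restores everything with a single

	pop	{r4, r5, pc}

   whereas a sibcall (really_return == false) keeps LR in the mask,
   pops {r4, r5, lr} and returns early without emitting a return.  */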
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char *name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
	      == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
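/* The emitted stub looks roughly like this (illustrative; <name> stands
   for the possibly prefixed function name):

	orr	r12, pc, #1
	bx	r12
	.code	16
	.globl	.real_start_of<name>
	.thumb_func
   .real_start_of<name>:
  */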
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
	{
	  int reg_offset = REGNO (offset);
	  int reg_base = REGNO (base);
	  int reg_dest = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
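/* Example of the overlap case above (illustrative): with operands[0]
   being r2 and the address also held in r2, the high word is loaded
   first so that the base register is not clobbered:

	ldr	r3, [r2, #4]
	ldr	r2, [r2]

   In the non-overlapping case the low word is loaded first.  */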
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
	std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
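/* Example output for n == 3 (illustrative): after the register sorting
   above, a copy using r3, r4 and r5 becomes

	ldmia	r1!, {r3, r4, r5}
	stmia	r0!, {r3, r4, r5}

   The sort matters because ldmia/stmia transfer the register list in
   ascending register order.  */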
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
								offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
								offset)),
			    reg));
    }
}
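/* Worked example (illustrative): a 23-byte copy is expanded as one
   12-byte ldmia/stmia block (movmem12b), one 8-byte block (movmem8b),
   then a halfword at offset 0 and a byte at offset 2 from the updated
   pointers; 23 = 12 + 8 + 2 + 1.  */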
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Handle reading a half-word from memory during reload.  */
void
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (E.g. gas prior to 2.20.)  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
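/* For example (illustrative), arm_emit_eabi_attribute
   ("Tag_ABI_enum_size", 26, 2) produces

	.eabi_attribute 26, 2	@ Tag_ABI_enum_size

   with the trailing comment present only under -fverbose-asm or -dA.  */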
/* This function is used to print CPU tuning information as comment
   in assembler file.  Pointers are not printed for now.  */

void
arm_print_tune_info (void)
{
  asm_fprintf (asm_out_file, "\t@.tune parameters\n");
  asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
	       current_tune->constant_limit);
  asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
	       current_tune->max_insns_skipped);
  asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
	       current_tune->prefetch.num_slots);
  asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
	       current_tune->prefetch.l1_cache_size);
  asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
	       current_tune->prefetch.l1_cache_line_size);
  asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
	       (int) current_tune->prefer_constant_pool);
  asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
  asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
  asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
	       current_tune->branch_cost (false, false));
  asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
	       current_tune->branch_cost (false, true));
  asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
	       current_tune->branch_cost (true, false));
  asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
	       current_tune->branch_cost (true, true));
  asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
	       (int) current_tune->prefer_ldrd_strd);
  asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
	       (int) current_tune->logical_op_non_short_circuit_thumb,
	       (int) current_tune->logical_op_non_short_circuit_arm);
  asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
	       (int) current_tune->prefer_neon_for_64bits);
  asm_fprintf (asm_out_file,
	       "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
	       (int) current_tune->disparage_flag_setting_t16_encodings);
  asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
	       (int) current_tune->string_ops_prefer_neon);
  asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
	       current_tune->max_insns_inline_memset);
  asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
	       current_tune->fusible_ops);
  asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
	       (int) current_tune->sched_autopref);
}
static void
arm_file_start (void)
{
  int val;

  if (TARGET_BPABI)
    {
      const char *fpu_name;
      if (arm_selected_arch)
	{
	  /* armv7ve doesn't support any extensions.  */
	  if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
	    {
	      /* Keep backward compatibility for assemblers
		 which don't support armv7ve.  */
	      asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
	    }
	  else
	    {
	      const char* pos = strchr (arm_selected_arch->name, '+');
	      if (pos)
		{
		  char buf[15];
		  gcc_assert (strlen (arm_selected_arch->name)
			      <= sizeof (buf) / sizeof (*pos));
		  strncpy (buf, arm_selected_arch->name,
			   (pos - arm_selected_arch->name) * sizeof (*pos));
		  buf[pos - arm_selected_arch->name] = '\0';
		  asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
		  asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
		}
	      else
		asm_fprintf (asm_out_file, "\t.arch %s\n",
			     arm_selected_arch->name);
	    }
	}
      else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
      else
	{
	  const char* truncated_name
	    = arm_rewrite_selected_cpu (arm_selected_cpu->name);
	  asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
	}

      if (print_tune_info)
	arm_print_tune_info ();

      if (TARGET_SOFT_FLOAT)
	{
	  fpu_name = "softvfp";
	}
      else
	{
	  fpu_name = arm_fpu_desc->name;
	  if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
	    {
	      if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
		arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);

	      if (TARGET_HARD_FLOAT_ABI)
		arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
	    }
	}
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
	{
	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
	}
      if (flag_signaling_nans)
	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
			       flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
			       flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
			       unaligned_access);

      if (arm_fp16_format)
	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
				 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}
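/* Illustrative file preamble produced by the code above (assumed flags
   -mcpu=cortex-a9 -mfpu=neon -mfloat-abi=softfp -O2; the exact set of
   attributes depends on the command line and configuration defaults):

	.cpu	cortex-a9
	.fpu	neon
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 2
	.eabi_attribute 34, 1
  */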
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		     HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
		     tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
				       function)
		    ? 1 : 0);

  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	{
	  fputs ("\tldr\tr12, ", file);
	}
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	{
	  /* Thumb1 unified syntax requires s suffix in instruction name when
	     one of the operands is immediate.  */
	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
		       mi_op, this_regno, this_regno,
		       mi_delta);
	}
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
	     pipeline offset is four rather than eight.  Adjust the offset
	     accordingly.  */
	  tem = plus_constant (GET_MODE (tem), tem,
			       TARGET_THUMB1_ONLY ? -3 : -7);
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
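/* Illustrative non-PIC ARM-state thunk for delta == 4:

	add	r0, r0, #4
	b	target_function

   For Thumb-1 the branch instead goes via r12, with the target address
   materialised from the literal word emitted after the code.  */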
int
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  REAL_VALUE_TYPE r;
  long bits;

  REAL_VALUE_FROM_CONST_DOUBLE (r, c);
  bits = real_to_target (NULL, &r, HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands[1])
      || GET_CODE (sum = XEXP (operands[1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    machine_mode mode,
			    tree type,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
	nregs++;
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
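/* Worked example (illustrative): for "int f (int a, ...)" under AAPCS,
   one core register is named (nregs == 1), so *pretend_size becomes
   (4 - 1) * UNITS_PER_WORD == 12 and the prologue pushes r1-r3 below
   the caller's stacked arguments.  */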
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}
/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}


/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}


/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}

      /* The store needs to be marked as frame related in order to prevent
	 DSE from deleting it as dead if it is based on fp.  */
      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
    }
}
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  if (TARGET_THUMB1)
	    limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked as frame related in order to prevent
	 DSE from deleting it as dead if it is based on fp.  */
      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
          || (mode == V4HImode)
          || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  return false;
}

/* Implements target hook array_mode_supported_p.  */
static bool
arm_array_mode_supported_p (machine_mode mode,
                            unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static machine_mode
arm_preferred_simd_mode (machine_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case SFmode:
        return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case SImode:
        return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case HImode:
        return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case QImode:
        return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case DImode:
        if (!TARGET_NEON_VECTORIZE_DOUBLE)
          return V2DImode;
        break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case SImode:
        return V2SImode;
      case HImode:
        return V4HImode;
      case QImode:
        return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
static bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
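
/* A minimal sketch (ours) of what a truncation mask of 255 licenses:
   for SImode shifts the middle-end may delete an explicit "& 255" on
   the shift count, because ARM register-specified shifts already use
   only the least significant byte of the count.  */
#if 0 /* Illustrative example only, not compiled.  */
unsigned int
example_shift (unsigned int x, unsigned int n)
{
  /* With arm_shift_truncation_mask () == 255 for SImode, the AND below
     is redundant and may be removed by the compiler.  */
  return x << (n & 255);
}
#endif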
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
        return 64 + regno - FIRST_VFP_REGNUM;
      else
        return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}
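
/* Worked examples of the mapping above (ours): s1, at regno
   FIRST_VFP_REGNUM + 1, is valid as a single-precision register and
   maps to 64 + 1 = 65 in the legacy S-register range; d16, at regno
   FIRST_VFP_REGNUM + 32, is not, and maps to 256 + 32 / 2 = 272 in the
   D-register range.  */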
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
        64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
        256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
        if (TARGET_BIG_END)
          {
            parts[i] = gen_rtx_REG (SImode, regno + i + 1);
            parts[i + 1] = gen_rtx_REG (SImode, regno + i);
          }
        else
          {
            parts[i] = gen_rtx_REG (SImode, regno + i);
            parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
          }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
        parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  unsigned padfirst = 0, padlast = 0;
  rtx e;

  e = XVECEXP (p, 0, 0);
  gcc_assert (GET_CODE (e) == SET);

  /* First insn will adjust the stack pointer.  */
  gcc_assert (GET_CODE (e) == SET
              && REG_P (SET_DEST (e))
              && REGNO (SET_DEST (e)) == SP_REGNUM
              && GET_CODE (SET_SRC (e)) == PLUS);

  offset = -INTVAL (XEXP (SET_SRC (e), 1));
  nregs = XVECLEN (p, 0) - 1;
  gcc_assert (nregs);

  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
  if (reg < 16)
    {
      /* For -Os dummy registers can be pushed at the beginning to
         avoid separate stack pointer adjustment.  */
      e = XVECEXP (p, 0, 1);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
        padfirst = INTVAL (XEXP (e, 1));
      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but not annotate it as it is
         never restored.  We turn this into a stack pointer adjustment.  */
      e = XVECEXP (p, 0, nregs);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
        padlast = offset - INTVAL (XEXP (e, 1)) - 4;
      else
        padlast = offset - 4;
      gcc_assert (padlast == 0 || padlast == 4);
      if (padlast == 4)
        fprintf (asm_out_file, "\t.pad #4\n");
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  gcc_assert (offset == padfirst + nregs * reg_size + padlast);

  offset = padfirst;
  lastreg = 0;

  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
         Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      gcc_assert (GET_CODE (e) == SET
                  && MEM_P (SET_DEST (e))
                  && REG_P (SET_SRC (e)));

      reg = REGNO (SET_SRC (e));
      gcc_assert (reg >= lastreg);
      lastreg = reg;

      if (i != 1)
        fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
         double precision register names.  */
      if (IS_VFP_REGNUM (reg))
        asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
        gcc_assert (REG_P (XEXP (e, 0))
                    && REGNO (XEXP (e, 0)) == SP_REGNUM
                    && CONST_INT_P (XEXP (e, 1))
                    && offset == INTVAL (XEXP (e, 1)));
      else
        gcc_assert (i == 1
                    && REG_P (e)
                    && REGNO (e) == SP_REGNUM);
      offset += reg_size;
#endif
    }
  fprintf (asm_out_file, "}\n");
  if (padfirst)
    fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
}
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
          || !REG_P (XEXP (XEXP (e0, 0), 0))
          || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
        abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
        asm_fprintf(asm_out_file, "{d%d}\n",
                    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
        {
          /* A stack increment.  */
          if (GET_CODE (e1) != PLUS
              || !REG_P (XEXP (e1, 0))
              || REGNO (XEXP (e1, 0)) != SP_REGNUM
              || !CONST_INT_P (XEXP (e1, 1)))
            abort ();

          asm_fprintf (asm_out_file, "\t.pad #%wd\n",
                       -INTVAL (XEXP (e1, 1)));
        }
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
        {
          HOST_WIDE_INT offset;

          if (GET_CODE (e1) == PLUS)
            {
              if (!REG_P (XEXP (e1, 0))
                  || !CONST_INT_P (XEXP (e1, 1)))
                abort ();
              reg = REGNO (XEXP (e1, 0));
              offset = INTVAL (XEXP (e1, 1));
              asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
                           HARD_FRAME_POINTER_REGNUM, reg,
                           offset);
            }
          else if (REG_P (e1))
            {
              reg = REGNO (e1);
              asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
                           HARD_FRAME_POINTER_REGNUM, reg);
            }
          else
            abort ();
        }
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
        {
          /* Move from sp to reg.  */
          asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
        }
      else if (GET_CODE (e1) == PLUS
               && REG_P (XEXP (e1, 0))
               && REGNO (XEXP (e1, 0)) == SP_REGNUM
               && CONST_INT_P (XEXP (e1, 1)))
        {
          /* Set reg to offset from sp.  */
          asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
                       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
        }
      else
        abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
          || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
        {
        case REG_FRAME_RELATED_EXPR:
          pat = XEXP (note, 0);
          goto found;

        case REG_CFA_REGISTER:
          pat = XEXP (note, 0);
          if (pat == NULL)
            {
              pat = PATTERN (insn);
              if (GET_CODE (pat) == PARALLEL)
                pat = XVECEXP (pat, 0, 0);
            }

          /* Only emitted for IS_STACKALIGN re-alignment.  */
          {
            rtx dest, src;
            unsigned reg;

            src = SET_SRC (pat);
            dest = SET_DEST (pat);

            gcc_assert (src == stack_pointer_rtx);
            reg = REGNO (dest);
            asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
                         reg + 0x90, reg);
          }
          handled_one = true;
          break;

        /* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
           to get correct dwarf information for shrink-wrap.  We should not
           emit unwind information for it because these are used either for
           pretend arguments or notes to adjust sp and restore registers from
           stack.  */
        case REG_CFA_DEF_CFA:
        case REG_CFA_ADJUST_CFA:
        case REG_CFA_RESTORE:
          return;

        case REG_CFA_EXPRESSION:
        case REG_CFA_OFFSET:
          /* ??? Only handling here what we actually emit.  */
          gcc_unreachable ();

        default:
          break;
        }
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      break;
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
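
/* Illustrative output (ours): for the type_info object of a class A the
   function above emits

        .word   _ZTI1A(TARGET2)

   and the (TARGET2) decoration makes the assembler use an R_ARM_TARGET2
   relocation for the reference.  */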
/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
  exception_section = get_unnamed_section (0, output_section_asm_op,
                                           "\t.handlerdata");
}
#endif /* ARM_UNWIND_INFO */
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
         The same condition is used in arm_unwind_emit to suppress
         the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
          && (TREE_NOTHROW (current_function_decl)
              || crtl->all_throwers_are_sibcalls))
        fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputs (")", fp);
      break;
    default:
      break;
    }

  return TRUE;
}
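
/* Illustrative output (ours; operand details vary): a general-dynamic
   access to a TLS variable "x" is decorated roughly as

        .word   x(tlsgd) + (. - .LPIC0 - 8)

   where the trailing label arithmetic comes from the third and fourth
   UNSPEC operands printed above.  */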
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
        fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs (")", fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputs (")", fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op(operands[3], &val);
      if (shift)
        {
          if (val != -1)
            operands[2] = GEN_INT(val);
          sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
        }
      else
        sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier).  */
  if (((opmode == V4HImode) && (shift > 15))
      || ((opmode == V2SImode) && (shift > 31))
      || ((opmode == DImode) && (shift > 63)))
    {
      if (wror_or_wsra)
        {
          sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
          output_asm_insn (templ, operands);
          if (opmode == DImode)
            {
              sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
              output_asm_insn (templ, operands);
            }
        }
      else
        {
          /* The destination register will contain all zeros.  */
          sprintf (templ, "wzero\t%%0");
          output_asm_insn (templ, operands);
        }
      return "";
    }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];

  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
        break;

      mask >>= 1;
    }
  gcc_assert (i < units);

  switch (GET_MODE (operands[0]))
    {
    case V8QImode:
      sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
      break;
    case V4HImode:
      sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
      break;
    case V2SImode:
      sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
      break;
    default:
      gcc_unreachable ();
      break;
    }
  output_asm_insn (templ, operands);

  return "";
}
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
              "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
              "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
          output_asm_insn ("add\t%4, %4, %5", operands);
          return "bx\t%4";
        }
      else
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          return "ldr\t%|pc, [%4, %0, lsl #2]";
        }
    default:
      gcc_unreachable ();
    }
}
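
/* An illustrative Thumb-2 dispatch (ours) for a small dense switch,
   assembled from the fragments above when the table fits in QImode:

        cmp     r0, #9          @ index against table size
        bhi     .Ldefault       @ out of range: take the default label
        tbb     [pc, r0]        @ branch to pc + 2 * table[r0]

   TBB/TBH table entries hold half the branch offset, which is why the
   HImode form indexes the table with "lsl #1".  */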
/* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
   per-core tuning structs.  */
static int
arm_issue_rate (void)
{
  return current_tune->issue_rate;
}

/* Return how many instructions the scheduler should look ahead to choose
   the best one.  */
static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Enable modeling of L2 auto-prefetcher.  */
static int
arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}
const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
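
/* Illustrative manglings (ours): with AAPCS, "void f (va_list)" mangles
   as _Z1fSt9__va_list through the special case above, and since __fp16
   mangles as "Dh", "void g (__fp16)" becomes _Z1gDh.  */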
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
            sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
          || SUBTARGET_FRAME_POINTER_REQUIRED
          || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}

/* Only Thumb-1 lacks conditional execution, so return true if the
   target is not Thumb-1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}

static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}

static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}

static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
                                         const_tree type, int misalignment,
                                         bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
        return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
         so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
        return true;

      /* Return true if the misalignment is a multiple of the natural alignment
         of the vector's element type.  This is probably always going to be
         true in practice, since we've already established that this isn't a
         packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
                                                      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
         to use the HI regs, because of the overhead of
         stacking them.  */
      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
        fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* VFPv3 registers are disabled when earlier VFP
         versions are selected due to the definition of
         LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM;
           regno <= LAST_VFP_REGNUM; ++ regno)
        {
          fixed_regs[regno] = 0;
          call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
            || regno >= FIRST_VFP_REGNUM + 32;
        }
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
         and wCG1 as call-preserved registers.  The 2002/11/21
         revision changed this so that all wCG registers are
         scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
           regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
        fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
         the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
           regno <= LAST_IWMMXT_REGNUM; ++ regno)
        {
          fixed_regs[regno] = 0;
          call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
        }
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10] = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
        global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];
  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
  if (exact_real_inverse (DFmode, &r0))
    {
      if (exact_real_truncate (DFmode, &r0))
        {
          HOST_WIDE_INT value = real_to_integer (&r0);
          value = value & 0xffffffff;
          if ((value != 0) && ( (value & (value - 1)) == 0))
            return int_log2 (value);
        }
    }
  return 0;
}

int
vfp3_const_double_for_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
  if (exact_real_truncate (DFmode, &r0))
    {
      HOST_WIDE_INT value = real_to_integer (&r0);
      value = value & 0xffffffff;
      if ((value != 0) && ( (value & (value - 1)) == 0))
        return int_log2 (value);
    }

  return 0;
}
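
/* A worked example (ours) for the predicates above: the constant 0.125
   has exact inverse 8.0, which truncates exactly to the integer 8, a
   power of two, so vfp3_const_double_for_fract_bits returns
   int_log2 (8) = 3, precisely the #fbits operand a fixed-point vcvt
   instruction needs.  */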
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
        {
        case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
        case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
        case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
        case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case QImode: gen = gen_arm_load_exclusiveqi; break;
        case HImode: gen = gen_arm_load_exclusivehi; break;
        case SImode: gen = gen_arm_load_exclusivesi; break;
        case DImode: gen = gen_arm_load_exclusivedi; break;
        default:
          gcc_unreachable ();
        }
    }

  emit_insn (gen (rval, mem));
}

static void
arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
                          rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
        {
        case QImode: gen = gen_arm_store_release_exclusiveqi; break;
        case HImode: gen = gen_arm_store_release_exclusivehi; break;
        case SImode: gen = gen_arm_store_release_exclusivesi; break;
        case DImode: gen = gen_arm_store_release_exclusivedi; break;
        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case QImode: gen = gen_arm_store_exclusiveqi; break;
        case HImode: gen = gen_arm_store_exclusivehi; break;
        case SImode: gen = gen_arm_store_exclusivesi; break;
        case DImode: gen = gen_arm_store_exclusivedi; break;
        default:
          gcc_unreachable ();
        }
    }

  emit_insn (gen (bval, rval, mem));
}
/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
         so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case SImode:
      /* Force the value into a register if needed.  We waited until after
         the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
        oldval = force_reg (SImode, oldval);
      break;

    case DImode:
      if (!cmpdi_operand (oldval, mode))
        oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  */
  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (bval, x));
}
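
/* An illustrative SImode strong compare-and-swap loop (ours), as the
   expansion above plus the post-reload split below eventually emit it:

        .L1:    ldrex   r0, [r2]        @ rval = *mem
                cmp     r0, r3          @ rval == oldval?
                bne     .L2
                strex   r1, r4, [r2]    @ try newval; r1 = 0 on success
                cmp     r1, #0
                bne     .L1             @ lost the monitor: retry
        .L2:                            @ Z is set iff the swap succeeded

   Acquire/release variants or explicit barriers are added around this
   according to the memory model.  */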
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  mod_s = memmodel_from_int (INTVAL (operands[5]));
  mod_f = memmodel_from_int (INTVAL (operands[6]));
  scratch = operands[7];
  mode = GET_MODE (mem);

  bool use_acquire = TARGET_HAVE_LDACQ
                     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
                          || is_mm_release (mod_s));

  bool use_release = TARGET_HAVE_LDACQ
                     && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
                          || is_mm_acquire (mod_s));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));

  arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  cond = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
  emit_insn (gen_rtx_SET (cond, x));

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                                gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }

  if (!is_mm_relaxed (mod_f))
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (label2);
}
void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
                     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  rtx x;

  bool use_acquire = TARGET_HAVE_LDACQ
                     && !(is_mm_relaxed (model) || is_mm_consume (model)
                          || is_mm_release (model));

  bool use_release = TARGET_HAVE_LDACQ
                     && !(is_mm_relaxed (model) || is_mm_consume (model)
                          || is_mm_acquire (model));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
        {
          value = GEN_INT (-INTVAL (value));
          code = PLUS;
        }
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
        {
          /* DImode plus/minus need to clobber flags.  */
          /* The adddi3 and subdi3 patterns are incorrectly written so that
             they require matching operands, even when we could easily support
             three operands.  Thankfully, this can be fixed up post-splitting,
             as the individual add+adc patterns do accept three operands and
             post-reload cprop can make these moves go away.  */
          emit_move_insn (new_out, old_out);
          if (code == PLUS)
            x = gen_adddi3 (new_out, new_out, value);
          else
            x = gen_subdi3 (new_out, new_out, value);
          emit_insn (x);
          break;
        }
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
                            use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
        emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
        emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
        {
          pair = gen_reg_rtx (V16QImode);
          emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
          pair = gen_lowpart (TImode, pair);
          emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
        }
      else
        {
          pair = gen_reg_rtx (OImode);
          emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
          emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
        }
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
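
/* A sketch in C (ours) of the masking just performed: VTBL writes zero
   for out-of-range indices instead of wrapping, so the selector is
   reduced first and the expansion behaves as VEC_PERM_EXPR requires:

     result[i] = src[sel[i] & (2 * nelt - 1)];   (two-operand case)
     result[i] = src[sel[i] & (nelt - 1)];       (one-operand case)  */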
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
        return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen)(rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_neon_vrev64v16qi; break;
        case V8QImode:  gen = gen_neon_vrev64v8qi;  break;
        default:
          return false;
        }
      break;
    case 3:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_neon_vrev32v16qi; break;
        case V8QImode:  gen = gen_neon_vrev32v8qi;  break;
        case V8HImode:  gen = gen_neon_vrev64v8hi;  break;
        case V4HImode:  gen = gen_neon_vrev64v4hi;  break;
        default:
          return false;
        }
      break;
    case 1:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_neon_vrev16v16qi; break;
        case V8QImode:  gen = gen_neon_vrev16v8qi;  break;
        case V8HImode:  gen = gen_neon_vrev32v8hi;  break;
        case V4HImode:  gen = gen_neon_vrev32v4hi;  break;
        case V4SImode:  gen = gen_neon_vrev64v4si;  break;
        case V2SImode:  gen = gen_neon_vrev64v2si;  break;
        case V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
        case V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
        default:
          return false;
        }
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
        /* This is guaranteed to be true as the value of diff
           is 7, 3, 1 and we should have enough elements in the
           queue to generate this.  Getting a vector mask with a
           value of diff other than these values implies that
           something is wrong by the time we get here.  */
        gcc_assert (i + j < nelt);
        if (d->perm[i + j] != i + diff - j)
          return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
        return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indexes are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
         the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
        return false;

      /* If we are operating on only one vector: it could be a
         rotation.  If there are only two elements of size < 64, let
         arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
        {
          if ((nelt == 2) && (d->vmode != V2DImode))
            return false;

          next = 0;
        }

      if (d->perm[i] != next)
        return false;
    }

  location = d->perm[0];

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vextv16qi; break;
    case V8QImode: gen = gen_neon_vextv8qi; break;
    case V4HImode: gen = gen_neon_vextv4hi; break;
    case V8HImode: gen = gen_neon_vextv8hi; break;
    case V2SImode: gen = gen_neon_vextv2si; break;
    case V4SImode: gen = gen_neon_vextv4si; break;
    case V2SFmode: gen = gen_neon_vextv2sf; break;
    case V4SFmode: gen = gen_neon_vextv4sf; break;
    case V2DImode: gen = gen_neon_vextv2di; break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
        d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
        return true;
      if (arm_evpc_neon_vzip (d))
        return true;
      if (arm_evpc_neon_vrev (d))
        return true;
      if (arm_evpc_neon_vtrn (d))
        return true;
      return arm_evpc_neon_vtbl (d);
    }
  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      /* FALLTHRU */
    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
arm_vectorize_vec_perm_const_ok (machine_mode vmode,
                                 const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Categorize the set of elements in the selector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
    /* Post increment and Pre Decrement are supported for all
       instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
        {
          if (code != ARM_PRE_DEC)
            return true;
          else
            return false;
        }
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
         word size, there is no point in auto-incrementing
         because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
        return false;

      /* Vector and floating point modes do not support
         these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
        return false;

      return true;

    default:
      return false;
    }

  return false;
}
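
/* Illustrative addressing forms (ours) for the predicate above:
   post-increment such as "ldr r0, [r1], #4" is accepted for every mode,
   pre-decrement is accepted for everything except vector modes, and the
   pre-increment/post-decrement forms additionally require LDRD (or a
   mode no wider than a word) and reject float and vector modes.  */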
28387 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
28388 on ARM, since we know that shifts by negative amounts are no-ops.
28389 Additionally, the default expansion code is not available or suitable
28390 for post-reload insn splits (this can occur when the register allocator
28391 chooses not to do a shift in NEON).
28393 This function is used in both initial expand and post-reload splits, and
28394 handles all kinds of 64-bit shifts.
28396 Input requirements:
28397 - It is safe for the input and output to be the same register, but
28398 early-clobber rules apply for the shift amount and scratch registers.
28399 - Shift by register requires both scratch registers. In all other cases
28400 the scratch registers may be NULL.
28401 - Ashiftrt by a register also clobbers the CC register. */
28403 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
28404 rtx amount
, rtx scratch1
, rtx scratch2
)
28406 rtx out_high
= gen_highpart (SImode
, out
);
28407 rtx out_low
= gen_lowpart (SImode
, out
);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
	in = the register pair containing the input value.
	out = the destination register pair.
	up = the high- or low-part of each pair.
	down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
	      && (REG_P (out) || GET_CODE (out) == SUBREG)
	      && GET_MODE (out) == DImode);
  gcc_assert (in
	      && (REG_P (in) || GET_CODE (in) == SUBREG)
	      && GET_MODE (in) == DImode);
  gcc_assert (amount
	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
		   && GET_MODE (amount) == SImode)
		  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
	      || (GET_CODE (scratch1) == SCRATCH)
	      || (GET_MODE (scratch1) == SImode
		  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
	      || (GET_CODE (scratch2) == SCRATCH)
	      || (GET_MODE (scratch2) == SImode
		  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
	      || !HARD_REGISTER_P (out)
	      || (REGNO (out) != REGNO (amount)
		  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
#define SUB_32(DEST,SRC) \
	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
#define RSB_32(DEST,SRC) \
	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
#define SUB_S_32(DEST,SRC) \
	    gen_addsi3_compare0 ((DEST), (SRC), \
				 GEN_INT (-32))
#define SET(DEST,SRC) \
	    gen_rtx_SET ((DEST), (SRC))
#define SHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
#define LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
			    SImode, (SRC), (AMOUNT))
#define REV_LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
			    SImode, (SRC), (AMOUNT))
#define ORR(A,B) \
	    gen_rtx_IOR (SImode, (A), (B))
#define BRANCH(COND,LABEL) \
	    gen_arm_cond_branch ((LABEL), \
				 gen_rtx_ ## COND (CCmode, cc_reg, \
						   const0_rtx), \
				 cc_reg)

  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result an ARM instruction in a
	 shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behaviour", in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up,
				    GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
	 Swapping them around now allows the later code to be more general.  */
      switch (code)
	{
	case ASHIFT:
	  emit_insn (SUB_32 (scratch1, amount));
	  emit_insn (RSB_32 (scratch2, amount));
	  break;
	case ASHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  /* Also set CC = amount > 32.  */
	  emit_insn (SUB_S_32 (scratch2, amount));
	  break;
	case LSHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  emit_insn (SUB_32 (scratch2, amount));
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Emit code like this:

	 ASHIFT:
	    out_down = in_down << amount;
	    out_down = (in_up << (amount - 32)) | out_down;
	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	    out_up = in_up << amount;

	 ASHIFTRT:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    out_down = ((signed)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 LSHIFTRT:
	    out_down = in_down >> amount;
	    out_down = (in_up << (32 - amount)) | out_down;
	    out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	    out_up = in_up >> amount;

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
	{
	  /* Emit code for ARM mode.  */
	  emit_insn (SET (out_down,
			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
					     out_down)));
	      emit_label (done_label);
	    }
	  else
	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
					   out_down)));
	}
      else
	{
	  /* Emit code for Thumb2 mode.
	     Thumb2 can't do shift and or in one insn.  */
	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
	      emit_label (done_label);
	    }
	  else
	    {
	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
	    }
	}

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

#undef SUB_32
#undef RSB_32
#undef SUB_S_32
#undef SET
#undef SHIFT
#undef LSHIFT
#undef REV_LSHIFT
#undef ORR
#undef BRANCH
}
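/* Illustrative examples (added commentary, not original code): for a
   constant logical right shift of 40, only the "greater than 31" arm
   above runs, giving
       out_down = in_up >> 8;	(adj_amount == 40 - 32)
       out_up   = 0;
   while a constant left shift of 4 uses the "less than 32" arm:
       out_down = in_high << 4;
       out_down |= (unsigned) in_low >> 28;	(reverse_amount == 28)
       out_up   = in_low << 4;
   remembering that for ASHIFT "down" is the high part of each pair.  */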
/* Return TRUE if this is a valid comparison operation, and rewrite the
   operands into a form that is valid.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case SImode:
      if (!arm_add_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case DImode:
      if (!cmpdi_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case SFmode:
    case DFmode:
      if (!arm_float_compare_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_float_compare_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    default:
      break;
    }

  return false;
}
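/* Worked example (added commentary, not original code): for a DImode
   comparison (gt (reg:DI r0) (const_int 5)), the DImode case above
   forces (const_int 5) into a register when it is not a valid
   cmpdi_operand, so the caller can emit the comparison directly.
   UNEQ and LTGT are rejected outright, so callers must not rely on
   those codes being expanded here.  */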
/* Maximum number of instructions to set block of memory.  */
static inline int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}
/* Return TRUE if it's profitable to set block of memory for the
   non-vectorized case.  VAL is the value to set the memory with.
   LENGTH is the number of bytes to set.  ALIGN is the alignment of
   the destination memory in bytes.  UNALIGNED_P is TRUE if we can
   only set the memory with instructions meeting alignment
   requirements.  USE_STRD_P is TRUE if we can use strd to set the
   memory.  */
static bool
arm_block_set_non_vect_profit_p (rtx val,
				 unsigned HOST_WIDE_INT length,
				 unsigned HOST_WIDE_INT align,
				 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
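/* Worked example (added commentary, not original code): for
   length == 15, align == 4, !unaligned_p and !use_strd_p, the count is
       num = <cost of loading VAL> + (15 >> 2) + leftover[15 & 3]
	   = <cost> + 3 + 2,
   minus one more when unaligned access is available, because the
   trailing STRH/STRB pair (15 & 3 == 3) can be merged into one STR.  */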
/* Return TRUE if it's profitable to set block of memory for the
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
			     unsigned HOST_WIDE_INT align,
			     machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num--;

  return (num <= arm_block_set_max_insns ());
}
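/* Worked example (added commentary, not original code): length == 17,
   align == 4 and mode == V16QImode gives
       num = 1 (load constant) + 2 (ceil (17/16) stores)
	     + 1 (address adjustment for the leftover byte)
	     - 1 (first 16 bytes go out via vst1)
	   = 3 instructions.  */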
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
			      unsigned HOST_WIDE_INT length,
			      unsigned HOST_WIDE_INT value,
			      unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;

  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, 0);

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
	emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
    }

  /* If there are not less than nelt_v8 bytes leftover, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
	{
	  reg = gen_lowpart (V8QImode, reg);
	  mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
	}
      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
					      + (nelt_mode - nelt_v8))));
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
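/* Worked example (added commentary, not original code): length == 20,
   align == 1 selects V16QImode above; the main loop emits one vst1.8
   for bytes 0-15, and the (0, 8] leftover branch then advances DST by
   (20 - 16) + (16 - 8) == 12 and emits a V8QImode misaligned store
   covering bytes 12-19, deliberately overlapping the first store.  */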
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_aligned_vect (rtx dstbase,
			    unsigned HOST_WIDE_INT length,
			    unsigned HOST_WIDE_INT value,
			    unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, 0);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
	  mem = adjust_automodify_address (dstbase, mode, dst, 0);
	  /* We are shifting bytes back, set the alignment accordingly.  */
	  if ((length & 0x3) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 4);
	  else if ((length & 0x1) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 2);
	  else
	    set_mem_align (mem, BITS_PER_UNIT);

	  emit_insn (gen_movmisalignv16qi (mem, reg));
	  return true;
	}
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      mem = adjust_automodify_address (dstbase, mode,
				       addr, i - UNITS_PER_WORD);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_move_insn (mem, reg);
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      mem = adjust_automodify_address (dstbase, mode, dst, 0);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT * 2);
      else
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
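/* Worked example (added commentary, not original code): length == 27,
   align == 4 with unaligned access available selects V16QImode; bytes
   0-15 are written with vst1, then the (8, 16) leftover branch
   advances DST by 27 - 16 == 11 and stores bytes 11-26 with a second
   vst1, again overlapping rather than falling back to scalar stores.  */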
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
				  unsigned HOST_WIDE_INT length,
				  unsigned HOST_WIDE_INT value,
				  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
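/* Worked example (added commentary, not original code): VALUE == 0xAB
   with align == 2 duplicates the byte into v == 0xABAB and selects
   HImode, so length == 5 expands to two STRH stores at offsets 0 and
   2 plus a trailing STRB for the last byte.  */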
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
				unsigned HOST_WIDE_INT length,
				unsigned HOST_WIDE_INT value,
				unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, false, use_strd_p))
    {
      if (!use_strd_p)
	return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
					    align, false, use_strd_p))
	return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
	{
	  addr = plus_constant (Pmode, dst, i);
	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
	  emit_move_insn (mem, reg);
	}
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
	 compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
		    unsigned HOST_WIDE_INT length,
		    unsigned HOST_WIDE_INT value,
		    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
/* Expand string store operation.  Firstly we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
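/* Usage sketch (added commentary, not original code): for a call such
   as memset (p, 0, 16) with word-aligned P, the expander receives
   operands (mem, 16, 0, 4); on a NEON tuning that prefers vector
   string ops the vectorized path handles the whole block, otherwise
   arm_block_set_aligned_non_vect emits four word stores.  */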
static bool
arm_macro_fusion_p (void)
{
  return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
}
/* Implement the TARGET_SCHED_MACRO_FUSION_PAIR_P hook.  */
static bool
aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
{
  rtx set_dest;
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  if (!prev_set
      || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT)
    {
      /* We are trying to fuse
	 movw imm / movt imm
	 instructions as a group that gets scheduled together.  */

      set_dest = SET_DEST (curr_set);

      if (GET_MODE (set_dest) != SImode)
	return false;

      /* We are trying to match:
	 prev (movw)  == (set (reg r0) (const_int imm16))
	 curr (movt) == (set (zero_extract (reg r0)
					   (const_int 16)
					   (const_int 16))
			 (const_int imm16_1))
	 or
	 prev (movw) == (set (reg r1)
			     (high (symbol_ref ("SYM"))))
	 curr (movt) == (set (reg r0)
			     (lo_sum (reg r1)
				     (symbol_ref ("SYM"))))  */
      if (GET_CODE (set_dest) == ZERO_EXTRACT)
	{
	  if (CONST_INT_P (SET_SRC (curr_set))
	      && CONST_INT_P (SET_SRC (prev_set))
	      && REG_P (XEXP (set_dest, 0))
	      && REG_P (SET_DEST (prev_set))
	      && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
	    return true;
	}
      else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
	       && REG_P (SET_DEST (curr_set))
	       && REG_P (SET_DEST (prev_set))
	       && GET_CODE (SET_SRC (prev_set)) == HIGH
	       && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
	return true;
    }

  return false;
}
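/* Illustrative pair (added commentary, not original code): the second
   pattern above matches address formation such as
       movw  r0, #:lower16:SYM
       movt  r0, #:upper16:SYM
   so the scheduler keeps the two halves adjacent when the tuning
   enables FUSE_MOVW_MOVT.  */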
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << 29;
}
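/* Added commentary: with this offset, AddressSanitizer maps an
   application address A to the shadow byte at ((A >> 3) + 0x20000000)
   on ARM.  */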
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
	{
	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
	  if (decl_op1
	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
	    {
	      if ((TREE_CODE (decl_op1) == VAR_DECL
		   || TREE_CODE (decl_op1) == CONST_DECL)
		  && (TREE_CODE (decl_op0) == VAR_DECL
		      || TREE_CODE (decl_op0) == CONST_DECL))
		return (get_variable_section (decl_op1, false)
			!= get_variable_section (decl_op0, false));

	      if (TREE_CODE (decl_op1) == LABEL_DECL
		  && TREE_CODE (decl_op0) == LABEL_DECL)
		return (DECL_CONTEXT (decl_op1)
			!= DECL_CONTEXT (decl_op0));
	    }

	  return true;
	}
    }

  return false;
}
/* Return TRUE if X is a reference to a value in a constant pool.  */

static bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* Remember the last target of arm_set_current_function.  */
static GTY(()) tree arm_previous_fndecl;

/* Invalidate arm_previous_fndecl.  */
void
arm_reset_previous_fndecl (void)
{
  arm_previous_fndecl = NULL_TREE;
}
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
arm_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == arm_previous_fndecl)
    return;

  tree old_tree = (arm_previous_fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
		   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  arm_previous_fndecl = fndecl;
  if (old_tree == new_tree)
    return;

  if (new_tree && new_tree != target_option_default_node)
    {
      cl_target_option_restore (&global_options,
				TREE_TARGET_OPTION (new_tree));

      if (TREE_TARGET_GLOBALS (new_tree))
	restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
      else
	TREE_TARGET_GLOBALS (new_tree)
	  = save_target_globals_default_opts ();
    }

  else if (old_tree && old_tree != target_option_default_node)
    {
      new_tree = target_option_current_node;

      cl_target_option_restore (&global_options,
				TREE_TARGET_OPTION (new_tree));
      if (TREE_TARGET_GLOBALS (new_tree))
	restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
      else if (new_tree == target_option_default_node)
	restore_target_globals (&default_target_globals);
      else
	TREE_TARGET_GLOBALS (new_tree)
	  = save_target_globals_default_opts ();
    }

  arm_option_params_internal (&global_options);
}
/* Hook to determine if one function can safely inline another.  */

static bool
arm_can_inline_p (tree caller ATTRIBUTE_UNUSED, tree callee ATTRIBUTE_UNUSED)
{
  /* Override default hook: always OK to inline between different modes.
     A function with mode-specific instructions, e.g. using asm, must be
     explicitly protected with noinline.  */
  return true;
}
/* Inner function to process the attribute((target(...))): take an argument
   and set the current options from the argument.  If we have a list,
   recursively go over the list.  */

static bool
arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
{
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;
      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
	  ret = false;
      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
  while (argstr && *argstr != '\0')
    {
      while (ISSPACE (*argstr))
	argstr++;

      if (!strcmp (argstr, "thumb"))
	{
	  opts->x_target_flags |= MASK_THUMB;
	  arm_option_check_internal (opts);
	  return true;
	}

      if (!strcmp (argstr, "arm"))
	{
	  opts->x_target_flags &= ~MASK_THUMB;
	  arm_option_check_internal (opts);
	  return true;
	}

      warning (0, "attribute(target(\"%s\")) is unknown", argstr);
      return false;
    }

  return false;
}
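/* Usage example (added commentary, not original code): only the
   strings "thumb" and "arm" are accepted here, as in
       void f (void) __attribute__ ((target ("thumb")));
   which flips MASK_THUMB for the body of F only.  */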
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
				 struct gcc_options *opts_set)
{
  if (!arm_valid_target_attribute_rec (args, opts))
    return NULL_TREE;

  /* Do any overrides, such as global options arch=xxx.  */
  arm_option_override_internal (opts, opts_set);

  return build_target_option_node (opts);
}
/* Hook to validate attribute((target("string"))).  */

static bool
arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
			      tree args, int ARG_UNUSED (flags))
{
  bool ret = true;
  struct gcc_options func_options;
  tree cur_tree, new_optimize;
  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (!func_optimize)
    func_optimize = optimization_default_node;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);

  /* Initialize func_options to the defaults.  */
  cl_optimization_restore (&func_options,
			   TREE_OPTIMIZATION (func_optimize));

  cl_target_option_restore (&func_options,
			    TREE_TARGET_OPTION (target_option_default_node));

  /* Set func_options flags with new target mode.  */
  cur_tree = arm_valid_target_attribute_tree (args, &func_options,
					      &global_options_set);

  if (cur_tree == NULL_TREE)
    ret = false;

  new_optimize = build_optimization_node (&func_options);

  DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;

  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;

  return ret;
}
void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  if (TARGET_UNIFIED_ASM)
    fprintf (stream, "\t.syntax unified\n");
  else
    fprintf (stream, "\t.syntax divided\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
	  || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
	      && cfun->is_thunk))
	fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
	fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
	fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}
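/* Example output (added commentary, not original code): for a Thumb-2
   function with unified syntax this prints
       .syntax unified
       .thumb
       .thumb_func
   ahead of the function label.  */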
/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
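/* Worked example (added commentary, not original code):
   (mem (plus (reg r4) (const_int 8))) yields *BASE = (reg r4) and
   *OFFSET = (const_int 8), while a bare (mem (reg r4)) yields
   *OFFSET = const0_rtx.  */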
/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support to fuse ldr or str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI.  In the
   future, other kinds of instruction fusion can be supported by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
			   int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
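/* Worked example (added commentary, not original code): for two stores
       str r0, [r4]
       str r1, [r4, #4]
   both get FUSION_PRI == max_pri - 3, while their PRI values differ
   only in the offset term, so the scheduler sorts them adjacently and
   in increasing address order.  */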
#include "gt-arm.h"