/* Output routines for GCC for ARM.
   Copyright (C) 1991-2015 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hash-table.h"
#include "double-int.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "insn-codes.h"
#include "diagnostic-core.h"
#include "dominance.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "plugin-api.h"
#include "sched-int.h"
#include "target-def.h"
#include "langhooks.h"
#include "gimple-expr.h"
#include "tm-constrs.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static bool arm_lra_p (void);
static bool arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int *, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
				  tree, bool);
static rtx arm_function_arg (cumulative_args_t, machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, machine_mode,
				      const_tree, bool);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
					     const unsigned char *sel);

static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   int misalign,
				   enum vect_cost_model_location where);

static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int *);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef  TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_LRA_P
#define TARGET_LRA_P arm_lra_p

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef  TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef  TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef  TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef  TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef  TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef  TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef  TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef  TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef  TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost

#undef  TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef  TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef  TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef  TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef  TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef  TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef  TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef  TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef  TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef  TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef  TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef  TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef  TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef  TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef  TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef  TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef  TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef  TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef  TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef  TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef  TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef  TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef  TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef  TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef  TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef  TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef  TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef  TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef  TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef  TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef  TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef  TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef  TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef  TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef  TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef  TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef  TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef  TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef  TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef  TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef  TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */

#undef  TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef  TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef  TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef  TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef  TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef  TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef  TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
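
/* Worked out: the span covered around an anchor is therefore
   4095 - (-4088) + 1 = 8184 bytes, and 8184 = 8 * 1023, which gives the
   eight-byte divisibility described above.  */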
#undef  TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef  TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef  TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef  TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef  TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef  TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef  TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef  TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef  TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef  TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef  TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef  TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef  TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef  TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef  TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef  TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef  TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef  TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef  TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef  TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef  TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef  TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef  TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef  TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef  TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef  TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef  TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef  TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef  TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef  MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
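
/* Under -mrestrict-it (the ARMv8 rule deprecating complex IT blocks),
   an IT block may cover only one conditional instruction; otherwise up
   to four instructions can share a single IT block.  */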
#undef  TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef  TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef  TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef  TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

struct gcc_target targetm = TARGET_INITIALIZER;
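
/* TARGET_INITIALIZER expands to an aggregate initializer for targetm that
   picks up every TARGET_* hook macro overridden above and the documented
   default for every hook this file leaves alone.  */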
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;
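
/* Note: this is only the initial default; the per-CPU tuning tables at the
   end of this section carry their own "Max cond insns" value, which
   arm_option_override is expected to install here once the target CPU is
   known.  */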
extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned long tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if we should use Neon to handle 64-bits operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
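
/* In the unified assembler syntax the left-shift operation is spelled
   "lsl"; the older divided syntax spelled the same operation "asl",
   hence the choice above.  */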
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM)    \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
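
/* I.e. start from the low registers r0-r7 and knock out the Thumb hard
   frame pointer plus (where they land in that range) the stack pointer,
   program counter and PIC register, leaving only registers that are safe
   to use as scratch space.  */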
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  enum base_architecture base_arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};


#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size
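
/* Usage sketch: ARM_PREFETCH_BENEFICIAL(4,32,32), as used in the Cortex-A9
   and Cortex-A12 tables below, expands to the triple "4, 32, 32", filling
   the prefetch-slot, L1-cache-size and L1-line-size tuning fields in one
   go; ARM_PREFETCH_NOT_BENEFICIAL supplies the disabled triple 0, -1, -1.  */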
/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,	/* scalar_stmt_cost.  */
  1,	/* scalar_load_cost.  */
  1,	/* scalar_store_cost.  */
  1,	/* vec_stmt_cost.  */
  1,	/* vec_to_scalar_cost.  */
  1,	/* scalar_to_vec_cost.  */
  1,	/* vec_align_load_cost.  */
  1,	/* vec_unalign_load_cost.  */
  1,	/* vec_unalign_store_cost.  */
  1,	/* vec_store_cost.  */
  3,	/* cond_taken_branch_cost.  */
  1,	/* cond_not_taken_branch_cost.  */
};
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"

const struct cpu_cost_table cortexa9_extra_costs =
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (2),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (3),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (4),	/* extend.  */
  COSTS_N_INSNS (4),	/* extend_add.  */
  COSTS_N_INSNS (2),	/* load.  */
  COSTS_N_INSNS (2),	/* load_sign_extend.  */
  COSTS_N_INSNS (2),	/* ldrd.  */
  COSTS_N_INSNS (2),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (5),	/* loadf.  */
  COSTS_N_INSNS (5),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (2),	/* store.  */
  COSTS_N_INSNS (2),	/* strd.  */
  COSTS_N_INSNS (2),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* storef.  */
  COSTS_N_INSNS (1),	/* stored.  */
  COSTS_N_INSNS (1)	/* store_unaligned.  */
  COSTS_N_INSNS (14),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1),	/* fpconst.  */
  COSTS_N_INSNS (1),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (24),	/* div.  */
  COSTS_N_INSNS (5),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1),	/* fpconst.  */
  COSTS_N_INSNS (1),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa8_extra_costs =
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  0,			/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  0,			/* log_shift_reg.  */
  0,			/* extend_arith.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* simple.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (1),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* loadf.  */
  COSTS_N_INSNS (1),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (1),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* storef.  */
  COSTS_N_INSNS (1),	/* stored.  */
  COSTS_N_INSNS (1)	/* store_unaligned.  */
  COSTS_N_INSNS (36),	/* div.  */
  COSTS_N_INSNS (11),	/* mult.  */
  COSTS_N_INSNS (20),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (9),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (6),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (8),	/* toint.  */
  COSTS_N_INSNS (8),	/* fromint.  */
  COSTS_N_INSNS (8)	/* roundint.  */
  COSTS_N_INSNS (64),	/* div.  */
  COSTS_N_INSNS (16),	/* mult.  */
  COSTS_N_INSNS (25),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (9),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (6),	/* compare.  */
  COSTS_N_INSNS (6),	/* widen.  */
  COSTS_N_INSNS (6),	/* narrow.  */
  COSTS_N_INSNS (8),	/* toint.  */
  COSTS_N_INSNS (8),	/* fromint.  */
  COSTS_N_INSNS (8)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa5_extra_costs =
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (7)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (6),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (4),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  COSTS_N_INSNS (1)	/* store_unaligned.  */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (3),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (30),	/* div.  */
  COSTS_N_INSNS (6),	/* mult.  */
  COSTS_N_INSNS (10),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa7_extra_costs =
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (7)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (2),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  COSTS_N_INSNS (1)	/* store_unaligned.  */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (3),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (30),	/* div.  */
  COSTS_N_INSNS (6),	/* mult.  */
  COSTS_N_INSNS (10),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa12_extra_costs =
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (2),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (18)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (3),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (3),	/* load.  */
  COSTS_N_INSNS (3),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (3),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (3),	/* loadf.  */
  COSTS_N_INSNS (3),	/* loadd.  */
  0,			/* load_unaligned.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  0			/* store_unaligned.  */
  COSTS_N_INSNS (17),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (31),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table cortexa15_extra_costs =
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (2),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (2),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (18)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (3),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (3),	/* load.  */
  COSTS_N_INSNS (3),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (4),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (4),	/* loadf.  */
  COSTS_N_INSNS (4),	/* loadd.  */
  0,			/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  0			/* store_unaligned.  */
  COSTS_N_INSNS (17),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (5),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (31),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */

const struct cpu_cost_table v7m_extra_costs =
  0,			/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* non_exec.  */
  false			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* simple.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (8)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (2),	/* load.  */
  0,			/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (2),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  1,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (3),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (2),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (2),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  1,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (3),	/* stored.  */
  COSTS_N_INSNS (1)	/* store_unaligned.  */
  COSTS_N_INSNS (7),	/* div.  */
  COSTS_N_INSNS (2),	/* mult.  */
  COSTS_N_INSNS (5),	/* mult_addsub.  */
  COSTS_N_INSNS (3),	/* fma.  */
  COSTS_N_INSNS (1),	/* addsub.  */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (5),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1)	/* alu.  */
const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
  NULL,			/* Sched adj cost.  */
  3,			/* Constant limit.  */
  5,			/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,			/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,		/* Prefer LDRD/STRD.  */
  {true, true},		/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,		/* Prefer Neon for 64-bits bitops.  */
  false, false,		/* Prefer 32-bit encodings.  */
  false,		/* Prefer Neon for stringops.  */
  8			/* Maximum insns to inline memset.  */
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  NULL,			/* Sched adj cost.  */
  1,			/* Constant limit.  */
  5,			/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,			/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,		/* Prefer LDRD/STRD.  */
  {true, true},		/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,		/* Prefer Neon for 64-bits bitops.  */
  false, false,		/* Prefer 32-bit encodings.  */
  false,		/* Prefer Neon for stringops.  */
  8			/* Maximum insns to inline memset.  */
};
/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  NULL,			/* Sched adj cost.  */
  1,			/* Constant limit.  */
  3,			/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,			/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,		/* Prefer LDRD/STRD.  */
  {true, true},		/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,		/* Prefer Neon for 64-bits bitops.  */
  false, false,		/* Prefer 32-bit encodings.  */
  false,		/* Prefer Neon for stringops.  */
  8			/* Maximum insns to inline memset.  */
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  NULL,
  xscale_sched_adjust_cost,
  2,			/* Constant limit.  */
  3,			/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,			/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,		/* Prefer LDRD/STRD.  */
  {true, true},		/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,		/* Prefer Neon for 64-bits bitops.  */
  false, false,		/* Prefer 32-bit encodings.  */
  false,		/* Prefer Neon for stringops.  */
  8			/* Maximum insns to inline memset.  */
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
  NULL,			/* Sched adj cost.  */
  1,			/* Constant limit.  */
  5,			/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,			/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,		/* Prefer LDRD/STRD.  */
  {true, true},		/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,		/* Prefer Neon for 64-bits bitops.  */
  false, false,		/* Prefer 32-bit encodings.  */
  false,		/* Prefer Neon for stringops.  */
  8			/* Maximum insns to inline memset.  */
};
const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,
  NULL,			/* Sched adj cost.  */
  1,			/* Constant limit.  */
  5,			/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,		/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,		/* Prefer LDRD/STRD.  */
  {true, true},		/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,		/* Prefer Neon for 64-bits bitops.  */
  false, false,		/* Prefer 32-bit encodings.  */
  false,		/* Prefer Neon for stringops.  */
  8			/* Maximum insns to inline memset.  */
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  &generic_extra_costs,
  NULL,			/* Sched adj cost.  */
  1,			/* Constant limit.  */
  5,			/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,		/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,		/* Prefer LDRD/STRD.  */
  {true, true},		/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,		/* Prefer Neon for 64-bits bitops.  */
  false, false,		/* Prefer 32-bit encodings.  */
  false,		/* Prefer Neon for stringops.  */
  8			/* Maximum insns to inline memset.  */
};
const struct tune_params arm_cortex_a8_tune =
{
  arm_9e_rtx_costs,
  &cortexa8_extra_costs,
  NULL,			/* Sched adj cost.  */
  1,			/* Constant limit.  */
  5,			/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,		/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,		/* Prefer LDRD/STRD.  */
  {true, true},		/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,		/* Prefer Neon for 64-bits bitops.  */
  false, false,		/* Prefer 32-bit encodings.  */
  true,			/* Prefer Neon for stringops.  */
  8			/* Maximum insns to inline memset.  */
};

const struct tune_params arm_cortex_a7_tune =
{
  arm_9e_rtx_costs,
  &cortexa7_extra_costs,
  NULL,			/* Sched adj cost.  */
  1,			/* Constant limit.  */
  5,			/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,		/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,		/* Prefer LDRD/STRD.  */
  {true, true},		/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,		/* Prefer Neon for 64-bits bitops.  */
  false, false,		/* Prefer 32-bit encodings.  */
  true,			/* Prefer Neon for stringops.  */
  8			/* Maximum insns to inline memset.  */
};

const struct tune_params arm_cortex_a15_tune =
{
  arm_9e_rtx_costs,
  &cortexa15_extra_costs,
  NULL,			/* Sched adj cost.  */
  1,			/* Constant limit.  */
  2,			/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,		/* Prefer constant pool.  */
  arm_default_branch_cost,
  true,			/* Prefer LDRD/STRD.  */
  {true, true},		/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,		/* Prefer Neon for 64-bits bitops.  */
  true, true,		/* Prefer 32-bit encodings.  */
  true,			/* Prefer Neon for stringops.  */
  8			/* Maximum insns to inline memset.  */
};
const struct tune_params arm_cortex_a53_tune =
{
  arm_9e_rtx_costs,
  &cortexa53_extra_costs,
  NULL,			/* Scheduler cost adjustment.  */
  1,			/* Constant limit.  */
  5,			/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,		/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,		/* Prefer LDRD/STRD.  */
  {true, true},		/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,		/* Prefer Neon for 64-bits bitops.  */
  false, false,		/* Prefer 32-bit encodings.  */
  false,		/* Prefer Neon for stringops.  */
  8			/* Maximum insns to inline memset.  */
};

const struct tune_params arm_cortex_a57_tune =
{
  arm_9e_rtx_costs,
  &cortexa57_extra_costs,
  NULL,			/* Scheduler cost adjustment.  */
  1,			/* Constant limit.  */
  2,			/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,		/* Prefer constant pool.  */
  arm_default_branch_cost,
  true,			/* Prefer LDRD/STRD.  */
  {true, true},		/* Prefer non short circuit.  */
  &arm_default_vec_cost,	/* Vectorizer costs.  */
  false,		/* Prefer Neon for 64-bits bitops.  */
  true, true,		/* Prefer 32-bit encodings.  */
  false,		/* Prefer Neon for stringops.  */
  8			/* Maximum insns to inline memset.  */
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  NULL,						/* Sched adj cost.  */
  1,						/* Constant limit.  */
  1,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,					/* Prefer constant pool.  */
  arm_cortex_a5_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {false, false},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false,					/* Prefer Neon for 64-bits bitops.  */
  false, false,					/* Prefer 32-bit encodings.  */
  true,						/* Prefer Neon for stringops.  */
  8						/* Maximum insns to inline memset.  */
};
const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  cortex_a9_sched_adjust_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false,					/* Prefer Neon for 64-bits bitops.  */
  false, false,					/* Prefer 32-bit encodings.  */
  false,					/* Prefer Neon for stringops.  */
  8						/* Maximum insns to inline memset.  */
};
const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  NULL,						/* Sched adj cost.  */
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  true,						/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false,					/* Prefer Neon for 64-bits bitops.  */
  false, false,					/* Prefer 32-bit encodings.  */
  true,						/* Prefer Neon for stringops.  */
  8						/* Maximum insns to inline memset.  */
};
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  NULL,						/* Sched adj cost.  */
  1,						/* Constant limit.  */
  2,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_cortex_m_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {false, false},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false,					/* Prefer Neon for 64-bits bitops.  */
  false, false,					/* Prefer 32-bit encodings.  */
  false,					/* Prefer Neon for stringops.  */
  8						/* Maximum insns to inline memset.  */
};
/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  NULL,						/* Sched adj cost.  */
  0,						/* Constant limit.  */
  0,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_cortex_m_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false,					/* Prefer Neon for 64-bits bitops.  */
  false, false,					/* Prefer 32-bit encodings.  */
  false,					/* Prefer Neon for stringops.  */
  8						/* Maximum insns to inline memset.  */
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
const struct tune_params arm_v6m_tune =
{
  NULL,						/* Sched adj cost.  */
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,					/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {false, false},				/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false,					/* Prefer Neon for 64-bits bitops.  */
  false, false,					/* Prefer 32-bit encodings.  */
  false,					/* Prefer Neon for stringops.  */
  8						/* Maximum insns to inline memset.  */
};
const struct tune_params arm_fa726te_tune =
{
  fa726te_sched_adjust_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,						/* Prefer constant pool.  */
  arm_default_branch_cost,
  false,					/* Prefer LDRD/STRD.  */
  {true, true},					/* Prefer non short circuit.  */
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  false,					/* Prefer Neon for 64-bits bitops.  */
  false, false,					/* Prefer 32-bit encodings.  */
  false,					/* Prefer Neon for stringops.  */
  8						/* Maximum insns to inline memset.  */
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH,	\
   FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};
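
/* For illustration only: assuming an arm-cores.def entry of the usual
   form, for example

     ARM_CORE ("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex_a8)

   the ARM_CORE macro above would expand it into an all_cores element like

     {"cortex-a8", cortexa8, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a8_tune},

   i.e. the architecture token is stringized, combined with its baseline
   feature flags, and paired with the matching tune_params above.  */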
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
};
/* These are populated as command-line arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;
/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
#include "arm-fpus.def"
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
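
/* For example, emit_set_insn (reg, GEN_INT (42)) emits the single insn
   (set (reg) (const_int 42)).  */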
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
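
/* Illustration: the clear-lowest-set-bit step above makes the loop run
   exactly once per set bit.  E.g. for value = 0x58 (binary 1011000):
   1011000 -> 1010000 -> 1000000 -> 0, giving a count of 3.  */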
typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}
static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;
  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
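
  /* Illustration (not part of GCC): with the libfuncs registered above, a
     target lacking hardware divide lowers plain C division and modulus
     onto the AEABI helpers, e.g.

	int quot (int a, int b) { return a / b; }   -> calls __aeabi_idiv
	int rem  (int a, int b) { return a % b; }   -> calls __aeabi_idivmod
						       (remainder in r1)
	long long lq (long long a, long long b)
	  { return a / b; }			    -> calls __aeabi_ldivmod

     The NULL smod/umod entries are what make GCC fall back to the divmod
     libcalls for the % cases.  */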
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ QQmode, "qq" },
	{ UQQmode, "uqq" },
	{ HQmode, "hq" },
	{ UHQmode, "uhq" },
	{ SQmode, "sq" },
	{ USQmode, "usq" },
	{ DQmode, "dq" },
	{ UDQmode, "udq" },
	{ TQmode, "tq" },
	{ UTQmode, "utq" },
	{ HAmode, "ha" },
	{ UHAmode, "uha" },
	{ SAmode, "sa" },
	{ USAmode, "usa" },
	{ DAmode, "da" },
	{ UDAmode, "uda" },
	{ TAmode, "ta" },
	{ UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ QQmode, "qq" },
	{ UQQmode, "uqq" },
	{ HQmode, "hq" },
	{ UHQmode, "uhq" },
	{ SQmode, "sq" },
	{ USQmode, "usq" },
	{ DQmode, "dq" },
	{ UDQmode, "udq" },
	{ TQmode, "tq" },
	{ UTQmode, "utq" },
	{ HAmode, "ha" },
	{ UHAmode, "uha" },
	{ SAmode, "sa" },
	{ USAmode, "usa" },
	{ DAmode, "da" },
	{ UDAmode, "uda" },
	{ TAmode, "ta" },
	{ UTAmode, "uta" },
	{ QImode, "qi" },
	{ HImode, "hi" },
	{ SImode, "si" },
	{ DImode, "di" },
	{ SFmode, "sf" },
	{ DFmode, "df" }
      };
    unsigned int i, j;
    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }
    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S 4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
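
/* For reference, the type constructed above corresponds to this C-level
   definition (a sketch of the AAPCS-mandated layout, not code GCC
   compiles here):  */
#if 0
struct __va_list
{
  void *__ap;		/* Points at the next anonymous argument.  */
};
typedef struct __va_list va_list;
#endif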
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    {
      arm_selected_cpu = &all_cores[(int) arm_cpu_option];
      arm_selected_tune = &all_cores[(int) arm_cpu_option];
    }

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
	{
	  /* Check for conflict between mcpu and march.  */
	  if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       arm_selected_cpu->name, arm_selected_arch->name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    /* -mcpu wins.  */
	    arm_selected_arch = NULL;
	}
      else
	/* Pick a CPU based on the architecture.  */
	arm_selected_cpu = arm_selected_arch;
    }
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
	  /* Default to ARM6.  */
	  if (!arm_selected_cpu->name)
	    arm_selected_cpu = &all_cores[arm6];
	}

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
	 switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}

      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
    }
  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;
  arm_base_arch = arm_selected_cpu->base_arch;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch8 = (insn_flags & FL_ARCH8) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
  arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
  arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  arm_arch_crc = (insn_flags & FL_CRC32) != 0;
  arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
  if (arm_restrict_it == 2)
    arm_restrict_it = arm_arch8 && TARGET_THUMB2;

  if (!TARGET_THUMB2)
    arm_restrict_it = 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (!global_options_set.x_arm_fpu_index)
    {
      const char *target_fpu_name;
      bool ok;

#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      target_fpu_name = "vfp";
#endif

      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
    }

  arm_fpu_desc = &all_fpus[arm_fpu_index];

  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }
  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");
      else
	if (TARGET_CALLEE_INTERWORKING)
	  error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT && TARGET_NEON)
    error ("iWMMXt and NEON are incompatible");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
	       && TARGET_HARD_FLOAT
	       && TARGET_VFP)
	arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
	sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }

  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }

  if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("can not use -mtp=cp15 with 16-bit Thumb");
  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }

  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;
  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

  if (TARGET_VXWORKS_RTP
      && !global_options_set.x_arm_pic_data_is_text_relative)
    arm_pic_data_is_text_relative = 0;

  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (arm_selected_cpu->core == cortexm3)
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }
  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-based processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors.  */

  if (unaligned_access == 2)
    {
      if (arm_arch6 && (arm_arch_notm || arm_arch7))
	unaligned_access = 1;
      else
	unaligned_access = 0;
    }
  else if (unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
      unaligned_access = 0;
    }

  if (TARGET_THUMB1 && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }

  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
	 are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;

      /* For THUMB2, we limit the conditional sequence to one IT block.  */
      if (TARGET_THUMB2)
	max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
    }
  else
    max_insns_skipped = current_tune->max_insns_skipped;
  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least(2))
    flag_strict_volatile_bitfields = 1;

  /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we have
     deemed it beneficial (signified by setting num_prefetch_slots to 1 or
     more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->num_prefetch_slots > 0)
    flag_prefetch_loop_arrays = 1;
  /* Set up parameters to be used in prefetching algorithm.  Do not override the
     defaults unless we are tuning for a core we have researched values for.  */
  if (current_tune->num_prefetch_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			   current_tune->num_prefetch_slots,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			   current_tune->l1_cache_line_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			   current_tune->l1_cache_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  /* Use Neon to perform 64-bit operations rather than core
     registers.  */
  prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
  if (use_neon_for_64bits == 1)
    prefer_neon_for_64bits = true;

  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
    flag_shrink_wrap = false;
  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  /* We only support -mslow-flash-data on armv7-m targets.  */
  if (target_slow_flash_data
      && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
	  || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
    error ("-mslow-flash-data only supports non-pic code on armv7-m targets");

  /* Currently, for slow flash data, we just disable literal pools.  */
  if (target_slow_flash_data)
    arm_disable_literal_pool = true;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  if (TARGET_THUMB2)
    inline_asm_unified = 1;

  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1)
    flag_ipa_ra = 0;

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc, #0]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

static void
arm_asm_trampoline_template (FILE *f)
{
  if (TARGET_ARM)
    {
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
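
/* Illustration: after the two emit_move_insn calls above, a 32-bit (ARM
   mode) trampoline occupies four words.  Since reading pc yields the
   instruction's address plus 8, the template's loads resolve like so:

     offset 0:  ldr  rSC, [pc, #0]   ; loads the word at offset 8
     offset 4:  ldr  pc,  [pc, #0]   ; loads the word at offset 12 and jumps
     offset 8:  .word <static chain value>
     offset 12: .word <function address>  */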
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4)))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ... */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }

  return FALSE;
}
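
/* A self-contained cross-check (not part of GCC): the classic rule tested
   above for ARM mode says an immediate is encodable iff it is an 8-bit
   value rotated right by an even amount (Thumb-2 additionally accepts the
   replicated-byte patterns).  A direct implementation of the ARM-mode
   rule:  */
#if 0
#include <stdint.h>

static int
arm_imm_encodable_p (uint32_t i)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate I left by ROT; if the result fits in 8 bits, then I is
	 that 8-bit value rotated right by ROT.  The "& 31" keeps the
	 right-shift count in range when ROT is 0.  */
      uint32_t v = (i << rot) | (i >> ((32 - rot) & 31));
      if ((v & ~(uint32_t) 0xff) == 0)
	return 1;
    }
  return 0;
}
#endif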
/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* else fall through.  */

    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
	     && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);

    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are diadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));

	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
/* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   The function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
                            struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    for (i = 0; i < 32; i += 2)
      {
        int consecutive_zeros = 0;

        if (!(val & (3 << i)))
          {
            while ((i < 32) && !(val & (3 << i)))
              {
                consecutive_zeros += 2;
                i += 2;
              }
            if (consecutive_zeros > best_consecutive_zeros)
              {
                best_consecutive_zeros = consecutive_zeros;
                best_start = i - consecutive_zeros;
              }
            i -= 2;
          }
      }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

           *((volatile int *)0xe0000100) = 1;
           *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

            mov rA, #0xe0000000
            mov rB, #1
            str rB, [rA, #0x100]
            mov rB, #2
            str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence,
                                         best_start);
  if (best_start != 0
      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
        {
          *return_sequence = tmp_sequence;
          insns1 = insns2;
        }
    }

  return insns1;
}
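/* Example of the best_start heuristic (illustrative): VAL == 0xe0000100
   has its largest 2-bit aligned run of zeros between the two set fields,
   so the scan starts just past that run and the chunks picked are
   0xe0000000 followed by 0x00000100, leaving the small constant last
   where it may later fold into a pre-indexed address.  */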
/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
                              struct four_ints *return_sequence, int i)
{
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the
     temps, wrapping around to the top of the word when we drop off the
     bottom.  In the worst case this code should produce no more than four
     insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      if (i <= 0)
        i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
        {
          loc = i;
          if (i <= 12 && TARGET_THUMB2 && code == PLUS)
            /* We can use addw/subw for the last 12 bits.  */
            result = remainder;
          else
            {
              /* Use an 8-bit shifted/rotated immediate.  */
              end = i - 8;
              if (end < 0)
                end += 32;
              result = remainder & ((0x0ff << end)
                                    | ((i < end) ? (0xff >> (32 - end))
                                       : 0));
              i -= 8;
            }
        }
      else
        {
          /* Arm allows rotates by a multiple of two.  Thumb-2 allows
             arbitrary shifts.  */
          i -= TARGET_ARM ? 2 : 1;
          continue;
        }

      /* Next, see if we can do a better job with a thumb2 replicated
         constant.

         We do it this way around to catch the cases like 0x01F001E0 where
         two 8-bit immediates would work, but a replicated constant would
         make it worse.

         TODO: 16-bit constants that don't clear all the bits, but still win.
         TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
        {
          b1 = (remainder & 0xff000000) >> 24;
          b2 = (remainder & 0x00ff0000) >> 16;
          b3 = (remainder & 0x0000ff00) >> 8;
          b4 = remainder & 0xff;

          if (loc > 24)
            {
              /* The 8-bit immediate already found clears b1 (and maybe b2),
                 but must leave b3 and b4 alone.  */

              /* First try to find a 32-bit replicated constant that clears
                 almost everything.  We can assume that we can't do it in one,
                 or else we wouldn't be here.  */
              unsigned int tmp = b1 & b2 & b3 & b4;
              unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
                                  + (tmp << 24);
              unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
                                            + (tmp == b3) + (tmp == b4);
              if (tmp
                  && (matching_bytes >= 3
                      || (matching_bytes == 2
                          && const_ok_for_op (remainder & ~tmp2, code))))
                {
                  /* At least 3 of the bytes match, and the fourth has at
                     least as many bits set, or two of the bytes match
                     and it will only require one more insn to finish.  */
                  result = tmp2;
                  i = tmp != b1 ? 32
                      : tmp != b2 ? 24
                      : tmp != b3 ? 16
                      : 8;
                }

              /* Second, try to find a 16-bit replicated constant that can
                 leave three of the bytes clear.  If b2 or b4 is already
                 zero, then we can.  If the 8-bit from above would not
                 clear b2 anyway, then we still win.  */
              else if (b1 == b3 && (!b2 || !b4
                                    || (remainder & 0x00ff0000 & ~result)))
                {
                  result = remainder & 0xff00ff00;
                  i = 24;
                }
            }
          else if (loc > 16)
            {
              /* The 8-bit immediate already found clears b2 (and maybe b3)
                 and we don't get here unless b1 is already clear, but it
                 will leave b4 unchanged.  */

              /* If we can clear b2 and b4 at once, then we win, since the
                 8-bits couldn't possibly reach that far.  */
              if (b2 == b4)
                {
                  result = remainder & 0x00ff00ff;
                  i = 16;
                }
            }
        }

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
        code = PLUS;
    }
  while (remainder);

  return insns;
}
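/* Replication example (illustrative, Thumb-2): for VAL == 0xab12ab34 the
   plain 8-bit route needs four insns (0xab000000, 0x00120000, 0x0000ab00,
   0x00000034), but since b1 == b3 the 16-bit replicated constant
   0xab00ab00 clears both high bytes at once, giving the three-insn
   sequence 0xab00ab00, 0x00120000, 0x00000034.  */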
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */
static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
                  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
                  int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target,
                                             GEN_INT (ARM_SIGN_EXTEND (val))));
          return 1;
        }

      if (remainder == 0)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;

          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target, source));
          return 1;
        }
      break;

    case AND:
      if (remainder == 0)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target, const0_rtx));
          return 1;
        }
      if (remainder == 0xffffffff)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target, source));
          return 1;
        }
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target, source));
          return 1;
        }

      if (remainder == 0xffffffff)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target,
                                             gen_rtx_NOT (mode, source)));
          return 1;
        }
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
         passed as (source + (-val)).  */
      if (remainder == 0)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target,
                                             gen_rtx_NEG (mode, source)));
          return 1;
        }
      if (const_ok_for_arm (val))
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target,
                                             gen_rtx_MINUS (mode,
                                                            GEN_INT (val),
                                                            source)));
          return 1;
        }

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
        emit_constant_insn (cond,
                            gen_rtx_SET (VOIDmode, target,
                                         (source
                                          ? gen_rtx_fmt_ee (code, mode, source,
                                                            GEN_INT (val))
                                          : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
        {
          if (mode == SImode && i == 16)
            /* Use UXTH in preference to UBFX, since on Thumb2 it's a
               smaller insn.  */
            emit_constant_insn (cond,
                                gen_zero_extendhisi2
                                (target, gen_lowpart (HImode, source)));
          else
            /* Extz only supports SImode, but we can coerce the operands
               into that mode.  */
            emit_constant_insn (cond,
                                gen_extzv_t2 (gen_lowpart (SImode, target),
                                              gen_lowpart (SImode, source),
                                              GEN_INT (i), const0_rtx));
        }

      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
        clear_sign_bit_copies++;
      else
        break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
        set_sign_bit_copies++;
      else
        break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
        clear_zero_bit_copies++;
      else
        break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
        set_zero_bit_copies++;
      else
        break;
    }

  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
         to be negative.  This is a good way of doing it, since the shift
         may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
        {
          if (const_ok_for_arm
              (temp1 = ARM_SIGN_EXTEND (remainder
                                        << (set_sign_bit_copies - 1))))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, new_src,
                                                   GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_ashrsi3 (target, new_src,
                                                   GEN_INT (set_sign_bit_copies - 1)));
                }
              return 2;
            }
          /* For an inverted constant, we will need to set the low bits,
             these will be shifted out of harm's way.  */
          temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
          if (const_ok_for_arm (~temp1))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, new_src,
                                                   GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_ashrsi3 (target, new_src,
                                                   GEN_INT (set_sign_bit_copies - 1)));
                }
              return 2;
            }
        }

      /* See if we can calculate the value as the difference between two
         valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
        {
          int topshift = clear_sign_bit_copies & ~1;

          temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
                                   & (0xff000000 >> topshift));

          /* If temp1 is zero, then that means the 9 most significant
             bits of remainder were 1 and we've caused it to overflow.
             When topshift is 0 we don't need to do anything since we
             can borrow from 'bit 32'.  */
          if (temp1 == 0 && topshift != 0)
            temp1 = 0x80000000 >> (topshift - 1);

          temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

          if (const_ok_for_arm (temp2))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, new_src,
                                                   GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_addsi3 (target, new_src,
                                                  GEN_INT (-temp2)));
                }

              return 2;
            }
        }

      /* See if we can generate this by setting the bottom (or the top)
         16 bits, and then shifting these into the other half of the
         word.  We only look for the simplest cases, to do more would cost
         too much.  Be careful, however, not to generate this when the
         alternative would take fewer insns.  */
      if (val & 0xffff0000)
        {
          temp1 = remainder & 0xffff0000;
          temp2 = remainder & 0x0000ffff;

          /* Overlaps outside this range are best done using other methods.  */
          for (i = 9; i < 24; i++)
            {
              if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
                  && !const_ok_for_arm (temp2))
                {
                  rtx new_src = (subtargets
                                 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
                                 : target);
                  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
                                            source, subtargets, generate);
                  source = new_src;
                  if (generate)
                    emit_constant_insn
                      (cond,
                       gen_rtx_SET
                       (VOIDmode, target,
                        gen_rtx_IOR (mode,
                                     gen_rtx_ASHIFT (mode, source,
                                                     GEN_INT (i)),
                                     source)));
                  return insns + 1;
                }
            }

          /* Don't duplicate cases already considered.  */
          for (i = 17; i < 24; i++)
            {
              if (((temp1 | (temp1 >> i)) == remainder)
                  && !const_ok_for_arm (temp1))
                {
                  rtx new_src = (subtargets
                                 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
                                 : target);
                  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
                                            source, subtargets, generate);
                  source = new_src;
                  if (generate)
                    emit_constant_insn
                      (cond,
                       gen_rtx_SET (VOIDmode, target,
                                    gen_rtx_IOR
                                    (mode,
                                     gen_rtx_LSHIFTRT (mode, source,
                                                       GEN_INT (i)),
                                     source)));
                  return insns + 1;
                }
            }
        }
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
         single instruction, and we can find a temporary to put it in,
         then this can be done in two instructions instead of 3-4.  */
      if (subtargets
          /* TARGET can't be NULL if SUBTARGETS is 0 */
          || (reload_completed && !reg_mentioned_p (target, source)))
        {
          if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
            {
              if (generate)
                {
                  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, sub,
                                                   GEN_INT (val)));
                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, target,
                                                   gen_rtx_fmt_ee (code, mode,
                                                                   source, sub)));
                }
              return 2;
            }
        }

      if (code == XOR)
        break;

      /*  Convert.
          x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
                             and the remainder 0s for e.g. 0xfff00000)
          x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

          This can be done in 2 instructions by using shifts with mov or mvn.
          e.g. for
          x = x | 0xfff00000;
          we generate.
          mvn   r0, r0, asl #12
          mvn   r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
          && (val & (-1 << (32 - set_sign_bit_copies))) == val)
        {
          if (generate)
            {
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (set_sign_bit_copies);

              emit_constant_insn
                (cond,
                 gen_rtx_SET (VOIDmode, sub,
                              gen_rtx_NOT (mode,
                                           gen_rtx_ASHIFT (mode,
                                                           source,
                                                           shift))));
              emit_constant_insn
                (cond,
                 gen_rtx_SET (VOIDmode, target,
                              gen_rtx_NOT (mode,
                                           gen_rtx_LSHIFTRT (mode, sub,
                                                             shift))));
            }
          return 2;
        }

      /* Convert
          x = y | constant (which has set_zero_bit_copies number of trailing ones).
           to
          x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

          For eg. r0 = r0 | 0xfff
               mvn      r0, r0, lsr #12
               mvn      r0, r0, asl #12

          */
      if (set_zero_bit_copies > 8
          && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
        {
          if (generate)
            {
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (set_zero_bit_copies);

              emit_constant_insn
                (cond,
                 gen_rtx_SET (VOIDmode, sub,
                              gen_rtx_NOT (mode,
                                           gen_rtx_LSHIFTRT (mode,
                                                             source,
                                                             shift))));
              emit_constant_insn
                (cond,
                 gen_rtx_SET (VOIDmode, target,
                              gen_rtx_NOT (mode,
                                           gen_rtx_ASHIFT (mode, sub,
                                                           shift))));
            }
          return 2;
        }

      /* This will never be reached for Thumb2 because orn is a valid
         instruction.  This is for Thumb1 and the ARM 32 bit cases.

         x = y | constant (such that ~constant is a valid constant)
         Transform this to
         x = ~(~y & ~constant).  */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
        {
          if (generate)
            {
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
              emit_constant_insn (cond,
                                  gen_rtx_SET (VOIDmode, sub,
                                               gen_rtx_NOT (mode, source)));
              source = sub;
              if (subtargets)
                sub = gen_reg_rtx (mode);
              emit_constant_insn (cond,
                                  gen_rtx_SET (VOIDmode, sub,
                                               gen_rtx_AND (mode, source,
                                                            GEN_INT (temp1))));
              emit_constant_insn (cond,
                                  gen_rtx_SET (VOIDmode, target,
                                               gen_rtx_NOT (mode, sub)));
            }
          return 3;
        }
      break;

    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
        {
          HOST_WIDE_INT shift_mask = ((0xffffffff
                                       << (32 - clear_sign_bit_copies))
                                      & 0xffffffff);

          if ((remainder | shift_mask) != 0xffffffff)
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  insns = arm_gen_constant (AND, mode, cond,
                                            remainder | shift_mask,
                                            new_src, source, subtargets, 1);
                  source = new_src;
                }
              else
                {
                  rtx targ = subtargets ? NULL_RTX : target;
                  insns = arm_gen_constant (AND, mode, cond,
                                            remainder | shift_mask,
                                            targ, source, subtargets, 0);
                }
            }

          if (generate)
            {
              rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (clear_sign_bit_copies);

              emit_insn (gen_ashlsi3 (new_src, source, shift));
              emit_insn (gen_lshrsi3 (target, new_src, shift));
            }

          return insns + 2;
        }

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
        {
          HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

          if ((remainder | shift_mask) != 0xffffffff)
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

                  insns = arm_gen_constant (AND, mode, cond,
                                            remainder | shift_mask,
                                            new_src, source, subtargets, 1);
                  source = new_src;
                }
              else
                {
                  rtx targ = subtargets ? NULL_RTX : target;

                  insns = arm_gen_constant (AND, mode, cond,
                                            remainder | shift_mask,
                                            targ, source, subtargets, 0);
                }
            }

          if (generate)
            {
              rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (clear_zero_bit_copies);

              emit_insn (gen_lshrsi3 (new_src, source, shift));
              emit_insn (gen_ashlsi3 (target, new_src, shift));
            }

          return insns + 2;
        }

      break;

    default:
      break;
    }

  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
                                            &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
                                            &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
        {
          rtx new_src, temp1_rtx;

          temp1 = immediates->i[i];

          if (code == SET || code == MINUS)
            new_src = (subtargets ? gen_reg_rtx (mode) : target);
          else if ((final_invert || i < (insns - 1)) && subtargets)
            new_src = gen_reg_rtx (mode);
          else
            new_src = target;

          if (can_invert)
            temp1 = ~temp1;
          else if (can_negate)
            temp1 = -temp1;

          temp1 = trunc_int_for_mode (temp1, mode);
          temp1_rtx = GEN_INT (temp1);

          if (code == SET)
            ;
          else if (code == MINUS)
            temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
          else
            temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

          emit_constant_insn (cond,
                              gen_rtx_SET (VOIDmode, new_src,
                                           temp1_rtx));
          source = new_src;

          if (code == SET)
            {
              can_negate = can_invert;
              can_invert = 0;
              code = PLUS;
            }
          else if (code == MINUS)
            code = PLUS;
        }
    }

  if (final_invert)
    {
      if (generate)
        emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
                                               gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}
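/* Selection example (illustrative): for AND the positive route is ruled
   out above (insns forced to 99), so "x &= 0xffffff00" is generated via
   the inverted constant 0x000000ff as a single BIC,

        bic     target, source, #255

   while PLUS with VAL == 0xfffff000 (i.e. -4096) prefers the negated
   sequence and becomes "sub target, source, #4096".  */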
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                             bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      if (*code == GT || *code == LE
          || (!TARGET_ARM && (*code == GTU || *code == LEU)))
        {
          /* Missing comparison.  First try to use an available
             comparison.  */
          if (CONST_INT_P (*op1))
            {
              i = INTVAL (*op1);

              switch (*code)
                {
                case GT:
                case LE:
                  if (i != maxval
                      && arm_const_double_by_immediates (GEN_INT (i + 1)))
                    {
                      *op1 = GEN_INT (i + 1);
                      *code = *code == GT ? GE : LT;
                      return;
                    }
                  break;

                case GTU:
                case LEU:
                  if (i != ~((unsigned HOST_WIDE_INT) 0)
                      && arm_const_double_by_immediates (GEN_INT (i + 1)))
                    {
                      *op1 = GEN_INT (i + 1);
                      *code = *code == GTU ? GEU : LTU;
                      return;
                    }
                  break;

                default:
                  gcc_unreachable ();
                }
            }

          /* If that did not work, reverse the condition.  */
          if (!op0_preserve_value)
            {
              std::swap (*op0, *op1);
              *code = (int)swap_condition ((enum rtx_code)*code);
            }
        }
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
                        GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
        {
          *op1 = GEN_INT (i + 1);
          *code = *code == GT ? GE : LT;
          return;
        }
      break;

    case GE:
    case LT:
      if (i != ~maxval
          && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
        {
          *op1 = GEN_INT (i - 1);
          *code = *code == GE ? GT : LE;
          return;
        }
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
        {
          *op1 = GEN_INT (i + 1);
          *code = *code == GTU ? GEU : LTU;
          return;
        }
      break;

    case GEU:
    case LTU:
      if (i != 0
          && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
        {
          *op1 = GEN_INT (i - 1);
          *code = *code == GEU ? GTU : LEU;
          return;
        }
      break;

    default:
      gcc_unreachable ();
    }
}
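/* Example (illustrative): 0x2ff is not a valid ARM immediate, but 0x300
   is, so "x > 0x2ff" is canonicalized from (GT x 0x2ff) into
   (GE x 0x300); the resulting cmp then needs no extra constant-loading
   insns.  */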
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value (const_tree type, const_tree func,
                    bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }

  return arm_libcall_value_1 (mode);
}
/* libcall hashtable helpers.  */

struct libcall_hasher : typed_noop_remove <rtx_def>
{
  typedef rtx_def value_type;
  typedef rtx_def compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
  static inline void remove (value_type *);
};

inline bool
libcall_hasher::equal (const value_type *p1, const compare_type *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const value_type *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table <libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
                   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufix_optab, DImode, SFmode));

      /* Values from double-precision helper functions are returned in core
         registers if the selected core only supports single-precision
         arithmetic, even if we are using the hard-float ABI.  The same is
         true for single-precision helpers, but we will never be using the
         hard-float ABI on a CPU which doesn't support single-precision
         operations in hardware.  */
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
                                                        SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
                                                        DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}
static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
           && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
         even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
        return gen_rtx_REG (mode, ARG_REGISTER(1));
    }

  return arm_libcall_value_1 (mode);
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
          && TARGET_AAPCS_BASED
          && TARGET_VFP
          && TARGET_HARD_FLOAT
          && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
          && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}
/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
        size += 32;
      if (TARGET_IWMMXT_ABI)
        size += 8;
    }

  return size;
}
/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (ie not including vectors and
         complex) are always returned in a register (or registers).
         We don't care about which register here, so we can short-cut
         some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
          && TREE_CODE (type) != VECTOR_TYPE
          && TREE_CODE (type) != COMPLEX_TYPE)
        return false;

      /* Any return value that is no larger than one word can be
         returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
        return false;

      /* Check any available co-processors to see if they accept the
         type as a register candidate (VFP, for example, can return
         some aggregates in consecutive registers).  These aren't
         available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
        return false;

      /* Vector values should be returned using ARM registers, not
         memory (unless they're over 16 bytes, which will break since
         we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
        return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
         larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
         if the type is 'integer like' and every addressable element
         has an offset of zero.  For practical purposes this means
         that the structure can have at most one non bit-field element
         and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
         have been created by C++.  */
      for (field = TYPE_FIELDS (type);
           field && TREE_CODE (field) != FIELD_DECL;
           field = DECL_CHAIN (field))
        continue;

      if (field == NULL)
        return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
        return true;

      /* ... Aggregates that are not themselves valid for returning in
         a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
        return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
         since they are not addressable.  */
      for (field = DECL_CHAIN (field);
           field;
           field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (!DECL_BIT_FIELD_TYPE (field))
            return true;
        }

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
         integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
           field;
           field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (FLOAT_TYPE_P (TREE_TYPE (field)))
            return true;

          if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
            return true;
        }

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
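/* Classification examples (illustrative): under AAPCS,
   "struct { int i; }" fits in one word and comes back in r0, while
   "struct { char c[8]; }" has no co-processor candidate and is returned
   in memory; under APCS, "struct { float f; }" is rejected by the
   FLOAT_TYPE_P test above and also goes via memory.  */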
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}
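/* Usage example (user source, not this file): the names accepted above
   appear in a "pcs" attribute on a function type, e.g.

     double dot (const float *a, const float *b, int n)
         __attribute__ ((pcs ("aapcs-vfp")));

   Anything else yields ARM_PCS_UNKNOWN and is then diagnosed by
   arm_handle_pcs_attribute.  */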
/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
         (no argument is ever a candidate for a co-processor
         register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
        {
          if (user_pcs > ARM_PCS_AAPCS_LOCAL)
            sorry ("non-AAPCS derived PCS variant");
          else if (base_rules && user_pcs != ARM_PCS_AAPCS)
            error ("variadic functions must use the base AAPCS variant");
        }

      if (base_rules)
        return ARM_PCS_AAPCS;
      else if (user_convention)
        return user_pcs;
      else if (decl && flag_unit_at_a_time)
        {
          /* Local functions never leak outside this compilation unit,
             so we are free to use whatever conventions are
             appropriate.  */
          /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
          cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
          if (i && i->local)
            return ARM_PCS_AAPCS_LOCAL;
        }
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}
static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
                    const_tree fntype ATTRIBUTE_UNUSED,
                    rtx libcall ATTRIBUTE_UNUSED,
                    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode)
        return -1;

      if (*modep == VOIDmode)
        *modep = mode;

      if (*modep == mode)
        return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
        return -1;

      if (*modep == VOIDmode)
        *modep = mode;

      if (*modep == mode)
        return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
         and 128-bit vector types, whether or not those modes are
         supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
        {
        case 8:
          mode = V2SImode;
          break;
        case 16:
          mode = V4SImode;
          break;
        default:
          return -1;
        }

      if (*modep == VOIDmode)
        *modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
         equivalent for the purposes of being homogeneous aggregates
         if they are the same size.  */
      if (*modep == mode)
        return 1;

      break;

    case ARRAY_TYPE:
      {
        int count;
        tree index = TYPE_DOMAIN (type);

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
        if (count == -1
            || !index
            || !TYPE_MAX_VALUE (index)
            || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
            || !TYPE_MIN_VALUE (index)
            || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
            || count < 0)
          return -1;

        count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
                  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

        /* There must be no padding.  */
        if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
          return -1;

        return count;
      }

    case RECORD_TYPE:
      {
        int count = 0;
        int sub_count;
        tree field;

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
          {
            if (TREE_CODE (field) != FIELD_DECL)
              continue;

            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
            if (sub_count < 0)
              return -1;
            count += sub_count;
          }

        /* There must be no padding.  */
        if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
          return -1;

        return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
        /* These aren't very interesting except in a degenerate case.  */
        int count = 0;
        int sub_count;
        tree field;

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
          {
            if (TREE_CODE (field) != FIELD_DECL)
              continue;

            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
            if (sub_count < 0)
              return -1;
            count = count > sub_count ? count : sub_count;
          }

        /* There must be no padding.  */
        if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
          return -1;

        return count;
      }

    default:
      break;
    }

  return -1;
}
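/* Classification examples (illustrative):
     struct { float x, y, z; }         returns 3 with *MODEP = SFmode;
     struct { double d[4]; }           returns 4 with *MODEP = DFmode;
     struct { float f; double d; }     returns -1 (mismatched base types);
     struct { float f; int n; }        returns -1 (int is not a VFP type).
   A result of 1..4 with a single base mode is exactly what the AAPCS
   calls a homogeneous floating-point aggregate.  */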
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
        {
          sorry ("Thumb-1 hard-float VFP ABI");
          /* sorry() is not immediately fatal, so only display this once.  */
          seen_thumb1_vfp = true;
        }

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
          (TARGET_VFP_DOUBLE || !is_double));
}
/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
                                       machine_mode mode, const_tree type,
                                       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
        *count = ag_count;
      else
        return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
           || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
           || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;
  return true;
}
static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
                               machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
                                                &ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
                             const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;

  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
                                                &pcum->aapcs_vfp_rmode,
                                                &pcum->aapcs_vfp_rcount);
}
static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
                    const_tree type ATTRIBUTE_UNUSED)
{
  int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
        pcum->aapcs_vfp_reg_alloc = mask << regno;
        if (mode == BLKmode
            || (mode == TImode && ! TARGET_NEON)
            || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
          {
            int i;
            int rcount = pcum->aapcs_vfp_rcount;
            int rshift = shift;
            machine_mode rmode = pcum->aapcs_vfp_rmode;
            rtx par;
            if (!TARGET_NEON)
              {
                /* Avoid using unsupported vector modes.  */
                if (rmode == V2SImode)
                  rmode = DImode;
                else if (rmode == V4SImode)
                  {
                    rmode = DImode;
                    rcount *= 2;
                    rshift /= 2;
                  }
              }
            par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
            for (i = 0; i < rcount; i++)
              {
                rtx tmp = gen_rtx_REG (rmode,
                                       FIRST_VFP_REGNUM + regno + i * rshift);
                tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                         GEN_INT (i * GET_MODE_SIZE (rmode)));
                XVECEXP (par, 0, i) = tmp;
              }

            pcum->aapcs_reg = par;
          }
        else
          pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
        return true;
      }
  return false;
}
static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
                               machine_mode mode,
                               const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
                                             &ag_mode, &count);

      if (!TARGET_NEON)
        {
          if (ag_mode == V2SImode)
            ag_mode = DImode;
          else if (ag_mode == V4SImode)
            {
              ag_mode = DImode;
              count *= 2;
            }
        }
      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
        {
          rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
          tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
          XVECEXP (par, 0, i) = tmp;
        }

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
                   machine_mode mode ATTRIBUTE_UNUSED,
                   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
  return;
}
#define AAPCS_CP(X)                             \
  {                                             \
    aapcs_ ## X ## _cum_init,                   \
    aapcs_ ## X ## _is_call_candidate,          \
    aapcs_ ## X ## _allocate,                   \
    aapcs_ ## X ## _is_return_candidate,        \
    aapcs_ ## X ## _allocate_return_reg,        \
    aapcs_ ## X ## _advance                     \
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is
     BLKmode) can be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a
     call, this routine must not fail and will only be called if
     is_return_candidate returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
                          const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}
static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
        {
          fndecl = fntype;
          fntype = TREE_TYPE (fntype);
        }

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
                                                        TYPE_MODE (type),
                                                        type))
          return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
                           const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
        {
          fndecl = fntype;
          fntype = TREE_TYPE (fntype);
        }

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
                                                        type))
          return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
                                                             mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}
static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
                  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
         preparation phase, these are handled elsewhere in the
         compiler.  */

      if (slot >= 0)
        {
          /* A Co-processor register candidate goes either in its own
             class of registers or on the stack.  */
          if (!pcum->aapcs_cprc_failed[slot])
            {
              /* C1.cp - Try to allocate the argument to co-processor
                 registers.  */
              if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
                return;

              /* C2.cp - Put the argument on the stack and note that we
                 can't assign any more candidates in this slot.  We also
                 need to note that we have allocated stack space, so that
                 we won't later try to split a non-cprc candidate between
                 core registers and the stack.  */
              pcum->aapcs_cprc_failed[slot] = true;
              pcum->can_split = false;
            }

          /* We didn't get a register, so this argument goes on the
             stack.  */
          gcc_assert (pcum->can_split == false);
          return;
        }
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
    ncrn++;

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
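/* Rule walkthrough (illustrative, base AAPCS, no co-processor slots):
   for "f (int a, long long b)", A takes r0 under C4, C3 then rounds the
   NCRN from 1 up to 2, and B takes {r2, r3} under C4.  For
   "f (int a, int b, int c, long long d)", A..C take r0..r2, C3 rounds
   3 up to 4, C4 and C5 both fail, and C6/C7 put D entirely on the
   stack.  */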
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
                          rtx libname,
                          tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
        pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
        {
          int i;

          for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
            {
              pcum->aapcs_cprc_failed[i] = false;
              aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
            }
        }
      return;
    }

  /* Legacy ABIs.  */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
           fn_arg;
           fn_arg = TREE_CHAIN (fn_arg))
        pcum->named_count += 1;

      if (! pcum->named_count)
        pcum->named_count = INT_MAX;
    }
}
/* Return true if we use LRA instead of reload pass.  */
static bool
arm_lra_p (void)
{
  return arm_lra_flag;
}
/* Return true if mode/type need doubleword alignment.  */
static bool
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
          || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
}
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
                  const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (mode == VOIDmode)
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
        return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
        {
          pcum->can_split = false;
          return NULL_RTX;
        }
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if (pcum->nregs & 1
      && ARM_DOUBLEWORD_ALIGN
      && arm_needs_doubleword_align (mode, type))
    pcum->nregs++;

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (mode, pcum->nregs);
}
static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
          ? DOUBLEWORD_ALIGNMENT
          : PARM_BOUNDARY);
}
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
                       tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
/* Update the data in PCUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
                          const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);

      if (pcum->aapcs_cprc_slot >= 0)
        {
          aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
                                                              type);
          pcum->aapcs_cprc_slot = -1;
        }

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (mode)
          && pcum->named_count > pcum->nargs
          && TARGET_IWMMXT_ABI)
        pcum->iwmmxt_nregs += 1;
      else
        pcum->nregs += ARM_NUM_REGS2 (mode, type);
    }
}
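/* Sketch of how the hooks above cooperate (an assumption about the
   generic calling-conventions driver, not code from this file):

     CUMULATIVE_ARGS cum;
     arm_init_cumulative_args (&cum, fntype, libname, fndecl);
     for each argument:
       reg = arm_function_arg (pack_cumulative_args (&cum),
                               mode, type, named);
       partial = arm_arg_partial_bytes (pack_cumulative_args (&cum),
                                        mode, type, named);
       ... the value goes in REG, or on the stack when REG is NULL,
           with PARTIAL bytes in registers when the argument is split ...
       arm_function_arg_advance (pack_cumulative_args (&cum),
                                 mode, type, named);  */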
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
                       machine_mode mode ATTRIBUTE_UNUSED,
                       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,          /* No #pragma [no_]long_calls is in effect.  */
  LONG,         /* #pragma long_calls is in effect.  */
  SHORT         /* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
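/* Usage example (user source, not this file):

     #pragma long_calls
     void far_away (void);         (treated as if declared long_call)
     #pragma long_calls_off

   The recorded state is consumed by arm_set_default_type_attributes
   below when new function types are created.  */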
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
                          bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
        {
          warning (OPT_Wattributes, "%qE attribute only applies to functions",
                   name);
          *no_add_attrs = true;
        }
      /* FIXME: the argument if any is checked for type attributes;
         should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
          || TREE_CODE (*node) == METHOD_TYPE)
        {
          if (arm_isr_value (args) == ARM_FT_UNKNOWN)
            {
              warning (OPT_Wattributes, "%qE attribute ignored",
                       name);
              *no_add_attrs = true;
            }
        }
      else if (TREE_CODE (*node) == POINTER_TYPE
               && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
                   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
               && arm_isr_value (args) != ARM_FT_UNKNOWN)
        {
          *node = build_variant_type_copy (*node);
          TREE_TYPE (*node) = build_type_attribute_variant
            (TREE_TYPE (*node),
             tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
          *no_add_attrs = true;
        }
      else
        {
          /* Possibly pass this attribute on from the type to a decl.  */
          if (flags & ((int) ATTR_FLAG_DECL_NEXT
                       | (int) ATTR_FLAG_FUNCTION_NEXT
                       | (int) ATTR_FLAG_ARRAY_NEXT))
            {
              *no_add_attrs = true;
              return tree_cons (name, args, NULL_TREE);
            }
          else
            {
              warning (OPT_Wattributes, "%qE attribute ignored",
                       name);
            }
        }
    }

  return NULL_TREE;
}
6180 attribute_spec.handler. */
6182 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6183 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6185 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6187 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6188 *no_add_attrs
= true;
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
                                tree name ATTRIBUTE_UNUSED,
                                tree args ATTRIBUTE_UNUSED,
                                int flags ATTRIBUTE_UNUSED,
                                bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same one.  */
      if ((l1 != l2) || (s1 != s2))
        return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
        return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  return 1;
}
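
/* Illustration (hypothetical user code): the mixed-attribute check above
   is what rejects

       void f (void) __attribute__ ((long_call));
       void (*p) (void) __attribute__ ((short_call)) = &f;

   -- one type carries long_call, the other short_call, so the function
   returns 0 and the middle end reports the types as incompatible.  */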
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
        attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
        attr_name = get_identifier ("short_call");
      else
        return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about functions defined in the same
     compilation unit.  */
  if (!TREE_STATIC (decl))
    return false;

  /* Make sure that SYMBOL always binds to the definition in this
     compilation unit.  */
  if (!targetm.binds_local_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
        return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

        a.  has an __attribute__ ((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

        d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
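
/* For example, with -mlong-calls (case "c" above) a call that would have
   been a single "bl callee" is instead emitted as an address load followed
   by an indirect call, trading an instruction for unlimited branch range.  */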
/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
         example that we aren't returning a value from the sibling in
         a VFP register but then need to transfer it to a core
         register.  */
      rtx a, b;

      a = arm_function_value (TREE_TYPE (exp), decl, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
                              cfun->decl, false);
      if (!rtx_equal_p (a, b))
        return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
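
/* E.g. (symbol_ref "foo") and (const (plus (symbol_ref "foo") (const_int 8)))
   are rejected above, since they need a GOT-based sequence under PIC, while
   plain integers and other PC-relative constants pass through unchanged.  */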
/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */

static void
require_pic_register (void)
{
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
    {
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM
          && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
        {
          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            crtl->uses_pic_offset_table = 1;
        }
      else
        {
          rtx_insn *seq, *insn;

          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_reg_rtx (Pmode);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            {
              crtl->uses_pic_offset_table = 1;
              start_sequence ();

              if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
                  && arm_pic_register > LAST_LO_REGNUM)
                emit_move_insn (cfun->machine->pic_reg,
                                gen_rtx_REG (Pmode, arm_pic_register));
              else
                arm_load_pic_register (0UL);

              seq = get_insns ();
              end_sequence ();

              for (insn = seq; insn; insn = NEXT_INSN (insn))
                if (INSN_P (insn))
                  INSN_LOCATION (insn) = prologue_location;

              /* We can be called during expansion of PHI nodes, where
                 we can't yet emit instructions directly in the final
                 insn stream.  Queue the insns on the entry edge, they will
                 be committed after everything else is expanded.  */
              insert_insn_on_edge (seq,
                                   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
            }
        }
    }
}
rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      rtx insn;

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      /* VxWorks does not impose a fixed gap between segments; the run-time
         gap can be different from the object-file gap.  We therefore can't
         use GOTOFF unless we are absolutely sure that the symbol is in the
         same segment as the GOT.  Unfortunately, the flexibility of linker
         scripts means that we can't be sure of that in general, so assume
         that GOTOFF is never valid on VxWorks.  */
      if ((GET_CODE (orig) == LABEL_REF
           || (GET_CODE (orig) == SYMBOL_REF &&
               SYMBOL_REF_LOCAL_P (orig)))
          && NEED_GOT_RELOC
          && arm_pic_data_is_text_relative)
        insn = arm_pic_static_addr (orig, reg);
      else
        {
          rtx pat;
          rtx mem;

          /* If this function doesn't have a pic register, create one now.  */
          require_pic_register ();

          pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);

          /* Make the MEM as close to a constant as possible.  */
          mem = SET_SRC (pat);
          gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
          MEM_READONLY_P (mem) = 1;
          MEM_NOTRAP_P (mem) = 1;

          insn = emit_insn (pat);
        }

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
         by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
        return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
          && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
        return orig;

      /* Handle the case where we have:
         const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
         CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
        {
          gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
          return orig;
        }

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                       base == reg ? 0 : reg);

      if (CONST_INT_P (offset))
        {
          /* The base register doesn't really matter, we only want to
             test the index for the appropriate mode.  */
          if (!arm_legitimate_index_p (mode, offset, SET, 0))
            {
              gcc_assert (can_create_pseudo_p ());
              offset = force_reg (Pmode, offset);
            }

          if (CONST_INT_P (offset))
            return plus_constant (Pmode, base, INTVAL (offset));
        }

      if (GET_MODE_SIZE (mode) > 4
          && (GET_MODE_CLASS (mode) == MODE_INT
              || TARGET_SOFT_FLOAT))
        {
          emit_insn (gen_addsi3 (reg, base, offset));
          return reg;
        }

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
/* Find a spare register to use during the prolog of a function.  */

static unsigned long
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg--)
    if (!df_regs_ever_live_p (reg))
      return reg;

  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have less than 4
     registers worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): ie crtl->args.pretend_args_size is greater than 0.  */
  if (cfun->machine->uses_anonymous_args
      && crtl->args.pretend_args_size > 0)
    return LAST_ARG_REGNUM;

  /* The other case is when we have fixed arguments but less than 4 registers
     worth.  In this case r3 might be used in the body of the function, but
     it is not being used to convey an argument into the function.  In theory
     we could just check crtl->args.size to see how many bytes are
     being passed in argument registers, but it seems that it is unreliable.
     Sometimes it will have the value 0 when in fact arguments are being
     passed.  (See testcase execute/20021111-1.c for an example).  So we also
     check the args_info.nregs field as well.  The problem with this field is
     that it makes no allowances for arguments that are passed to the
     function but which are not used.  Hence we could miss an opportunity
     when a function has an unused argument in r3.  But it is better to be
     safe than to be sorry.  */
  if (! cfun->machine->uses_anonymous_args
      && crtl->args.size >= 0
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
      && (TARGET_AAPCS_BASED
          ? crtl->args.info.aapcs_ncrn < 4
          : crtl->args.info.nregs < 4))
    return LAST_ARG_REGNUM;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg--)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg++)
        if (pushed_regs_mask & (1 << reg))
          return reg;
    }

  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
{
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
    return;

  gcc_assert (flag_pic);

  pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
         never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
         addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
                                UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
        {
          emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
      else /* TARGET_THUMB1 */
        {
          if (arm_pic_register != INVALID_REGNUM
              && REGNO (pic_reg) > LAST_LO_REGNUM)
            {
              /* We will have pushed the pic register, so we should always be
                 able to find a work register.  */
              pic_tmp = gen_rtx_REG (SImode,
                                     thumb_find_work_register (saved_regs));
              emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
              emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
              emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
            }
          else if (arm_pic_register != INVALID_REGNUM
                   && arm_pic_register > LAST_LO_REGNUM
                   && REGNO (pic_reg) <= LAST_LO_REGNUM)
            {
              emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
              emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
              emit_use (gen_rtx_REG (Pmode, arm_pic_register));
            }
          else
            emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx, insn;

  gcc_assert (flag_pic);

  /* We use an UNSPEC rather than a LABEL_REF because this label
     never appears in the code stream.  */
  labelno = GEN_INT (pic_labelno++);
  l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  l1 = gen_rtx_CONST (VOIDmode, l1);

  /* On the ARM the PC register contains 'dot + 8' at the time of the
     addition, on the Thumb it is 'dot + 4'.  */
  offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
  offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
                               UNSPEC_SYMBOL_OFFSET);
  offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

  insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
  return insn;
}
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
          || regno >= FIRST_PSEUDO_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}
/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
                                int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode
                  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
         to fixup invalid register choices.  */
      if (use_ldrd
          && GET_CODE (x) == POST_MODIFY
          && !CONST_INT_P (addend))
        return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
              && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && ((CONST_INT_P (xop1)
                    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
              && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode
                  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
        return 0;

      offset = INTVAL (addend);
      if (GET_MODE_SIZE (mode) <= 4)
        return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
              && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && (thumb2_legitimate_index_p (mode, xop1, strict_p)
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend an 8-bit value to 32-bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  else if (arm_disable_literal_pool && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
                        int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && TARGET_VFP
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);

          if (TARGET_LDRD)
            return val > -256 && val < 256;
          else
            return val > -4096 && val < 4092;
        }

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
            && (mode == HImode
                || mode == HFmode
                || (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
        {
          rtx xiop0 = XEXP (index, 0);
          rtx xiop1 = XEXP (index, 1);

          return ((arm_address_register_rtx_p (xiop0, strict_p)
                   && power_of_two_operand (xiop1, SImode))
                  || (arm_address_register_rtx_p (xiop1, strict_p)
                      && power_of_two_operand (xiop0, SImode)));
        }
      else if (code == LSHIFTRT || code == ASHIFTRT
               || code == ASHIFT || code == ROTATERT)
        {
          rtx op = XEXP (index, 1);

          return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
                  && CONST_INT_P (op)
                  && INTVAL (op) > 0
                  && INTVAL (op) <= 31);
        }
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
          || mode == HFmode
          || (outer == SIGN_EXTEND && mode == QImode))
        range = 256;
      else
        range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
          && INTVAL (index) < range
          && INTVAL (index) > -range);
}
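
/* For instance, a scaled SImode address such as [r0, r1, lsl #2] is
   accepted through the shift cases above, while an ARMv4 halfword access
   (ldrh/ldrsh) falls through to the final test with range == 256, i.e.
   immediate offsets of -255..+255 only.  */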
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
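
/* The factors 1, 2, 4 and 8 correspond to the LSL #0..#3 scaled-register
   forms of the Thumb-2 load/store instructions.  */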
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && TARGET_VFP
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            /* Thumb-2 allows only > -256 index range for its core register
               load/stores.  Since we allow SF/DF in core registers, we have
               to use the intersection between -256~4096 (core) and -1024~1024
               (coprocessor).  */
            && INTVAL (index) > -256
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
         and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
        return (code == CONST_INT
                && INTVAL (index) < 1024
                && INTVAL (index) > -1024
                && (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);
          /* ??? Can we assume ldrd for thumb2?  */
          /* Thumb-2 ldrd only has reg+const addressing modes.  */
          /* ldrd supports offsets of +-1020.
             However the ldr fallback does not.  */
          return val > -256 && val < 256 && (val & 3) == 0;
        }
      else
        return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
               && thumb2_index_mul_operand (xiop1))
              || (arm_address_register_rtx_p (xiop1, strict_p)
                  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
              && CONST_INT_P (op)
              && INTVAL (op) > 0
              && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
          && INTVAL (index) < 4096
          && INTVAL (index) > -256);
}
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
          || regno > LAST_VIRTUAL_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || (GET_MODE_SIZE (mode) >= 4
              && (regno == STACK_POINTER_REGNUM
                  || regno >= FIRST_PSEUDO_REGISTER
                  || x == hard_frame_pointer_rtx
                  || x == arg_pointer_rtx)));
}
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
          || reg_mentioned_p (arg_pointer_rtx, x)
          || reg_mentioned_p (virtual_incoming_args_rtx, x)
          || reg_mentioned_p (virtual_outgoing_args_rtx, x)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
          || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
           && reload_completed
           && (GET_CODE (x) == LABEL_REF
               || (GET_CODE (x) == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
           && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
         will be replaced with STACK, and SP relative addressing only
         permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
          && XEXP (x, 0) != frame_pointer_rtx
          && XEXP (x, 1) != frame_pointer_rtx
          && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
          && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
              || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
        return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
                || XEXP (x, 0) == arg_pointer_rtx)
               && CONST_INT_P (XEXP (x, 1))
               && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
        return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
         larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
         just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
               && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
               && GET_MODE_SIZE (mode) >= 4
               && CONST_INT_P (XEXP (x, 1))
               && INTVAL (XEXP (x, 1)) >= 0
               && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;

      else if (REG_P (XEXP (x, 0))
               && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
                   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
                   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
                       && REGNO (XEXP (x, 0))
                          <= LAST_VIRTUAL_POINTER_REGISTER))
               && GET_MODE_SIZE (mode) >= 4
               && CONST_INT_P (XEXP (x, 1))
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && GET_MODE_SIZE (mode) == 4
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
              && (val + GET_MODE_SIZE (mode)) <= 128
              && (val & 3) == 0);
    }
}
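
/* Concretely: QImode allows offsets 0..31 (ldrb), HImode 0, 2, ..., 62
   (ldrh), and SImode and wider 0, 4, ..., 124 (ldr) -- the unscaled 5-bit
   immediate fields of the 16-bit Thumb load/store encodings.  */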
static bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */

static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
        return LO_REGS;
      else
        return rclass;
    }
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
         otherwise other uses of r0 (e.g. setting up function arguments) may
         clobber the value.  */

      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, 0);
      emit_move_insn (target, tmp);
    }
  return target;
}
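
/* With a hardware thread pointer (-mtp=cp15) load_tp_hard expands to a
   single CP15 read, "mrc p15, 0, <reg>, c13, c0, 3"; the soft variant
   calls the EABI helper __aeabi_read_tp, which returns the pointer in
   r0 -- hence the copy out of r0 above.  */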
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx insns, label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
                        gen_rtvec (4, x, GEN_INT (reloc), label,
                                   GEN_INT (TARGET_ARM ? 8 : 4)),
                        UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
                                     LCT_PURE, /* LCT_CONST?  */
                                     Pmode, 1, reg, Pmode);

  insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
                                       gen_rtx_CONST (VOIDmode, label),
                                       GEN_INT (!TARGET_ARM)),
                            UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != 0);

  emit_move_insn (reg, reg0);

  return reg;
}
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          /* Original scheme */
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, x);
        }
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

          /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
             share the LDM result with other LD model accesses.  */
          eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
                                UNSPEC_TLS);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, eqv);

          /* Load the addend.  */
          addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
                                                     GEN_INT (TLS_LDO32)),
                                   UNSPEC_TLS);
          addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
          dest = gen_rtx_PLUS (Pmode, dest, addend);
        }
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
                                       GEN_INT (TARGET_ARM ? 8 : 4)),
                            UNSPEC_TLS);
      reg = load_tls_operand (sum, reg);

      if (TARGET_ARM)
        emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
        emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
      else
        {
          emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
          emit_move_insn (reg, gen_const_mem (SImode, reg));
        }

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (2, x, GEN_INT (TLS_LE32)),
                            UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      gcc_unreachable ();
    }
}
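
/* Summary of the models handled above: global- and local-dynamic call
   __tls_get_addr (or use the GNU2 descriptor sequence), initial-exec
   loads the thread offset from the GOT, and local-exec folds to
   "tp + link-time constant", the cheapest form.  */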
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
        {
          addend = XEXP (XEXP (x, 0), 1);
          x = XEXP (XEXP (x, 0), 0);
        }

      if (GET_CODE (x) != SYMBOL_REF)
        return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
        {
          x = gen_rtx_PLUS (SImode, x, addend);
          orig_x = x;
        }
      else
        return x;
    }

  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
        return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
          && !symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
          && CONST_INT_P (xop1))
        {
          HOST_WIDE_INT n, low_n;
          rtx base_reg, val;
          n = INTVAL (xop1);

          /* VFP addressing modes actually allow greater offsets, but for
             now we just stick with the lowest common denominator.  */
          if (mode == DImode
              || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
            {
              low_n = n & 0x0f;
              n &= ~0x0f;
              if (low_n > 4)
                {
                  n += 16;
                  low_n -= 16;
                }
            }
          else
            {
              low_n = ((mode) == TImode ? 0
                       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
              n -= low_n;
            }

          base_reg = gen_reg_rtx (SImode);
          val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
          emit_move_insn (base_reg, val);
          x = plus_constant (Pmode, base_reg, low_n);
        }
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow for the mini pool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
         use an 8-bit index.  So let's use a 12-bit index for SImode only and
         hope that arm_gen_constant will enable ldrb to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
        {
          /* It'll most probably be more efficient to generate the base
             with more bits set and use a negative index instead.  */
          base |= mask;
          index -= mask;
        }
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
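
/* Worked example for the CONST_INT case above: an SImode access to the
   absolute address 0x12345 is split as base = 0x12000, index = 0x345, so
   the access becomes "ldr rD, [rBase, #0x345]" and neighbouring absolute
   addresses can reuse rBase.  */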
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
          || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
         then offsetting that.  Don't do this when optimizing for space
         since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
          && offset < 256 + 31 * GET_MODE_SIZE (mode))
        {
          HOST_WIDE_INT delta;

          if (offset >= 256)
            delta = offset - (256 - GET_MODE_SIZE (mode));
          else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
            delta = 31 * GET_MODE_SIZE (mode);
          else
            delta = offset & (~31 * GET_MODE_SIZE (mode));

          xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
                                NULL_RTX);
          x = plus_constant (Pmode, xop0, delta);
        }
      else if (offset < 0 && offset > -256)
        /* Small negative offsets are best done with a subtract before the
           dereference, forcing these into a register normally takes two
           instructions.  */
        x = force_operand (x, NULL_RTX);
      else
        {
          /* For the remaining cases, force the constant into a register.  */
          xop1 = force_reg (SImode, xop1);
          x = gen_rtx_PLUS (SImode, xop0, xop1);
        }
    }
  else if (GET_CODE (x) == PLUS
           && s_register_operand (XEXP (x, 1), SImode)
           && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
bool
arm_legitimize_reload_address (rtx *p,
                               machine_mode mode,
                               int opnum, int type,
                               int ind_levels ATTRIBUTE_UNUSED)
{
  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (*p) == PLUS
      && GET_CODE (XEXP (*p, 0)) == PLUS
      && REG_P (XEXP (XEXP (*p, 0), 0))
      && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
      && CONST_INT_P (XEXP (*p, 1)))
    {
      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
                   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
                   VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  if (GET_CODE (*p) == PLUS
      && REG_P (XEXP (*p, 0))
      && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
      /* If the base register is equivalent to a constant, let the generic
         code handle it.  Otherwise we will run into problems if a future
         reload pass decides to rematerialize the constant.  */
      && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
      && CONST_INT_P (XEXP (*p, 1)))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
      HOST_WIDE_INT low, high;

      /* Detect coprocessor load/stores.  */
      bool coproc_p = ((TARGET_HARD_FLOAT
                        && TARGET_VFP
                        && (mode == SFmode || mode == DFmode))
                       || (TARGET_REALLY_IWMMXT
                           && VALID_IWMMXT_REG_MODE (mode))
                       || (TARGET_NEON
                           && (VALID_NEON_DREG_MODE (mode)
                               || VALID_NEON_QREG_MODE (mode))));

      /* For some conditions, bail out when lower two bits are unaligned.  */
      if ((val & 0x3) != 0
          /* Coprocessor load/store indexes are 8-bits + '00' appended.  */
          && (coproc_p
              /* For DI, and DF under soft-float: */
              || ((mode == DImode || mode == DFmode)
                  /* Without ldrd, we use stm/ldm, which does not
                     fare well with unaligned bits.  */
                  && (! TARGET_LDRD
                      /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4.  */
                      || TARGET_THUMB2))))
        return false;

      /* When breaking down a [reg+index] reload address into [(reg+high)+low],
         of which the (reg+high) gets turned into a reload add insn,
         we try to decompose the index into high/low values that can often
         also lead to better reload CSE.
         For example:
                 ldr r0, [r2, #4100]  // Offset too large
                 ldr r1, [r2, #4104]  // Offset too large

         is best reloaded as:
                 add t1, r2, #4096
                 ldr r0, [t1, #4]
                 add t2, r2, #4096
                 ldr r1, [t2, #8]

         which post-reload CSE can simplify in most cases to eliminate the
         second add instruction:
                 add t1, r2, #4096
                 ldr r0, [t1, #4]
                 ldr r1, [t1, #8]

         The idea here is that we want to split out the bits of the constant
         as a mask, rather than as subtracting the maximum offset that the
         respective type of load/store used can handle.

         When encountering negative offsets, we can still utilize it even if
         the overall offset is positive; sometimes this may lead to an immediate
         that can be constructed with fewer instructions.
         For example:
                 ldr r0, [r2, #0x3FFFFC]

         This is best reloaded as:
                 add t1, r2, #0x400000
                 ldr r0, [t1, #-4]

         The trick for spotting this for a load insn with N bits of offset
         (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
         negative offset that is going to make bit N and all the bits below
         it become zero in the remainder part.

         The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
         to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
         used in most cases of ARM load/store instructions.  */

#define SIGN_MAG_LOW_ADDR_BITS(VAL, N)				\
      (((VAL) & ((1 << (N)) - 1))				\
       ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N)))	\
         - (1 << (N))						\
       : 0)
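
      /* Worked example: for the 0x3FFFFC load above with N == 10, the low
         bits are nonzero, so the macro yields ((0x3FFFFC & 0x7FF) ^ 0x400)
         - 0x400 = 0x3FC - 0x400 = -4, and high then becomes 0x400000 --
         exactly the add/ldr pair shown in the comment.  */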
      if (coproc_p)
        {
          low = SIGN_MAG_LOW_ADDR_BITS (val, 10);

          /* NEON quad-word load/stores are made of two double-word accesses,
             so the valid index range is reduced by 8.  Treat as 9-bit range if
             we go over it.  */
          if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
            low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
        }
      else if (GET_MODE_SIZE (mode) == 8)
        {
          if (TARGET_LDRD)
            low = (TARGET_THUMB2
                   ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
                   : SIGN_MAG_LOW_ADDR_BITS (val, 8));
          else
            /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
               to access doublewords.  The supported load/store offsets are
               -8, -4, and 4, which we try to produce here.  */
            low = ((val & 0xf) ^ 0x8) - 0x8;
        }
      else if (GET_MODE_SIZE (mode) < 8)
        {
          /* NEON element load/stores do not have an offset.  */
          if (TARGET_NEON_FP16 && mode == HFmode)
            return false;

          if (TARGET_THUMB2)
            {
              /* Thumb-2 has an asymmetrical index range of (-256,4096).
                 Try the wider 12-bit range first, and re-try if the result
                 will not fit into the 8-bit range below.  */
              low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
              if (low < -255)
                low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
            }
          else
            {
              if (mode == HImode || mode == HFmode)
                {
                  if (arm_arch4)
                    low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
                  else
                    {
                      /* The storehi/movhi_bytes fallbacks can use only
                         [-4094,+4094] of the full ldrb/strb index range.  */
                      low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
                      if (low == 4095 || low == -4095)
                        return false;
                    }
                }
              else
                low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
            }
        }
      else
        return false;

      high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
               ^ (unsigned HOST_WIDE_INT) 0x80000000)
              - (unsigned HOST_WIDE_INT) 0x80000000);
      /* Check for overflow or zero */
      if (low == 0 || high == 0 || (high + low != val))
        return false;

      /* Reload the high part into a base reg; leave the low part
         in the mem.
         Note that replacing this gen_rtx_PLUS with plus_constant is
         wrong in this case because we rely on the
         (plus (plus reg c1) c2) structure being preserved so that
         XEXP (*p, 0) in push_reload below uses the correct term.  */
      *p = gen_rtx_PLUS (GET_MODE (*p),
                         gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
                                       GEN_INT (high)),
                         GEN_INT (low));
      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
                   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
                   VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  return false;
}
bool
thumb_legitimize_reload_address (rtx *x_p,
                                 machine_mode mode,
                                 int opnum, int type,
                                 int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  if (GET_CODE (x) == PLUS
      && GET_MODE_SIZE (mode) < 4
      && REG_P (XEXP (x, 0))
      && XEXP (x, 0) == stack_pointer_rtx
      && CONST_INT_P (XEXP (x, 1))
      && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
    {
      rtx orig_x = x;

      x = copy_rtx (x);
      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
                   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  /* If both registers are hi-regs, then it's better to reload the
     entire expression rather than each register individually.  That
     only requires one reload register rather than two.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
    {
      rtx orig_x = x;

      x = copy_rtx (x);
      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
                   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  return false;
}
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
        return true;

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
         TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
        iter.skip_subrtxes ();
    }
  return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
{
  /* At present, we have no support for Neon structure constants, so forbid
     them here.  It might be possible to handle simple cases like 0 and -1
     in future.  */
  if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
    return false;

  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return (CONST_INT_P (x)
          || CONST_DOUBLE_P (x)
          || CONSTANT_ADDRESS_P (x)
          || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
          && (TARGET_32BIT
              ? arm_legitimate_constant_p_1 (mode, x)
              : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
          && !offset_within_block_p (base, INTVAL (offset)))
        return true;
    }
  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
  (REG_P (X) ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);
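
    /* The constant-multiplier costing below counts how many 2-bit chunks
       the (slow, iterative) multiplier must step through; e.g. a 16-bit
       constant such as 0xFF00 takes 8 steps, costing COSTS_N_INSNS (2)
       plus 8.  */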
    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
        {
          int cycles = 0;
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

          while (i)
            {
              i >>= 2;
              cycles++;
            }
          return COSTS_N_INSNS (2) + cycles;
        }
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
         the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
              + 4 * ((MEM_P (SET_SRC (x)))
                     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
        {
          if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
            return 0;
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
                || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
                 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
        return total;

      if (arm_arch6)
        return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
         we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
static bool
arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
{
  machine_mode mode = GET_MODE (x);
  enum rtx_code subcode;
  rtx operand;
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case MEM:
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
      return true;

    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      if (TARGET_HARD_FLOAT && mode == SFmode)
        *total = COSTS_N_INSNS (2);
      else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
        *total = COSTS_N_INSNS (4);
      else
        *total = COSTS_N_INSNS (20);
      return false;

    case ROTATE:
      if (REG_P (XEXP (x, 1)))
        *total = COSTS_N_INSNS (1); /* Need to subtract from 32.  */
      else if (!CONST_INT_P (XEXP (x, 1)))
        *total = rtx_cost (XEXP (x, 1), code, 1, speed);

      /* Fall through */
    case ROTATERT:
      if (mode != SImode)
        {
          *total += COSTS_N_INSNS (4);
          return true;
        }

      /* Fall through */
    case ASHIFT: case LSHIFTRT: case ASHIFTRT:
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
      if (mode == DImode)
        {
          *total += COSTS_N_INSNS (3);
          return true;
        }

      *total += COSTS_N_INSNS (1);
      /* Increase the cost of complex shifts because they aren't any faster,
         and reduce dual issue opportunities.  */
      if (arm_tune_cortex_a9
          && outer != SET && !CONST_INT_P (XEXP (x, 1)))
        ++*total;

      return true;

    case MINUS:
      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
          if (CONST_INT_P (XEXP (x, 0))
              && const_ok_for_arm (INTVAL (XEXP (x, 0))))
            {
              *total += rtx_cost (XEXP (x, 1), code, 1, speed);
              return true;
            }

          if (CONST_INT_P (XEXP (x, 1))
              && const_ok_for_arm (INTVAL (XEXP (x, 1))))
            {
              *total += rtx_cost (XEXP (x, 0), code, 0, speed);
              return true;
            }

          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              if (CONST_DOUBLE_P (XEXP (x, 0))
                  && arm_const_double_rtx (XEXP (x, 0)))
                {
                  *total += rtx_cost (XEXP (x, 1), code, 1, speed);
                  return true;
                }

              if (CONST_DOUBLE_P (XEXP (x, 1))
                  && arm_const_double_rtx (XEXP (x, 1)))
                {
                  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
                  return true;
                }

              return false;
            }
          *total = COSTS_N_INSNS (20);
          return false;
        }

      *total = COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 0))
          && const_ok_for_arm (INTVAL (XEXP (x, 0))))
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          return true;
        }

      subcode = GET_CODE (XEXP (x, 1));
      if (subcode == ASHIFT || subcode == ASHIFTRT
          || subcode == LSHIFTRT
          || subcode == ROTATE || subcode == ROTATERT)
        {
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
          *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
          return true;
        }

      /* A shift as a part of RSB costs no more than RSB itself.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          return true;
        }

      if (subcode == MULT
          && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
        {
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
          *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
          return true;
        }

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
          || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
        {
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
          if (REG_P (XEXP (XEXP (x, 1), 0))
              && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
            *total += COSTS_N_INSNS (1);

          return true;
        }

      /* Fall through */

    case PLUS:
      if (code == PLUS && arm_arch6 && mode == SImode
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (1);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
                              0, speed);
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          return true;
        }

      /* MLA: All arguments must be registers.  We filter out
         multiplication by a power of two, so that we fall down into
         the code below.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
          && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        /* The cost comes from the cost of the multiply.  */
        return false;

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              if (CONST_DOUBLE_P (XEXP (x, 1))
                  && arm_const_double_rtx (XEXP (x, 1)))
                {
                  *total += rtx_cost (XEXP (x, 0), code, 0, speed);
                  return true;
                }

              return false;
            }

          *total = COSTS_N_INSNS (20);
          return false;
        }

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
          || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
        {
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
          if (REG_P (XEXP (XEXP (x, 0), 0))
              && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
            *total += COSTS_N_INSNS (1);
          return true;
        }

      /* Fall through */

    case AND: case XOR: case IOR:

      /* Normally the frame registers will be spilt into reg+const during
         reload, so it is a bad idea to combine them with other instructions,
         since then they might not be moved outside of loops.  As a compromise
         we allow integration with ops that have a constant as their second
         operand.  */
      if (REG_OR_SUBREG_REG (XEXP (x, 0))
          && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
          && !CONST_INT_P (XEXP (x, 1)))
        *total = COSTS_N_INSNS (1);

      if (mode == DImode)
        {
          *total += COSTS_N_INSNS (2);
          if (CONST_INT_P (XEXP (x, 1))
              && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
            {
              *total += rtx_cost (XEXP (x, 0), code, 0, speed);
              return true;
            }

          return false;
        }

      *total += COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 1))
          && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
        {
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }
      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
          || subcode == LSHIFTRT
          || subcode == ROTATE || subcode == ROTATERT)
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
          return true;
        }

      if (subcode == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
          return true;
        }

      if (subcode == UMIN || subcode == UMAX
          || subcode == SMIN || subcode == SMAX)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      return false;

    case MULT:
      /* This should have been handled by the CPU specific routines.  */
      gcc_unreachable ();

    case TRUNCATE:
      if (arm_arch3m && mode == SImode
          && GET_CODE (XEXP (x, 0)) == LSHIFTRT
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
          && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
              == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
          && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
        {
          *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
          return true;
        }
      *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT.  */
      return false;

    case NEG:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
          *total = COSTS_N_INSNS (2);
          return false;
        }

      /* Fall through */
    case NOT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      if (mode == SImode && code == NOT)
        {
          subcode = GET_CODE (XEXP (x, 0));
          if (subcode == ASHIFT || subcode == ASHIFTRT
              || subcode == LSHIFTRT
              || subcode == ROTATE || subcode == ROTATERT
              || (subcode == MULT
                  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
            {
              *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
              /* Register shifts cost an extra cycle.  */
              if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
                *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
                                                        subcode, 1, speed);
              return true;
            }
        }

      return false;

    case IF_THEN_ELSE:
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        {
          *total = COSTS_N_INSNS (4);
          return true;
        }

      operand = XEXP (x, 0);

      if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
             || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
            && REG_P (XEXP (operand, 0))
            && REGNO (XEXP (operand, 0)) == CC_REGNUM))
        *total += COSTS_N_INSNS (1);
      *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
                 + rtx_cost (XEXP (x, 2), code, 2, speed));
      return true;

    case NE:
      if (mode == SImode && XEXP (x, 1) == const0_rtx)
        {
          *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }
      goto scc_insn;

    case GE:
      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
          && mode == SImode && XEXP (x, 1) == const0_rtx)
        {
          *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }
      goto scc_insn;

    case LT:
      if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
          && mode == SImode && XEXP (x, 1) == const0_rtx)
        {
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }
      goto scc_insn;

    case EQ:
    case GT:
    case LE:
    case GEU:
    case GTU:
    case LEU:
    case LTU:
    scc_insn:
      /* SCC insns.  In the case where the comparison has already been
         performed, then they cost 2 instructions.  Otherwise they need
         an additional comparison before them.  */
      *total = COSTS_N_INSNS (2);
      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
        return true;

      /* Fall through */
    case COMPARE:
      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
        {
          *total = 0;
          return true;
        }

      *total += COSTS_N_INSNS (1);
      if (CONST_INT_P (XEXP (x, 1))
          && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
        {
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }

      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
          || subcode == LSHIFTRT
          || subcode == ROTATE || subcode == ROTATERT)
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
          return true;
        }

      if (subcode == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total += rtx_cost (XEXP (x, 1), code, 1, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
          return true;
        }

      return false;

    case UMIN:
    case UMAX:
    case SMIN:
    case SMAX:
      *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
      if (!CONST_INT_P (XEXP (x, 1))
          || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
        *total += rtx_cost (XEXP (x, 1), code, 1, speed);
      return true;

    case ABS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
          *total = COSTS_N_INSNS (20);
          return false;
        }
      *total = COSTS_N_INSNS (1);
      if (mode == DImode)
        *total += COSTS_N_INSNS (3);
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      *total = 0;
      if (GET_MODE_CLASS (mode) == MODE_INT)
        {
          rtx op = XEXP (x, 0);
          machine_mode opmode = GET_MODE (op);

          if (mode == DImode)
            *total += COSTS_N_INSNS (1);

          if (opmode != SImode)
            {
              if (MEM_P (op))
                {
                  /* If !arm_arch4, we use one of the extendhisi2_mem
                     or movhi_bytes patterns for HImode.  For a QImode
                     sign extension, we first zero-extend from memory
                     and then perform a shift sequence.  */
                  if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
                    *total += COSTS_N_INSNS (2);
                }
              else if (arm_arch6)
                *total += COSTS_N_INSNS (1);

              /* We don't have the necessary insn, so we need to perform some
                 other operation.  */
              else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
                /* An and with constant 255.  */
                *total += COSTS_N_INSNS (1);
              else
                /* A shift sequence.  Increase costs slightly to avoid
                   combining two shifts into an extend operation.  */
                *total += COSTS_N_INSNS (2) + 1;
            }

          return false;
        }

      switch (GET_MODE (XEXP (x, 0)))
        {
        case V8QImode:
        case V4HImode:
        case V2SImode:
        case V4QImode:
        case V2HImode:
          *total = COSTS_N_INSNS (1);
          return false;

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
      return true;

    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x))
          || const_ok_for_arm (~INTVAL (x)))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
                                                  INTVAL (x), NULL_RTX,
                                                  NULL_RTX, 0, 0));
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (3);
      return true;

    case HIGH:
      *total = COSTS_N_INSNS (1);
      return true;

    case LO_SUM:
      *total = COSTS_N_INSNS (1);
      *total += rtx_cost (XEXP (x, 0), code, 0, speed);
      return true;

    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (4);
      return true;

    case SET:
      /* The vec_extract patterns accept memory operands that require an
         address reload.  Account for the cost of that reload to give the
         auto-inc-dec pass an incentive to try to replace them.  */
      if (TARGET_NEON && MEM_P (SET_DEST (x))
          && GET_CODE (SET_SRC (x)) == VEC_SELECT)
        {
          *total = rtx_cost (SET_DEST (x), code, 0, speed);
          if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
            *total += COSTS_N_INSNS (1);
          return true;
        }

      /* Likewise for the vec_set patterns.  */
      if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
          && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
          && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
        {
          rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
          *total = rtx_cost (mem, code, 0, speed);
          if (!neon_vector_mem_operand (mem, 2, true))
            *total += COSTS_N_INSNS (1);
          return true;
        }

      return false;

    case UNSPEC:
      /* We cost this as high as our memory costs to allow this to
         be hoisted from loops.  */
      if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
        *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
          && TARGET_HARD_FLOAT
          && outer == SET
          && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
          && neon_immediate_valid_for_move (x, mode, NULL, NULL))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (4);
      return true;

    default:
      *total = COSTS_N_INSNS (4);
      return false;
    }
}
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
         defined by RTL expansion, especially for the expansion of
         multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
           && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
          || (GET_CODE (XEXP (x, 1)) == MULT
              && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
        return COSTS_N_INSNS (2);
      /* On purpose fall through for normal RTX.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
        {
          /* Thumb1 mul instruction can't operate on const.  We must load it
             into a register first.  */
          int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
          /* For the targets which have a very small and high-latency multiply
             unit, we prefer to synthesize the mult with up to 5 instructions,
             giving a good balance between size and performance.  */
          if (arm_arch6m && arm_m_profile_small_mul)
            return COSTS_N_INSNS (5);
          else
            return COSTS_N_INSNS (1) + const_size;
        }
      return COSTS_N_INSNS (1);

    case SET:
      {
        /* A SET doesn't have a mode, so let's look at the SET_DEST to get
           the mode.  */
        int words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
        return COSTS_N_INSNS (words)
               + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
                                      || satisfies_constraint_K (SET_SRC (x))
                                         /* thumb1_movdi_insn.  */
                                      || ((words > 1) && MEM_P (SET_SRC (x))));
      }

    case CONST_INT:
      if (outer == SET)
        {
          if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
            return COSTS_N_INSNS (1);
          /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
          if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
            return COSTS_N_INSNS (2);
          /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
                || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
              + COSTS_N_INSNS (1)
                * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
                 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
        {
        case QImode:
          return (1 + (mode == DImode ? 4 : 0)
                  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

        case HImode:
          return (4 + (mode == DImode ? 4 : 0)
                  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

        case SImode:
          return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

        default:
          return 99;
        }

    default:
      return 99;
    }
}
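/* Editorial note (not in the original file): unlike thumb1_rtx_costs,
   which estimates execution time, the values returned here approximate
   code size in COSTS_N_INSNS units, so a "cheap" constant is one that
   can be materialized in few Thumb-1 instructions regardless of its
   execution latency.  */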
/* RTX costs when optimizing for size.  */
static bool
arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                    int *total)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      *total = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions.  */
  switch (code)
    {
    case MEM:
      /* A memory access costs 1 insn if the mode is small, or the address is
         a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
        *total = COSTS_N_INSNS (1);
      else if (flag_pic
               && GET_CODE (XEXP (x, 0)) == PLUS
               && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
        /* This will be split into two instructions.
           See arm.md:calculate_pic_address.  */
        *total = COSTS_N_INSNS (2);
      else
        *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return true;

    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      /* Needs a libcall, so it costs about this.  */
      *total = COSTS_N_INSNS (2);
      return false;

    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
        {
          *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
          return true;
        }
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
        {
          *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
          return true;
        }
      else if (mode == SImode)
        {
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
          /* Slightly disparage register shifts, but not by much.  */
          if (!CONST_INT_P (XEXP (x, 1)))
            *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
          return true;
        }

      /* Needs a libcall.  */
      *total = COSTS_N_INSNS (2);
      return false;

    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *total = COSTS_N_INSNS (1);
          return false;
        }

      if (mode == SImode)
        {
          enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
          enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));

          if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
              || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
              || subcode1 == ROTATE || subcode1 == ROTATERT
              || subcode1 == ASHIFT || subcode1 == LSHIFTRT
              || subcode1 == ASHIFTRT)
            {
              /* It's just the cost of the two operands.  */
              *total = 0;
              return false;
            }

          *total = COSTS_N_INSNS (1);
          return false;
        }

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *total = COSTS_N_INSNS (1);
          return false;
        }

      /* A shift as a part of ADD costs nothing.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
          *total += rtx_cost (XEXP (x, 1), code, 1, false);
          return true;
        }

      /* Fall through */
    case AND: case XOR: case IOR:
      if (mode == SImode)
        {
          enum rtx_code subcode = GET_CODE (XEXP (x, 0));

          if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
              || subcode == LSHIFTRT || subcode == ASHIFTRT
              || (code == AND && subcode == NOT))
            {
              /* It's just the cost of the two operands.  */
              *total = 0;
              return false;
            }
        }

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case MULT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *total = COSTS_N_INSNS (1);
          return false;
        }

      /* Fall through */
    case NOT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case IF_THEN_ELSE:
      *total = 0;
      return false;

    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode))
        *total = 0;
      else
        *total = COSTS_N_INSNS (1);
      return false;

    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      return arm_rtx_costs_1 (x, outer_code, total, 0);

    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x)))
        /* A multiplication by a constant requires another instruction
           to load the constant to a register.  */
        *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
                                ? 1 : 0);
      else if (const_ok_for_arm (~INTVAL (x)))
        *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
      else if (const_ok_for_arm (-INTVAL (x)))
        {
          if (outer_code == COMPARE || outer_code == PLUS
              || outer_code == MINUS)
            *total = 0;
          else
            *total = COSTS_N_INSNS (1);
        }
      else
        *total = COSTS_N_INSNS (2);
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (2);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (4);
      return true;

    case CONST_VECTOR:
      if (TARGET_NEON
          && TARGET_HARD_FLOAT
          && outer_code == SET
          && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
          && neon_immediate_valid_for_move (x, mode, NULL, NULL))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (4);
      return true;

    case HIGH:
    case LO_SUM:
      /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
         cost of these slightly.  */
      *total = COSTS_N_INSNS (1) + 1;
      return true;

    case SET:
      return false;

    default:
      if (mode != VOIDmode)
        *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
        *total = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
           || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
        *shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}
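/* Usage sketch (editorial, not from the original file): given
   op = (mult:SI (reg:SI 0) (const_int 4)), shifter_op_p returns
   (reg:SI 0) and leaves *SHIFT_REG untouched, because multiplying by a
   power of two is really a left shift by exact_log2 (4) == 2.  For
   (ashift:SI (reg:SI 0) (reg:SI 1)) it returns (reg:SI 0) and sets
   *SHIFT_REG to (reg:SI 1), flagging a register-amount shift.  */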
static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  gcc_assert (GET_CODE (x) == UNSPEC);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
         use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
        *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
                  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
                                 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
        *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
                  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
                                 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      *cost = COSTS_N_INSNS (1);
      if (speed_p)
        *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
      break;

    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return true;
}
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))

#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);	\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)	\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0);
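/* Worked example (editorial, not from the original file): with speed_p
   true, LIBCALL_COST (2) expands to COSTS_N_INSNS (2 + 18), roughly the
   price of twenty instructions, which strongly discourages open-coding
   operations that will become library calls; with speed_p false it is
   only COSTS_N_INSNS (4), since at -Os the call sequence itself is
   small.  HANDLE_NARROW_SHIFT_ARITH is a macro rather than a function
   because it must be able to return from the enclosing cost routine.  */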
/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                   const struct cpu_cost_table *extra_cost,
                   int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      if (speed_p)
        *cost = thumb1_rtx_costs (x, code, outer_code);
      else
        *cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case SET:
      /* SET RTXs don't have a mode so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));

      if (REG_P (SET_SRC (x))
          && REG_P (SET_DEST (x)))
        {
          /* Assume that most copies can be done with a single insn,
             unless we don't have HW FP, in which case everything
             larger than word mode will require two insns.  */
          *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
                                   && GET_MODE_SIZE (mode) > 4)
                                  || mode == DImode)
                                 ? 2 : 1);
          /* Conditional register moves can be encoded
             in 16 bits in Thumb mode.  */
          if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
            *cost >>= 1;

          return true;
        }

      if (CONST_INT_P (SET_SRC (x)))
        {
          /* Handle CONST_INT here, since the value doesn't have a mode
             and we would otherwise be unable to work out the true cost.  */
          *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
          outer_code = SET;
          /* Slightly lower the cost of setting a core reg to a constant.
             This helps break up chains and allows for better scheduling.  */
          if (REG_P (SET_DEST (x))
              && REGNO (SET_DEST (x)) <= LR_REGNUM)
            *cost -= 1;
          x = SET_SRC (x);
          /* Immediate moves with an immediate in the range [0, 255] can be
             encoded in 16 bits in Thumb mode.  */
          if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
              && INTVAL (x) >= 0 && INTVAL (x) <= 255)
            *cost >>= 1;
          goto const_int_cost;
        }

      return false;
    case MEM:
      /* A memory access costs 1 insn if the mode is small, or the address is
         a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
        *cost = COSTS_N_INSNS (1);
      else if (flag_pic
               && GET_CODE (XEXP (x, 0)) == PLUS
               && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
        /* This will be split into two instructions.
           See arm.md:calculate_pic_address.  */
        *cost = COSTS_N_INSNS (2);
      else
        *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));

      /* For speed optimizations, add the costs of the address and
         accessing memory.  */
      if (speed_p)
#ifdef NOT_YET
        *cost += (extra_cost->ldst.load
                  + arm_address_cost (XEXP (x, 0), mode,
                                      ADDR_SPACE_GENERIC, speed_p));
#else
        *cost += extra_cost->ldst.load;
#endif
      return true;

    case PARALLEL:
      {
        /* Calculations of LDM costs are complex.  We assume an initial cost
           (ldm_1st) which will load the number of registers mentioned in
           ldm_regs_per_insn_1st registers; then each additional
           ldm_regs_per_insn_subsequent registers cost one more insn.  The
           formula for N regs is thus:

           ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
                                     + ldm_regs_per_insn_subsequent - 1)
                                    / ldm_regs_per_insn_subsequent).

           Additional costs may also be added for addressing.  A similar
           formula is used for STM.  */

        bool is_ldm = load_multiple_operation (x, SImode);
        bool is_stm = store_multiple_operation (x, SImode);

        *cost = COSTS_N_INSNS (1);

        if (is_ldm || is_stm)
          {
            if (speed_p)
              {
                HOST_WIDE_INT nregs = XVECLEN (x, 0);
                HOST_WIDE_INT regs_per_insn_1st = is_ldm
                                ? extra_cost->ldst.ldm_regs_per_insn_1st
                                : extra_cost->ldst.stm_regs_per_insn_1st;
                HOST_WIDE_INT regs_per_insn_sub = is_ldm
                          ? extra_cost->ldst.ldm_regs_per_insn_subsequent
                          : extra_cost->ldst.stm_regs_per_insn_subsequent;

                *cost += regs_per_insn_1st
                         + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
                                           + regs_per_insn_sub - 1)
                                          / regs_per_insn_sub);
              }
          }
        return true;
      }
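      /* Worked example (editorial; the tuning values are hypothetical,
         not from any real cost table): for an 8-register LDM with
         ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2,
         MAX (8 - 2, 0) == 6 extra registers remain, and
         (6 + 2 - 1) / 2 == 3, so the LDM is costed as the base insn
         plus COSTS_N_INSNS (3).  */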
    case DIV:
    case UDIV:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        *cost = COSTS_N_INSNS (speed_p
                               ? extra_cost->fp[mode != SFmode].div : 1);
      else if (mode == SImode && TARGET_IDIV)
        *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
      else
        *cost = LIBCALL_COST (2);
      return false;	/* All arguments must be in registers.  */

    case MOD:
    case UMOD:
      *cost = LIBCALL_COST (2);
      return false;	/* All arguments must be in registers.  */
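    /* Editorial note (not in the original file): TARGET_IDIV is true when
       the core provides hardware SDIV/UDIV in the current instruction
       set, so SImode division is costed as a single insn there; without
       it, and for all of MOD/UMOD, the operation becomes a library call
       and is priced via LIBCALL_COST.  */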
    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
        {
          *cost = (COSTS_N_INSNS (2)
                   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
          if (speed_p)
            *cost += extra_cost->alu.shift_reg;
          return true;
        }
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
        {
          *cost = (COSTS_N_INSNS (3)
                   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
          if (speed_p)
            *cost += 2 * extra_cost->alu.shift;
          return true;
        }
      else if (mode == SImode)
        {
          *cost = (COSTS_N_INSNS (1)
                   + rtx_cost (XEXP (x, 0), code, 0, speed_p));
          /* Slightly disparage register shifts at -Os, but not by much.  */
          if (!CONST_INT_P (XEXP (x, 1)))
            *cost += (speed_p ? extra_cost->alu.shift_reg : 1
                      + rtx_cost (XEXP (x, 1), code, 1, speed_p));
          return true;
        }
      else if (GET_MODE_CLASS (mode) == MODE_INT
               && GET_MODE_SIZE (mode) < 4)
        {
          if (code == ASHIFT)
            {
              *cost = (COSTS_N_INSNS (1)
                       + rtx_cost (XEXP (x, 0), code, 0, speed_p));
              /* Slightly disparage register shifts at -Os, but not by
                 much.  */
              if (!CONST_INT_P (XEXP (x, 1)))
                *cost += (speed_p ? extra_cost->alu.shift_reg : 1
                          + rtx_cost (XEXP (x, 1), code, 1, speed_p));
            }
          else if (code == LSHIFTRT || code == ASHIFTRT)
            {
              if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
                {
                  /* Can use SBFX/UBFX.  */
                  *cost = COSTS_N_INSNS (1);
                  if (speed_p)
                    *cost += extra_cost->alu.bfx;
                  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
                }
              else
                {
                  *cost = COSTS_N_INSNS (2);
                  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
                  if (speed_p)
                    {
                      if (CONST_INT_P (XEXP (x, 1)))
                        *cost += 2 * extra_cost->alu.shift;
                      else
                        *cost += (extra_cost->alu.shift
                                  + extra_cost->alu.shift_reg);
                    }
                  else
                    /* Slightly disparage register shifts.  */
                    *cost += !CONST_INT_P (XEXP (x, 1));
                }
            }
          else /* Rotates.  */
            {
              *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
              *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
              if (speed_p)
                {
                  if (CONST_INT_P (XEXP (x, 1)))
                    *cost += (2 * extra_cost->alu.shift
                              + extra_cost->alu.log_shift);
                  else
                    *cost += (extra_cost->alu.shift
                              + extra_cost->alu.shift_reg
                              + extra_cost->alu.log_shift_reg);
                }
            }
          return true;
        }

      *cost = LIBCALL_COST (2);
      return false;
    case BSWAP:
      if (arm_arch6)
        {
          if (mode == SImode)
            {
              *cost = COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->alu.rev;

              return false;
            }
        }
      else
        {
          /* No rev instruction available.  Look at arm_legacy_rev
             and thumb_legacy_rev for the form of RTL used then.  */
          if (TARGET_THUMB)
            {
              *cost = COSTS_N_INSNS (10);

              if (speed_p)
                {
                  *cost += 6 * extra_cost->alu.shift;
                  *cost += 3 * extra_cost->alu.logical;
                }
            }
          else
            {
              *cost = COSTS_N_INSNS (5);

              if (speed_p)
                {
                  *cost += 2 * extra_cost->alu.shift;
                  *cost += extra_cost->alu.arith_shift;
                  *cost += 2 * extra_cost->alu.logical;
                }
            }
          return true;
        }
      return false;
    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *cost = COSTS_N_INSNS (1);
          if (GET_CODE (XEXP (x, 0)) == MULT
              || GET_CODE (XEXP (x, 1)) == MULT)
            {
              rtx mul_op0, mul_op1, sub_op;

              if (speed_p)
                *cost += extra_cost->fp[mode != SFmode].mult_addsub;

              if (GET_CODE (XEXP (x, 0)) == MULT)
                {
                  mul_op0 = XEXP (XEXP (x, 0), 0);
                  mul_op1 = XEXP (XEXP (x, 0), 1);
                  sub_op = XEXP (x, 1);
                }
              else
                {
                  mul_op0 = XEXP (XEXP (x, 1), 0);
                  mul_op1 = XEXP (XEXP (x, 1), 1);
                  sub_op = XEXP (x, 0);
                }

              /* The first operand of the multiply may be optionally
                 negated.  */
              if (GET_CODE (mul_op0) == NEG)
                mul_op0 = XEXP (mul_op0, 0);

              *cost += (rtx_cost (mul_op0, code, 0, speed_p)
                        + rtx_cost (mul_op1, code, 0, speed_p)
                        + rtx_cost (sub_op, code, 0, speed_p));

              return true;
            }

          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].addsub;
          return false;
        }

      if (mode == SImode)
        {
          rtx shift_by_reg = NULL;
          rtx shift_op;
          rtx non_shift_op;

          *cost = COSTS_N_INSNS (1);

          shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
          if (shift_op == NULL)
            {
              shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
              non_shift_op = XEXP (x, 0);
            }
          else
            non_shift_op = XEXP (x, 1);

          if (shift_op != NULL)
            {
              if (shift_by_reg != NULL)
                {
                  if (speed_p)
                    *cost += extra_cost->alu.arith_shift_reg;
                  *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
                }
              else if (speed_p)
                *cost += extra_cost->alu.arith_shift;

              *cost += (rtx_cost (shift_op, code, 0, speed_p)
                        + rtx_cost (non_shift_op, code, 0, speed_p));
              return true;
            }

          if (arm_arch_thumb2
              && GET_CODE (XEXP (x, 1)) == MULT)
            {
              /* MLS.  */
              if (speed_p)
                *cost += extra_cost->mult[0].add;
              *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
                        + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
                        + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
              return true;
            }

          if (CONST_INT_P (XEXP (x, 0)))
            {
              int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
                                            INTVAL (XEXP (x, 0)), NULL_RTX,
                                            NULL_RTX, 1, 0);
              *cost = COSTS_N_INSNS (insns);
              if (speed_p)
                *cost += insns * extra_cost->alu.arith;
              *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
              return true;
            }
          else if (speed_p)
            *cost += extra_cost->alu.arith;

          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) < 4)
        {
          rtx shift_op, shift_reg;
          shift_reg = NULL;

          /* We check both sides of the MINUS for shifter operands since,
             unlike PLUS, it's not commutative.  */

          HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
          HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)

          /* Slightly disparage, as we might need to widen the result.  */
          *cost = 1 + COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.arith;

          if (CONST_INT_P (XEXP (x, 0)))
            {
              *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
              return true;
            }

          return false;
        }

      if (mode == DImode)
        {
          *cost = COSTS_N_INSNS (2);

          if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
            {
              rtx op1 = XEXP (x, 1);

              if (speed_p)
                *cost += 2 * extra_cost->alu.arith;

              if (GET_CODE (op1) == ZERO_EXTEND)
                *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
              else
                *cost += rtx_cost (op1, MINUS, 1, speed_p);
              *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
                                 0, speed_p);
              return true;
            }
          else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
            {
              if (speed_p)
                *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
              *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
                                  0, speed_p)
                        + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
              return true;
            }
          else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
                   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
            {
              if (speed_p)
                *cost += (extra_cost->alu.arith
                          + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
                             ? extra_cost->alu.arith
                             : extra_cost->alu.arith_shift));
              *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
                        + rtx_cost (XEXP (XEXP (x, 1), 0),
                                    GET_CODE (XEXP (x, 1)), 0, speed_p));
              return true;
            }

          if (speed_p)
            *cost += 2 * extra_cost->alu.arith;
          return false;
        }

      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;
    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *cost = COSTS_N_INSNS (1);
          if (GET_CODE (XEXP (x, 0)) == MULT)
            {
              rtx mul_op0, mul_op1, add_op;

              if (speed_p)
                *cost += extra_cost->fp[mode != SFmode].mult_addsub;

              mul_op0 = XEXP (XEXP (x, 0), 0);
              mul_op1 = XEXP (XEXP (x, 0), 1);
              add_op = XEXP (x, 1);

              *cost += (rtx_cost (mul_op0, code, 0, speed_p)
                        + rtx_cost (mul_op1, code, 0, speed_p)
                        + rtx_cost (add_op, code, 0, speed_p));

              return true;
            }

          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].addsub;
          return false;
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *cost = LIBCALL_COST (2);
          return false;
        }

      /* Narrow modes can be synthesized in SImode, but the range
         of useful sub-operations is limited.  Check for shift operations
         on one of the operands.  Only left shifts can be used in the
         narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) < 4)
        {
          rtx shift_op, shift_reg;
          shift_reg = NULL;

          HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)

          if (CONST_INT_P (XEXP (x, 1)))
            {
              int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
                                            INTVAL (XEXP (x, 1)), NULL_RTX,
                                            NULL_RTX, 1, 0);
              *cost = COSTS_N_INSNS (insns);
              if (speed_p)
                *cost += insns * extra_cost->alu.arith;
              /* Slightly penalize a narrow operation as the result may
                 need widening.  */
              *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
              return true;
            }

          /* Slightly penalize a narrow operation as the result may
             need widening.  */
          *cost = 1 + COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.arith;

          return false;
        }

      if (mode == SImode)
        {
          rtx shift_op, shift_reg;

          *cost = COSTS_N_INSNS (1);
          if (TARGET_INT_SIMD
              && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
                  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
            {
              /* UXTA[BH] or SXTA[BH].  */
              if (speed_p)
                *cost += extra_cost->alu.extend_arith;
              *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
                                  speed_p)
                        + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
              return true;
            }

          shift_reg = NULL;
          shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
          if (shift_op != NULL)
            {
              if (shift_reg)
                {
                  if (speed_p)
                    *cost += extra_cost->alu.arith_shift_reg;
                  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
                }
              else if (speed_p)
                *cost += extra_cost->alu.arith_shift;

              *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
              return true;
            }
          if (GET_CODE (XEXP (x, 0)) == MULT)
            {
              rtx mul_op = XEXP (x, 0);

              *cost = COSTS_N_INSNS (1);

              if (TARGET_DSP_MULTIPLY
                  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
                       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
                           || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
                               && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
                               && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
                      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
                          && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
                          && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
                          && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
                              || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
                                  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
                                  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
                                      == 16))))))
                {
                  /* SMLA[BT][BT].  */
                  if (speed_p)
                    *cost += extra_cost->mult[0].extend_add;
                  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
                                      SIGN_EXTEND, 0, speed_p)
                            + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
                                        SIGN_EXTEND, 0, speed_p)
                            + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
                  return true;
                }

              if (speed_p)
                *cost += extra_cost->mult[0].add;
              *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
                        + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
                        + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
              return true;
            }
          if (CONST_INT_P (XEXP (x, 1)))
            {
              int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
                                            INTVAL (XEXP (x, 1)), NULL_RTX,
                                            NULL_RTX, 1, 0);
              *cost = COSTS_N_INSNS (insns);
              if (speed_p)
                *cost += insns * extra_cost->alu.arith;
              *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
              return true;
            }
          else if (speed_p)
            *cost += extra_cost->alu.arith;

          return false;
        }

      if (mode == DImode)
        {
          if (arm_arch3m
              && GET_CODE (XEXP (x, 0)) == MULT
              && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
                  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
                      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
            {
              *cost = COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->mult[1].extend_add;
              *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                                  ZERO_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
                                    ZERO_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
              return true;
            }

          *cost = COSTS_N_INSNS (2);

          if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
            {
              if (speed_p)
                *cost += (extra_cost->alu.arith
                          + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
                             ? extra_cost->alu.arith
                             : extra_cost->alu.arith_shift));

              *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
                                  speed_p)
                        + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
              return true;
            }

          if (speed_p)
            *cost += 2 * extra_cost->alu.arith;
          return false;
        }

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case IOR:
      if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.rev;

          return true;
        }
      /* Fall through.  */
    case AND: case XOR:
      if (mode == SImode)
        {
          enum rtx_code subcode = GET_CODE (XEXP (x, 0));
          rtx op0 = XEXP (x, 0);
          rtx shift_op, shift_reg;

          *cost = COSTS_N_INSNS (1);

          if (subcode == NOT
              && (code == AND
                  || (code == IOR && TARGET_THUMB2)))
            op0 = XEXP (op0, 0);

          shift_reg = NULL;
          shift_op = shifter_op_p (op0, &shift_reg);
          if (shift_op != NULL)
            {
              if (shift_reg)
                {
                  if (speed_p)
                    *cost += extra_cost->alu.log_shift_reg;
                  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
                }
              else if (speed_p)
                *cost += extra_cost->alu.log_shift;

              *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), code, 1, speed_p));
              return true;
            }

          if (CONST_INT_P (XEXP (x, 1)))
            {
              int insns = arm_gen_constant (code, SImode, NULL_RTX,
                                            INTVAL (XEXP (x, 1)), NULL_RTX,
                                            NULL_RTX, 1, 0);

              *cost = COSTS_N_INSNS (insns);
              if (speed_p)
                *cost += insns * extra_cost->alu.logical;
              *cost += rtx_cost (op0, code, 0, speed_p);
              return true;
            }

          if (speed_p)
            *cost += extra_cost->alu.logical;
          *cost += (rtx_cost (op0, code, 0, speed_p)
                    + rtx_cost (XEXP (x, 1), code, 1, speed_p));
          return true;
        }

      if (mode == DImode)
        {
          rtx op0 = XEXP (x, 0);
          enum rtx_code subcode = GET_CODE (op0);

          *cost = COSTS_N_INSNS (2);

          if (subcode == NOT
              && (code == AND
                  || (code == IOR && TARGET_THUMB2)))
            op0 = XEXP (op0, 0);

          if (GET_CODE (op0) == ZERO_EXTEND)
            {
              if (speed_p)
                *cost += 2 * extra_cost->alu.logical;

              *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), code, 0, speed_p));
              return true;
            }
          else if (GET_CODE (op0) == SIGN_EXTEND)
            {
              if (speed_p)
                *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;

              *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), code, 0, speed_p));
              return true;
            }

          if (speed_p)
            *cost += 2 * extra_cost->alu.logical;

          return true;
        }
      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;
    case MULT:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          rtx op0 = XEXP (x, 0);

          *cost = COSTS_N_INSNS (1);

          if (GET_CODE (op0) == NEG)
            op0 = XEXP (op0, 0);

          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].mult;

          *cost += (rtx_cost (op0, MULT, 0, speed_p)
                    + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
          return true;
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *cost = LIBCALL_COST (2);
          return false;
        }

      if (mode == SImode)
        {
          *cost = COSTS_N_INSNS (1);
          if (TARGET_DSP_MULTIPLY
              && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
                   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
                       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
                           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
                           && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
                  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
                      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
                      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
                      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
                          || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
                              && CONST_INT_P (XEXP (XEXP (x, 1), 1))
                              && (INTVAL (XEXP (XEXP (x, 1), 1))
                                  == 16))))))
            {
              /* SMUL[TB][TB].  */
              if (speed_p)
                *cost += extra_cost->mult[0].extend;
              *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
              return true;
            }
          if (speed_p)
            *cost += extra_cost->mult[0].simple;
          return false;
        }

      if (mode == DImode)
        {
          if (arm_arch3m
              && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
                   && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
                  || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
                      && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
            {
              *cost = COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->mult[1].extend;
              *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
                                  ZERO_EXTEND, 0, speed_p)
                        + rtx_cost (XEXP (XEXP (x, 1), 0),
                                    ZERO_EXTEND, 0, speed_p));
              return true;
            }

          *cost = LIBCALL_COST (2);
          return false;
        }

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].neg;

          return false;
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *cost = LIBCALL_COST (1);
          return false;
        }

      if (mode == SImode)
        {
          if (GET_CODE (XEXP (x, 0)) == ABS)
            {
              *cost = COSTS_N_INSNS (2);
              /* Assume the non-flag-changing variant.  */
              if (speed_p)
                *cost += (extra_cost->alu.log_shift
                          + extra_cost->alu.arith_shift);
              *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
              return true;
            }

          if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
              || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
            {
              *cost = COSTS_N_INSNS (2);
              /* No extra cost for MOV imm and MVN imm.  */
              /* If the comparison op is using the flags, there's no further
                 cost, otherwise we need to add the cost of the comparison.  */
              if (!(REG_P (XEXP (XEXP (x, 0), 0))
                    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
                    && XEXP (XEXP (x, 0), 1) == const0_rtx))
                {
                  *cost += (COSTS_N_INSNS (1)
                            + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
                                        speed_p)
                            + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
                                        speed_p));
                  if (speed_p)
                    *cost += extra_cost->alu.arith;
                }
              return true;
            }
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.arith;
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) < 4)
        {
          /* Slightly disparage, as we might need an extend operation.  */
          *cost = 1 + COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.arith;
          return false;
        }

      if (mode == DImode)
        {
          *cost = COSTS_N_INSNS (2);
          if (speed_p)
            *cost += 2 * extra_cost->alu.arith;
          return false;
        }

      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case NOT:
      if (mode == SImode)
        {
          rtx shift_op;
          rtx shift_reg = NULL;

          *cost = COSTS_N_INSNS (1);
          shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

          if (shift_op)
            {
              if (shift_reg != NULL)
                {
                  if (speed_p)
                    *cost += extra_cost->alu.log_shift_reg;
                  *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
                }
              else if (speed_p)
                *cost += extra_cost->alu.log_shift;
              *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
              return true;
            }

          if (speed_p)
            *cost += extra_cost->alu.logical;
          return false;
        }
      if (mode == DImode)
        {
          *cost = COSTS_N_INSNS (2);
          return false;
        }

      /* Vector mode?  */

      *cost += LIBCALL_COST (1);
      return false;
    case IF_THEN_ELSE:
      {
        if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
          {
            *cost = COSTS_N_INSNS (4);
            return true;
          }

        int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
        int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);

        *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
        /* Assume that if one arm of the if_then_else is a register,
           that it will be tied with the result and eliminate the
           conditional insn.  */
        if (REG_P (XEXP (x, 1)))
          *cost += op2cost;
        else if (REG_P (XEXP (x, 2)))
          *cost += op1cost;
        else
          {
            if (speed_p)
              {
                if (extra_cost->alu.non_exec_costs_exec)
                  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
                else
                  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
              }
            else
              *cost += op1cost + op2cost;
          }
      }
      return true;
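    /* Editorial note (not in the original file): on cores where
       non_exec_costs_exec is set, a predicated instruction occupies the
       pipeline whether or not it executes, so both arms are charged in
       full; otherwise only the more expensive arm is assumed to
       matter.  */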
    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
        *cost = 0;
      else
        {
          machine_mode op0mode;
          /* We'll mostly assume that the cost of a compare is the cost of the
             LHS.  However, there are some notable exceptions.  */

          /* Floating point compares are never done as side-effects.  */
          op0mode = GET_MODE (XEXP (x, 0));
          if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
              && (op0mode == SFmode || !TARGET_VFP_SINGLE))
            {
              *cost = COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->fp[op0mode != SFmode].compare;

              if (XEXP (x, 1) == CONST0_RTX (op0mode))
                {
                  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
                  return true;
                }

              return false;
            }
          else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
            {
              *cost = LIBCALL_COST (2);
              return false;
            }

          /* DImode compares normally take two insns.  */
          if (op0mode == DImode)
            {
              *cost = COSTS_N_INSNS (2);
              if (speed_p)
                *cost += 2 * extra_cost->alu.arith;
              return false;
            }

          if (op0mode == SImode)
            {
              rtx shift_op;
              rtx shift_reg;

              if (XEXP (x, 1) == const0_rtx
                  && !(REG_P (XEXP (x, 0))
                       || (GET_CODE (XEXP (x, 0)) == SUBREG
                           && REG_P (SUBREG_REG (XEXP (x, 0))))))
                {
                  *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);

                  /* Multiply operations that set the flags are often
                     significantly more expensive.  */
                  if (speed_p
                      && GET_CODE (XEXP (x, 0)) == MULT
                      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
                    *cost += extra_cost->mult[0].flag_setting;

                  if (speed_p
                      && GET_CODE (XEXP (x, 0)) == PLUS
                      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
                      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
                                                            0), 1), mode))
                    *cost += extra_cost->mult[0].flag_setting;
                  return true;
                }

              shift_reg = NULL;
              shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
              if (shift_op != NULL)
                {
                  *cost = COSTS_N_INSNS (1);
                  if (shift_reg != NULL)
                    {
                      *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
                      if (speed_p)
                        *cost += extra_cost->alu.arith_shift_reg;
                    }
                  else if (speed_p)
                    *cost += extra_cost->alu.arith_shift;
                  *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
                            + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
                  return true;
                }

              *cost = COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->alu.arith;
              if (CONST_INT_P (XEXP (x, 1))
                  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
                {
                  *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
                  return true;
                }
              return false;
            }

          /* Vector mode?  */

          *cost = LIBCALL_COST (2);
          return false;
        }
      return true;
    case EQ:
    case NE:
    case LT:
    case LE:
    case GT:
    case GE:
    case LTU:
    case LEU:
    case GEU:
    case GTU:
    case ORDERED:
    case UNORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if (outer_code == SET)
        {
          /* Is it a store-flag operation?  */
          if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
              && XEXP (x, 1) == const0_rtx)
            {
              /* Thumb also needs an IT insn.  */
              *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
              return true;
            }
          if (XEXP (x, 1) == const0_rtx)
            {
              switch (code)
                {
                case LT:
                  /* LSR Rd, Rn, #31.  */
                  *cost = COSTS_N_INSNS (1);
                  if (speed_p)
                    *cost += extra_cost->alu.shift;
                  break;

                case EQ:
                case NE:
                  *cost = COSTS_N_INSNS (2);
                  break;

                case LE:
                  /* RSBS T1, Rn, Rn, LSR #31
                     ADC  Rd, Rn, T1.  */
                  *cost = COSTS_N_INSNS (2);
                  if (speed_p)
                    *cost += extra_cost->alu.arith_shift;
                  break;

                case GT:
                  /* RSB Rd, Rn, Rn, ASR #1
                     LSR Rd, Rd, #31.  */
                  *cost = COSTS_N_INSNS (2);
                  if (speed_p)
                    *cost += (extra_cost->alu.arith_shift
                              + extra_cost->alu.shift);
                  break;

                case GE:
                  *cost = COSTS_N_INSNS (2);
                  if (speed_p)
                    *cost += extra_cost->alu.shift;
                  break;

                default:
                  /* Remaining cases are either meaningless or would take
                     three insns anyway.  */
                  *cost = COSTS_N_INSNS (3);
                  break;
                }
              *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
              return true;
            }
          else
            {
              *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
              if (CONST_INT_P (XEXP (x, 1))
                  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
                {
                  *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
                  return true;
                }

              return false;
            }
        }
      /* Not directly inside a set.  If it involves the condition code
         register it must be the condition for a branch, cond_exec or
         I_T_E operation.  Since the comparison is performed elsewhere
         this is just the control part which has no additional
         costs.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
               && XEXP (x, 1) == const0_rtx)
        {
          *cost = 0;
          return true;
        }
      return false;
    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->fp[mode != SFmode].neg;

          return false;
        }
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *cost = LIBCALL_COST (1);
          return false;
        }

      if (mode == SImode)
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
          return false;
        }
      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;
    case SIGN_EXTEND:
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
          && MEM_P (XEXP (x, 0)))
        {
          *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);

          if (mode == DImode)
            *cost += COSTS_N_INSNS (1);

          if (!speed_p)
            return true;

          if (GET_MODE (XEXP (x, 0)) == SImode)
            *cost += extra_cost->ldst.load;
          else
            *cost += extra_cost->ldst.load_sign_extend;

          if (mode == DImode)
            *cost += extra_cost->alu.shift;

          return true;
        }

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
        {
          /* We have SXTB/SXTH.  */
          *cost = COSTS_N_INSNS (1);
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
          if (speed_p)
            *cost += extra_cost->alu.extend;
        }
      else if (GET_MODE (XEXP (x, 0)) != SImode)
        {
          /* Needs two shifts.  */
          *cost = COSTS_N_INSNS (2);
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
          if (speed_p)
            *cost += 2 * extra_cost->alu.shift;
        }

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
        {
          *cost += COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.shift;
        }

      return true;
    case ZERO_EXTEND:
      if ((arm_arch4
           || GET_MODE (XEXP (x, 0)) == SImode
           || GET_MODE (XEXP (x, 0)) == QImode)
          && MEM_P (XEXP (x, 0)))
        {
          *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);

          if (mode == DImode)
            *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */

          return true;
        }

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
        {
          /* UXTB can be a shorter instruction in Thumb2, but it might
             be slower than the AND Rd, Rn, #255 alternative.  When
             optimizing for speed it should never be slower to use
             AND, and we don't really model 16-bit vs 32-bit insns
             here.  */
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.logical;
        }
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
        {
          /* We have UXTB/UXTH.  */
          *cost = COSTS_N_INSNS (1);
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
          if (speed_p)
            *cost += extra_cost->alu.extend;
        }
      else if (GET_MODE (XEXP (x, 0)) != SImode)
        {
          /* Needs two shifts.  It's marginally preferable to use
             shifts rather than two BIC instructions as the second
             shift may merge with a subsequent insn as a shifter
             op.  */
          *cost = COSTS_N_INSNS (2);
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
          if (speed_p)
            *cost += 2 * extra_cost->alu.shift;
        }
      else  /* GET_MODE (XEXP (x, 0)) == SImode.  */
        *cost = COSTS_N_INSNS (1);

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
        *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */

      return true;
    case CONST_INT:
      {
        /* CONST_INT has no mode, so we cannot tell for sure how many
           insns are really going to be needed.  The best we can do is
           look at the value passed.  If it fits in SImode, then assume
           that's the mode it will be used for.  Otherwise assume it
           will be used in DImode.  */
        if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
          mode = SImode;
        else
          mode = DImode;

        /* Avoid blowing up in arm_gen_constant ().  */
        if (!(outer_code == PLUS
              || outer_code == AND
              || outer_code == IOR
              || outer_code == XOR
              || outer_code == MINUS))
          outer_code = SET;

      const_int_cost:
        if (mode == SImode)
          {
            *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
                                                      INTVAL (x), NULL, NULL,
                                                      0, 0));
            /* Extra costs?  */
          }
        else
          {
            *cost += COSTS_N_INSNS (arm_gen_constant
                                    (outer_code, SImode, NULL,
                                     trunc_int_for_mode (INTVAL (x), SImode),
                                     NULL, NULL, 0, 0)
                                    + arm_gen_constant (outer_code, SImode,
                                                        NULL,
                                                        INTVAL (x) >> 32,
                                                        NULL, NULL, 0, 0));
            /* Extra costs?  */
          }

        return true;
      }
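    /* Editorial note (illustrative, not from the original file): because
       the target argument is NULL here, arm_gen_constant only counts the
       insns it would have emitted.  A value such as 0x00ff00ff, which
       needs a MOV plus an ORR to build, therefore contributes
       COSTS_N_INSNS (2) when it cannot be encoded directly as an
       immediate of OUTER_CODE.  */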
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (speed_p)
        {
          if (arm_arch_thumb2 && !flag_pic)
            *cost = COSTS_N_INSNS (2);
          else
            *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
        }
      else
        *cost = COSTS_N_INSNS (2);

      if (flag_pic)
        {
          *cost += COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.arith;
        }

      return true;

    case CONST_FIXED:
      *cost = COSTS_N_INSNS (4);
      /* Fixme.  */
      return true;
    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          if (vfp3_const_double_rtx (x))
            {
              *cost = COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->fp[mode == DFmode].fpconst;
              return true;
            }

          if (speed_p)
            {
              *cost = COSTS_N_INSNS (1);
              if (mode == DFmode)
                *cost += extra_cost->ldst.loadd;
              else
                *cost += extra_cost->ldst.loadf;
            }
          else
            *cost = COSTS_N_INSNS (2 + (mode == DFmode));

          return true;
        }
      *cost = COSTS_N_INSNS (4);
      return true;

    case CONST_VECTOR:
      /* Fixme.  */
      if (TARGET_NEON
          && TARGET_HARD_FLOAT
          && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
          && neon_immediate_valid_for_move (x, mode, NULL, NULL))
        *cost = COSTS_N_INSNS (1);
      else
        *cost = COSTS_N_INSNS (4);
      return true;
    case HIGH:
    case LO_SUM:
      *cost = COSTS_N_INSNS (1);
      /* When optimizing for size, we prefer constant pool entries to
         MOVW/MOVT pairs, so bump the cost of these slightly.  */
      if (!speed_p)
        *cost += 1;
      return true;

    case CLZ:
      *cost = COSTS_N_INSNS (1);
      if (speed_p)
        *cost += extra_cost->alu.clz;
      return false;
    case SMIN:
      if (XEXP (x, 1) == const0_rtx)
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.log_shift;
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
          return true;
        }
      /* Fall through.  */
    case SMAX:
    case UMIN:
    case UMAX:
      *cost = COSTS_N_INSNS (2);
      return false;
    case TRUNCATE:
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
          && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
               && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
              || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
                  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
                      == ZERO_EXTEND))))
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->mult[1].extend;
          *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
                              speed_p)
                    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
                                0, speed_p));
          return true;
        }
      *cost = LIBCALL_COST (1);
      return false;
    case UNSPEC:
      return arm_unspec_cost (x, outer_code, speed_p, cost);

    case PC:
      /* Reading the PC is like reading any other register.  Writing it
         is more expensive, but we take that into account elsewhere.  */
      *cost = 0;
      return true;
    case ZERO_EXTRACT:
      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
    case SIGN_EXTRACT:
      if (arm_arch6
          && mode == SImode
          && CONST_INT_P (XEXP (x, 1))
          && CONST_INT_P (XEXP (x, 2)))
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->alu.bfx;
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
          return true;
        }
      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost = COSTS_N_INSNS (2);
      if (speed_p)
        *cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
      return true;
    case FLOAT_EXTEND:
      if (TARGET_HARD_FLOAT)
        {
          *cost = COSTS_N_INSNS (1);
          if (speed_p)
            *cost += extra_cost->fp[mode == DFmode].widen;
          if (!TARGET_FPU_ARMV8
              && GET_MODE (XEXP (x, 0)) == HFmode)
            {
              /* Pre v8, widening HF->DF is a two-step process, first
                 widening to SFmode.  */
              *cost += COSTS_N_INSNS (1);
              if (speed_p)
                *cost += extra_cost->fp[0].widen;
            }
          *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
          return true;
        }

      *cost = LIBCALL_COST (1);
      return false;
10946 case FLOAT_TRUNCATE
:
10947 if (TARGET_HARD_FLOAT
)
10949 *cost
= COSTS_N_INSNS (1);
10951 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10952 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10954 /* Vector modes? */
10956 *cost
= LIBCALL_COST (1);
10960 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
10962 rtx op0
= XEXP (x
, 0);
10963 rtx op1
= XEXP (x
, 1);
10964 rtx op2
= XEXP (x
, 2);
10966 *cost
= COSTS_N_INSNS (1);
10968 /* vfms or vfnma. */
10969 if (GET_CODE (op0
) == NEG
)
10970 op0
= XEXP (op0
, 0);
10972 /* vfnms or vfnma. */
10973 if (GET_CODE (op2
) == NEG
)
10974 op2
= XEXP (op2
, 0);
10976 *cost
+= rtx_cost (op0
, FMA
, 0, speed_p
);
10977 *cost
+= rtx_cost (op1
, FMA
, 1, speed_p
);
10978 *cost
+= rtx_cost (op2
, FMA
, 2, speed_p
);
10981 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
10986 *cost
= LIBCALL_COST (3);
10991 if (TARGET_HARD_FLOAT
)
10993 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10995 *cost
= COSTS_N_INSNS (1);
10997 *cost
+= extra_cost
->fp
[GET_MODE (XEXP (x
, 0)) == DFmode
].toint
;
10998 /* Strip of the 'cost' of rounding towards zero. */
10999 if (GET_CODE (XEXP (x
, 0)) == FIX
)
11000 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed_p
);
11002 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
11003 /* ??? Increase the cost to deal with transferring from
11004 FP -> CORE registers? */
11007 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11008 && TARGET_FPU_ARMV8
)
11010 *cost
= COSTS_N_INSNS (1);
11012 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
11015 /* Vector costs? */
11017 *cost
= LIBCALL_COST (1);
11021 case UNSIGNED_FLOAT
:
11022 if (TARGET_HARD_FLOAT
)
11024 /* ??? Increase the cost to deal with transferring from CORE
11025 -> FP registers? */
11026 *cost
= COSTS_N_INSNS (1);
11028 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
11031 *cost
= LIBCALL_COST (1);
11035 *cost
= COSTS_N_INSNS (1);
11040 /* Just a guess. Guess number of instructions in the asm
11041 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11042 though (see PR60663). */
11043 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
11044 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
11046 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
11050 if (mode
!= VOIDmode
)
11051 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
11053 *cost
= COSTS_N_INSNS (4); /* Who knows? */
#undef HANDLE_NARROW_SHIFT_ARITH

/* RTX costs when optimizing for size.  */
static bool
arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
               int *total, bool speed)
{
  bool result;

  if (TARGET_OLD_RTX_COSTS
      || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
    {
      /* Old way.  (Deprecated.)  */
      if (!speed)
        result = arm_size_rtx_costs (x, (enum rtx_code) code,
                                     (enum rtx_code) outer_code, total);
      else
        result = current_tune->rtx_costs (x, (enum rtx_code) code,
                                          (enum rtx_code) outer_code, total,
                                          speed);
    }
  else
    {
      /* New way.  */
      if (current_tune->insn_extra_cost)
        result = arm_new_rtx_costs (x, (enum rtx_code) code,
                                    (enum rtx_code) outer_code,
                                    current_tune->insn_extra_cost,
                                    total, speed);
      /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
         && current_tune->insn_extra_cost != NULL  */
      else
        result = arm_new_rtx_costs (x, (enum rtx_code) code,
                                    (enum rtx_code) outer_code,
                                    &generic_extra_costs, total, speed);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
               *total, result ? "final" : "partial");
    }
  return result;
}
/* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
   supported on any "slowmul" cores, so it can be ignored.  */

static bool
arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                       int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT
          || mode == DImode)
        {
          *total = COSTS_N_INSNS (20);
          return false;
        }

      if (CONST_INT_P (XEXP (x, 1)))
        {
          unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
                                      & (unsigned HOST_WIDE_INT) 0xffffffff);
          int cost, const_ok = const_ok_for_arm (i);
          int j, booth_unit_size;

          /* Tune as appropriate.  */
          cost = const_ok ? 4 : 8;
          booth_unit_size = 2;
          for (j = 0; i && j < 32; j += booth_unit_size)
            {
              i >>= booth_unit_size;
              cost++;
            }

          *total = COSTS_N_INSNS (cost);
          *total += rtx_cost (XEXP (x, 0), code, 0, speed);
          return true;
        }

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
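/* Worked example for the Booth-style loop above, assuming the per-iteration
   cost++ as reconstructed: for a const_ok multiplier of 0x55 (binary
   1010101) with booth_unit_size == 2, I is shifted right two bits per step
   and the loop runs four times before I reaches zero, so the multiply is
   costed as COSTS_N_INSNS (4 + 4) == COSTS_N_INSNS (8).  */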
/* RTX cost for cores with a fast multiply unit (M variants).  */

static bool
arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                       int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  /* ??? should thumb2 use different costs?  */
  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (CONST_INT_P (XEXP (x, 1)))
        {
          unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
                                      & (unsigned HOST_WIDE_INT) 0xffffffff);
          int cost, const_ok = const_ok_for_arm (i);
          int j, booth_unit_size;

          /* Tune as appropriate.  */
          cost = const_ok ? 4 : 8;
          booth_unit_size = 8;
          for (j = 0; i && j < 32; j += booth_unit_size)
            {
              i >>= booth_unit_size;
              cost++;
            }

          *total = COSTS_N_INSNS (cost);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (4);
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
        }

      /* Requires a lib call.  */
      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
   so it can be ignored.  */

static bool
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                      int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) != MULT)
        return arm_rtx_costs_1 (x, outer_code, total, speed);

      /* A COMPARE of a MULT is slow on XScale; the muls instruction
         will stall until the multiplication is complete.  */
      *total = COSTS_N_INSNS (3);
      return false;

    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (CONST_INT_P (XEXP (x, 1)))
        {
          /* If operand 1 is a constant we can more accurately
             calculate the cost of the multiply.  The multiplier can
             retire 15 bits on the first cycle and a further 12 on the
             second.  We do, of course, have to load the constant into
             a register first.  */
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
          /* There's a general overhead of one cycle.  */
          int cost = 1;
          unsigned HOST_WIDE_INT masked_const;

          if (i & 0x80000000)
            i = ~i;

          i &= (unsigned HOST_WIDE_INT) 0xffffffff;

          masked_const = i & 0xffff8000;
          if (masked_const != 0)
            {
              cost++;
              masked_const = i & 0xf8000000;
              if (masked_const != 0)
                cost++;
            }
          *total = COSTS_N_INSNS (cost);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (3);
          return false;
        }

      /* Requires a lib call.  */
      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
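/* Worked example for the XScale constant-multiply costing above: for a
   multiplier of 0x140, i & 0xffff8000 is zero, so only the general one-cycle
   overhead applies and the result is COSTS_N_INSNS (1).  For 0x12345678,
   both i & 0xffff8000 and i & 0xf8000000 are nonzero, so a second and a
   third cycle are added, giving COSTS_N_INSNS (3).  */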
/* RTX costs for 9e (and later) cores.  */

static bool
arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                  int *total, bool speed)
{
  machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      switch (code)
        {
        case MULT:
          /* Small multiply: 32 cycles for an integer multiply inst.  */
          if (arm_arch6m && arm_m_profile_small_mul)
            *total = COSTS_N_INSNS (32);
          else
            *total = COSTS_N_INSNS (3);
          return true;

        default:
          *total = thumb1_rtx_costs (x, code, outer_code);
          return true;
        }
    }

  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
        }

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (CONST_INT_P (XEXP (x, 1)))
        return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
        return 3;

      return 4;
    }

  return 6;
}

static int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
                  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
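/* Illustration of the weighting above (values as reconstructed): a
   post-increment address such as (post_inc (reg)) costs 0, (plus (reg)
   (const_int 4)) costs 2, a sum containing another arithmetic term costs 3,
   a bare register 6, and a label or symbol reference 10 -- matching the
   preference order given in the comment before arm_arm_address_cost.  */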
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
         operand for INSN.  If we have a shifted input operand and the
         instruction we depend on is another ALU instruction, then we may
         have to account for an additional stall.  */
      if (shift_opnum != 0
          && (attr_type == TYPE_ALU_SHIFT_IMM
              || attr_type == TYPE_ALUS_SHIFT_IMM
              || attr_type == TYPE_LOGIC_SHIFT_IMM
              || attr_type == TYPE_LOGICS_SHIFT_IMM
              || attr_type == TYPE_ALU_SHIFT_REG
              || attr_type == TYPE_ALUS_SHIFT_REG
              || attr_type == TYPE_LOGIC_SHIFT_REG
              || attr_type == TYPE_LOGICS_SHIFT_REG
              || attr_type == TYPE_MOV_SHIFT
              || attr_type == TYPE_MVN_SHIFT
              || attr_type == TYPE_MOV_SHIFT_REG
              || attr_type == TYPE_MVN_SHIFT_REG))
        {
          rtx shifted_operand;
          int opno;

          /* Get the shifted operand.  */
          extract_insn (insn);
          shifted_operand = recog_data.operand[shift_opnum];

          /* Iterate over all the operands in DEP.  If we write an operand
             that overlaps with SHIFTED_OPERAND, then we have to increase
             the cost of this dependency.  */
          extract_insn (dep);
          preprocess_constraints (dep);
          for (opno = 0; opno < recog_data.n_operands; opno++)
            {
              /* We can ignore strict inputs.  */
              if (recog_data.operand_type[opno] == OP_IN)
                continue;

              if (reg_overlap_mentioned_p (recog_data.operand[opno],
                                           shifted_operand))
                {
                  *cost = 2;
                  return false;
                }
            }
        }
    }
  return true;
}
/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
{
  switch (REG_NOTE_KIND (link))
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
          && recog_memoized (dep) >= 0)
        {
          if (GET_CODE (PATTERN (insn)) == SET)
            {
              if (GET_MODE_CLASS
                    (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
                  || GET_MODE_CLASS
                    (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
                {
                  enum attr_type attr_type_insn = get_attr_type (insn);
                  enum attr_type attr_type_dep = get_attr_type (dep);

                  /* By default all dependencies of the form
                       s0 = s0 <op> s1
                       s0 = s0 <op> s2
                     have an extra latency of 1 cycle because
                     of the input and output dependency in this
                     case.  However this gets modeled as a true
                     dependency and hence all these checks.  */
                  if (REG_P (SET_DEST (PATTERN (insn)))
                      && REG_P (SET_DEST (PATTERN (dep)))
                      && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
                                                  SET_DEST (PATTERN (dep))))
                    {
                      /* FMACS is a special case where the dependent
                         instruction can be issued 3 cycles before
                         the normal latency in case of an output
                         dependency.  */
                      if ((attr_type_insn == TYPE_FMACS
                           || attr_type_insn == TYPE_FMACD)
                          && (attr_type_dep == TYPE_FMACS
                              || attr_type_dep == TYPE_FMACD))
                        {
                          if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
                            *cost = insn_default_latency (dep) - 3;
                          else
                            *cost = insn_default_latency (dep);
                          return false;
                        }
                      else
                        {
                          if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
                            *cost = insn_default_latency (dep) + 1;
                          else
                            *cost = insn_default_latency (dep);
                        }
                      return false;
                    }
                }
            }
        }
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
     predicated) has a penalty of 3.  */
  if (REG_NOTE_KIND (link) == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
          && get_attr_type (insn) != TYPE_BRANCH)
        {
          *cost = 3;
          return false;
        }

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
          || get_attr_conds (insn) == CONDS_USE)
        {
          *cost = 3;
          return false;
        }
    }

  return true;
}
/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                        reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
          || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
        return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
               || (from != IWMMXT_REGS && to == IWMMXT_REGS))
        return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
        return 20;
      else
        return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
        return 4;
      else
        return 2;
    }
}

/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
                      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
        return 8;
      else
        return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}
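/* Worked example for arm_memory_move_cost on Thumb-1: QImode (size 1 < 4)
   costs 8; SImode in LO_REGS costs (2 * 4) * 1 == 8, while SImode in any
   other class costs (2 * 4) * 2 == 16, so spilling through high registers
   is discouraged.  */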
/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                tree vectype,
                                int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return current_tune->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return current_tune->vec_costs->scalar_load_cost;

    case scalar_store:
      return current_tune->vec_costs->scalar_store_cost;

    case vector_stmt:
      return current_tune->vec_costs->vec_stmt_cost;

    case vector_load:
      return current_tune->vec_costs->vec_align_load_cost;

    case vector_store:
      return current_tune->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return current_tune->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return current_tune->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return current_tune->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return current_tune->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return current_tune->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return current_tune->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return current_tune->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
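/* Worked example for the vec_construct case above: building a V4SI vector
   element-by-element has TYPE_VECTOR_SUBPARTS == 4, so the reported cost is
   4 / 2 + 1 == 3, independently of the per-core cost tables consulted by
   the other cases.  */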
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                   struct _stmt_vec_info *stmt_info, int misalign,
                   enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
        count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
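/* Worked example: a statement with per-statement cost 1 that lies in an
   inner loop relative to the loop being vectorized is accounted as
   1 * 50 == 50 by the weighting above, so the cost model heavily favours
   the innermost loop body.  */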
/* Return true if and only if this insn can dual-issue only as older.  */
static bool
cortexa7_older_only (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADC_REG:
    case TYPE_ADCS_REG:
    case TYPE_ADR:
    case TYPE_BFM:
    case TYPE_REV:
    case TYPE_MVN_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_LOAD1:
    case TYPE_STORE1:
    case TYPE_FFARITHS:
    case TYPE_FADDS:
    case TYPE_FFARITHD:
    case TYPE_FADDD:
    case TYPE_FMOV:
    case TYPE_F_CVT:
    case TYPE_FCMPS:
    case TYPE_FCMPD:
    case TYPE_FCONSTS:
    case TYPE_FCONSTD:
    case TYPE_FMULS:
    case TYPE_FMACS:
    case TYPE_FMULD:
    case TYPE_FMACD:
    case TYPE_FDIVS:
    case TYPE_FDIVD:
    case TYPE_F_MRC:
    case TYPE_F_MRRC:
    case TYPE_F_FLAG:
    case TYPE_F_LOADS:
    case TYPE_F_STORES:
      return true;
    default:
      return false;
    }
}
/* Return true if and only if this insn can dual-issue as younger.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
        fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_IMM:
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_EXTEND:
    case TYPE_MVN_IMM:
    case TYPE_MOV_IMM:
    case TYPE_MOV_REG:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
    case TYPE_BRANCH:
    case TYPE_CALL:
      return true;
    default:
      return false;
    }
}
/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
                        int *n_readyp, int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 5)
    fprintf (file,
             ";; sched_reorder for cycle %d with %d insns in ready list\n",
             clock, *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), and looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
        {
          first_older_only = i;
          if (verbose > 5)
            fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
          break;
        }
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
        first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 5)
        fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 5)
        fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
             INSN_UID (ready[first_older_only]),
             INSN_UID (ready[first_younger]));
  rtx_insn *first_older_only_insn = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];

  ready[i] = first_older_only_insn;
  return;
}
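/* Illustration of the rotation above (the head of the ready list is the
   highest index, i.e. the insn issued first).  Starting from

     ready = { ..., C (older-only), B, A (younger) <- head }

   the loop shifts B and A down one slot each and places C in A's old slot,
   so the older-only insn C issues ahead of the more flexible younger insn A
   while B keeps its relative order.  */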
/* Implement TARGET_SCHED_REORDER.  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
                   int clock)
{
  switch (arm_tune)
    {
    case cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
        return cost;
    }

  /* XXX Is this strictly true?  */
  if (REG_NOTE_KIND (link) == REG_DEP_ANTI
      || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (REG_NOTE_KIND (link) == 0
      && CALL_P (insn))
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
         from a cached area.  Assume that loads from the stack, and from the
         constant pool are cached, and that others will miss.  This is a
         hack.  */
      if ((GET_CODE (src_mem) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (src_mem))
          || reg_mentioned_p (stack_pointer_rtx, src_mem)
          || reg_mentioned_p (frame_pointer_rtx, src_mem)
          || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
        return 1;
    }

  return cost;
}
int
arm_max_conditional_execute (void)
{
  return max_insns_skipped;
}

static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
  else
    return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
         : arm_default_branch_cost (speed_p, predictable_p);
}
static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  REAL_VALUE_TYPE r;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return 0;

  if (REAL_VALUES_EQUAL (r, value_fp0))
    return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     -1^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

   - A (most-significant) is the sign bit.
   - BCD are the exponent (encoded as r XOR 3).
   - EFGH are the mantissa (encoded as n - 16).  */
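/* Worked example of the encoding above: 1.0 = 16 * 2^-4, so s = 0, n = 16
   and r = 4, which encodes as A = 0, BCD = (4 XOR 3) = 7 and EFGH = 0,
   i.e. the index 0x70.  Representable magnitudes therefore run from
   16 * 2^-7 (0.125) up to 31 * 2^0 (31.0).  */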
/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  bool fail;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}
12152 vfp3_const_double_rtx (rtx x
)
12157 return vfp3_const_double_index (x
) != -1;
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.  */
static int
neon_valid_immediate (rtx op, machine_mode mode, int inverse,
                      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)      \
  matches = 1;                                  \
  for (i = 0; i < idx; i += (STRIDE))           \
    if (!(TEST))                                \
      matches = 0;                              \
  if (matches)                                  \
    {                                           \
      immtype = (CLASS);                        \
      elsize = (ELSIZE);                        \
      break;                                    \
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    {
      n_elts = CONST_VECTOR_NUNITS (op);
      innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
    }
  else
    {
      n_elts = 1;
      if (mode == VOIDmode)
        mode = DImode;
      innersize = GET_MODE_SIZE (mode);
    }

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);
      REAL_VALUE_TYPE r0;

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
        return -1;

      REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);

      for (i = 1; i < n_elts; i++)
        {
          rtx elt = CONST_VECTOR_ELT (op, i);
          REAL_VALUE_TYPE re;

          REAL_VALUE_FROM_CONST_DOUBLE (re, elt);

          if (!REAL_VALUES_EQUAL (r0, re))
            return -1;
        }

      if (modconst)
        *modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
        *elementwidth = 0;

      if (el0 == CONST0_RTX (GET_MODE (el0)))
        return 19;
      else
        return 18;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (CONST_INT_P (el))
        {
          elpart = INTVAL (el);
          parts = 1;
        }
      else if (CONST_DOUBLE_P (el))
        {
          elpart = CONST_DOUBLE_LOW (el);
          parts = 2;
        }
      else
        gcc_unreachable ();

      for (part = 0; part < parts; part++)
        {
          unsigned int byte;
          for (byte = 0; byte < innersize; byte++)
            {
              bytes[idx++] = (elpart & 0xff) ^ invmask;
              elpart >>= BITS_PER_UNIT;
            }
          if (CONST_DOUBLE_P (el))
            elpart = CONST_DOUBLE_HIGH (el);
        }
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
                       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
                       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
                       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
                       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
                       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
                       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
                       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
                       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
                        && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
                        && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
                        && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
                        && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
                        && bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);

  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
        for (i = 0; i < idx; i++)
          bytes[i] ^= invmask;

      if (immtype == 17)
        {
          /* FIXME: Broken on 32-bit H_W_I hosts.  */
          gcc_assert (sizeof (HOST_WIDE_INT) == 8);

          for (i = 0; i < 8; i++)
            imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
                   << (i * BITS_PER_UNIT);

          *modconst = GEN_INT (imm);
        }
      else
        {
          unsigned HOST_WIDE_INT imm = 0;

          for (i = 0; i < elsize / BITS_PER_UNIT; i++)
            imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

          *modconst = GEN_INT (imm);
        }
    }

  return immtype;
#undef CHECK
}
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */

int
neon_immediate_valid_for_move (rtx op, machine_mode mode,
                               rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for a description of INVERSE.  */

int
neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
                                rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  ISLEFTSHIFT selects between left and right shifts,
   because they have different limitations.  */

int
neon_immediate_valid_for_shift (rtx op, machine_mode mode,
                                rtx *modconst, int *elementwidth,
                                bool isleftshift)
{
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
        elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
        return 0;
      else
        gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
        return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
        return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
        return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
                             int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}
/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
                             machine_mode mode, int quad,
                             bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
                      rtx (*reduc) (rtx, rtx, rtx))
{
  machine_mode inner = GET_MODE_INNER (mode);
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
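/* For example, reducing across a V4SF value takes two steps: PARTS == 4, so
   the loop runs with i == 2 (the caller-supplied REDUC, e.g. a pairwise
   vpadd, into a fresh scratch register) and then with i == 1 (into OP0);
   each step halves the number of partial sums, log2 (4) == 2 emitted
   instructions in total.  */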
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

static rtx
neon_vdup_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  if (!all_same)
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
         CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
         Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
        {
          rtx x = XVECEXP (vals, 0, i);
          if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
            n_const++;
        }
      if (n_const == n_elts)
        const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We can not take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        ++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
        {
          emit_move_insn (target, constant);
          return;
        }
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_DUPLICATE (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);

      /* Load constant part of vector, substitute neighboring value for
         varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      switch (mode)
        {
        case V8QImode:
          emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
          break;
        case V16QImode:
          emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
          break;
        case V4HImode:
          emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
          break;
        case V8HImode:
          emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
          break;
        case V2SImode:
          emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
          break;
        case V4SImode:
          emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
          break;
        case V2SFmode:
          emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
          break;
        case V4SFmode:
          emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
          break;
        case V2DImode:
          emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
          break;
        default:
          gcc_unreachable ();
        }
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
                                       i * GET_MODE_SIZE (inner_mode)),
                    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  FIXME: NEON bounds checks occur late in compilation, so
   reported source locations are bogus.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
              const char *err)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error (err);
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "lane out of range");
}

/* Bounds-check constants.  */

void
neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "constant out of range");
}

HOST_WIDE_INT
neon_element_bits (machine_mode mode)
{
  if (mode == DImode)
    return GET_MODE_BITSIZE (mode);
  else
    return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int
arm_coproc_mem_operand (rtx op, bool wb)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
          && (GET_CODE (ind) == PRE_INC
              || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
           (const)).  */
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return !strict;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn.  */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1)))
    return true;

  /* Match:
     (plus (reg)
           (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
         than what the instruction format permits.  We have no such constraint
         on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
          < (VALID_NEON_QREG_MODE (GET_MODE (op)) ? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
                       || REGNO (x) == ARG_POINTER_REGNUM
                       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
                           && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16)
        return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
        return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
          || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
          || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
          && BYTES_BIG_ENDIAN
          && (AGGREGATE_TYPE_P (valtype)
              || TREE_CODE (valtype) == COMPLEX_TYPE
              || FIXED_POINT_TYPE_P (valtype)));
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (symbol_mentioned_p (XVECEXP (x, i, j)))
              return 1;
        }
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
        return 1;
    }

  return 0;
}
/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (label_mentioned_p (XVECEXP (x, i, j)))
              return 1;
        }
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
        return 1;
    }

  return 0;
}
int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
        return 1;

    /* Fall through.  */
    default:
      return 0;
    }
}
/* Must not copy any rtx that uses a pc-relative address.  */

static bool
arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
          && (XINT (x, 1) == UNSPEC_PIC_BASE
              || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
        return true;
    }
  return false;
}
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}
/* Match pair of min/max operators that can be implemented via usat/ssat.  */

bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
                        int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
        *mask = log;
      if (signed_sat)
        *signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
        *mask = log + 1;
      if (signed_sat)
        *signed_sat = true;

      return true;
    }

  return false;
}
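/* Worked examples: bounds [0, 255] give log == 8 and a zero low bound, so
   *MASK == 8 and *SIGNED_SAT == false (a "usat #8"); bounds [-128, 127]
   give log == 7 with INTVAL (lo_bound) == -INTVAL (hi_bound) - 1, so
   *MASK == 8 and *SIGNED_SAT == true (an "ssat #8").  */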
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
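
/* A worked example (illustrative only): the two references
     (mem:SI (plus:SI (reg:SI r4) (const_int 8)))
     (mem:SI (plus:SI (reg:SI r4) (const_int 12)))
   share the base register r4 and have val_diff == 4, so they are
   adjacent; presenting them in the opposite order gives val_diff == -4,
   which is also accepted since the caller may emit the pair either
   way round.  */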
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
   where
   1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
   2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
   3.  If consecutive is TRUE, then for kth register being loaded,
       REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
		     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
	 popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
	  || !REG_P (XEXP (SET_SRC (elt), 0))
	  || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
	  || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
	  || INTVAL (XEXP (SET_SRC (elt), 1)) !=
	     ((count - 1 - offset_adj) * reg_increment))
	return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;

  /* Perform a quick check so we don't blow up below.  If only one reg is loaded,
     success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
	return false;

      if (load)
	{
	  reg = SET_DEST (elt);
	  mem = SET_SRC (elt);
	}
      else
	{
	  reg = SET_SRC (elt);
	  mem = SET_DEST (elt);
	}

      if (!REG_P (reg)
	  || GET_MODE (reg) != mode
	  || REGNO (reg) <= regno
	  || (consecutive
	      && (REGNO (reg) !=
		  (unsigned int) (first_regno + regs_per_val * (i - base))))
	  /* Don't allow SP to be loaded unless it is also the base register.  It
	     guarantees that SP is reset correctly when an LDM instruction
	     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
	  || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
	  || !MEM_P (mem)
	  || GET_MODE (mem) != mode
	  || ((GET_CODE (XEXP (mem, 0)) != PLUS
	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
		   offset + (i - base) * reg_increment))
	      && (!REG_P (XEXP (mem, 0))
		  || offset + (i - base) * reg_increment != 0)))
	return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
	addr_reg_in_reglist = true;
    }

  if (update && addr_reg_in_reglist)
    return false;

  /* For Thumb-1, address register is always modified - either by write-back
     or by explicit load.  If the pattern does not describe an update,
     then the address register must be in the list of loaded registers.  */
  if (TARGET_THUMB1)
    return update || addr_reg_in_reglist;

  return true;
}
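
/* An illustrative example (hand-written for exposition): the write-back
   load "ldmia r0!, {r4, r5}" reaches this predicate as roughly

     (parallel
       [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
	(set (reg:SI r4) (mem:SI (reg:SI r0)))
	(set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))])

   so count == 3 and update is detected from the leading PLUS; the loop
   then checks that element k loads ascending register numbers from
   offset 4 * k.  */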
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

	ldr	rd1, [rbase + offset]
	ldr	rd2, [rbase + offset + 4]

     to

	add	rd1, rbase, offset
	ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	  {
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
	return false;
    }
  return true;
}
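
/* A worked example (for exposition only): with NOPS == 4 and
   UNSORTED_OFFSETS == {8, 4, 12, 0}, the caller presets ORDER[0] = 3
   (the index of the lowest offset, 0); the loop then produces
   ORDER == {3, 1, 0, 2}, i.e. offsets 0, 4, 8, 12 in ascending order.
   Offsets {0, 4, 4, 8} are rejected because 4 is matched twice, and
   {0, 8, 12, 16} because no offset exceeds 0 by exactly 4.  */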
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  rtx base_reg_rtx = NULL;
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (GET_CODE (reg) == SUBREG
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  unsorted_regs[i] = (REG_P (operands[i])
			      ? REGNO (operands[i])
			      : REGNO (SUBREG_REG (operands[i])));

	  /* If it isn't an integer register, or if it overwrites the
	     base register but isn't the last insn in the list, then
	     we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || unsorted_regs[i] > 14
	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
	    return 0;

	  /* Don't allow SP to be loaded unless it is also the base
	     register.  It guarantees that SP is reset correctly when
	     an LDM instruction is interrupted.  Otherwise, we might
	     end up with a corrupt stack.  */
	  if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
					ldm_case == 5
					? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
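
/* An illustrative call (not from this file): for the two insns

     ldr r4, [r0]
     ldr r5, [r0, #4]

   the peephole passes OPERANDS == {r4, r5, [r0], [r0, #4]} with
   NOPS == 2; the function fills REGS with {4, 5}, sets *BASE to r0's
   register number and *LOAD_OFFSET to 0, and returns 1 (the ldmia
   case).  */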
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
			 HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (GET_CODE (reg) == SUBREG
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
				  ? operands[i] : SUBREG_REG (operands[i]));
	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  /* If it isn't an integer register, then we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      /* The effects are unpredictable if the base register is
		 both updated and stored.  */
	      || (base_writeback && unsorted_regs[i] == base_reg)
	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
	      || unsorted_regs[i] > 14)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	{
	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
	  if (reg_rtxs)
	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
	}

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			 HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (VOIDmode, basereg,
		       plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
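
/* For example (illustrative): COUNT == 2, REGS == {4, 5} and
   WBACK_OFFSET == 8 produce

     (parallel
       [(set (reg:SI rb) (plus:SI (reg:SI rb) (const_int 8)))
	(set (reg:SI r4) (mem:SI ...))
	(set (reg:SI r5) (mem:SI ...))])

   where rb stands for BASEREG, i.e. the form recognized as
   "ldmia rb!, {r4, r5}".  */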
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			  HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (VOIDmode, basereg,
		       plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */

static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
				    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
				     write_back ? 4 * count : 0);
}

rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
		       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
			      offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
			rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
			      offsetp);
}
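
/* An illustrative use (a sketch; R1 and BASEMEM stand for values the
   caller already holds): to load four consecutive words through r1
   with write-back,

     int regnos[4] = { 4, 5, 6, 7 };
     HOST_WIDE_INT off = 0;
     emit_insn (arm_gen_load_multiple (regnos, 4, r1, TRUE, basemem, &off));

   afterwards OFF has been advanced to 16, matching the 16 bytes the
   generated ldmia consumes.  */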
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */

bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
				     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
	if (regs[i] > regs[j])
	  {
	    int t = regs[i];
	    regs[i] = regs[j];
	    regs[j] = t;
	  }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
      gcc_assert (ldm_case == 1 || ldm_case == 5);
      write_back = TRUE;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
	{
	  base_reg = regs[0];
	  base_reg_rtx = newbase;
	}
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
				      write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */

bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
				      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }

  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */

bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
				      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
	if (regs[i] == regs[j])
	  {
	    rtx t = peep2_find_free_register (0, nops * 2,
					      TARGET_THUMB1 ? "l" : "r",
					      SImode, &allocated);
	    if (t == NULL_RTX)
	      return false;
	    reg_rtxs[i] = t;
	    regs[i] = REGNO (t);
	  }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
	if (regs[j] > regs[reg_order[i - 1]]
	    && (this_order == reg_order[i - 1]
		|| regs[j] < regs[this_order]))
	  this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
	return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }

  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */

static void
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
				   HOST_WIDE_INT length,
				   unsigned int interleave_factor)
{
  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;
  rtx dst, src;
  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;
  rtx mem, addr;

  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);
  else
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)
    regnos[i] = REGNO (regs[i]);

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
    {
      /* Load words.  */
      if (src_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
					    TRUE, srcbase, &srcoffset));
	  src_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
						 - src_autoinc));
	      mem = adjust_automodify_address (srcbase, SImode, addr,
					       srcoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
	    }
	  srcoffset += block_size_bytes;
	}

      /* Store words.  */
      if (dst_aligned && interleave_factor > 1)
	{
	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
					     TRUE, dstbase, &dstoffset));
	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
	}
      else
	{
	  for (j = 0; j < interleave_factor; j++)
	    {
	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
						 - dst_autoinc));
	      mem = adjust_automodify_address (dstbase, SImode, addr,
					       dstoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
	    }
	  dstoffset += block_size_bytes;
	}

      remaining -= block_size_bytes;
    }

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)
    {
      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
					&srcoffset));
      src_autoinc += UNITS_PER_WORD * words;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, src,
				srcoffset + j * UNITS_PER_WORD - src_autoinc);
	  mem = adjust_automodify_address (srcbase, SImode, addr,
					   srcoffset + j * UNITS_PER_WORD);
	  emit_insn (gen_unaligned_loadsi (regs[j], mem));
	}
      srcoffset += words * UNITS_PER_WORD;
    }

  if (dst_aligned && words > 1)
    {
      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
					 &dstoffset));
      dst_autoinc += words * UNITS_PER_WORD;
    }
  else
    {
      for (j = 0; j < words; j++)
	{
	  addr = plus_constant (Pmode, dst,
				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, SImode, addr,
					   dstoffset + j * UNITS_PER_WORD);
	  emit_insn (gen_unaligned_storesi (mem, regs[j]));
	}
      dstoffset += words * UNITS_PER_WORD;
    }

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)
    {
      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
	 byte, depending on interleave factor.  */
      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
	  emit_insn (gen_unaligned_storehi (mem,
					    gen_lowpart (HImode, halfword_tmp)));
	  halfword_tmp = NULL;
	  dstoffset += 2;
	}

      remaining -= 2;
      srcoffset += 2;
    }

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)
    {
      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)
	{
	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
	  byte_tmp = NULL;
	  dstoffset++;
	}

      remaining--;
      srcoffset++;
    }

  /* Store last halfword if we haven't done so already.  */

  if (halfword_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
					gen_lowpart (HImode, halfword_tmp)));
      dstoffset += 2;
    }

  /* Likewise for last byte.  */

  if (byte_tmp)
    {
      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
      dstoffset++;
    }

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
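
/* For example (illustrative): with INTERLEAVE_FACTOR == 2 and both
   buffers unaligned, each main-loop iteration emits

     ldr rA, [src]
     ldr rB, [src, #4]
     str rA, [dst]
     str rB, [dst, #4]

   i.e. two loads followed by two stores, hiding part of the load
   latency; if both buffers are word-aligned the same work is done by
   an ldmia/stmia pair with write-back.  */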
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */

static void
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
		      rtx *loop_mem)
{
  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}

/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */

static void
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
			       unsigned int interleave_factor,
			       HOST_WIDE_INT bytes_per_iter)
{
  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
				   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
				     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */
  if (leftover)
    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
}
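
/* The emitted structure (a sketch): for LENGTH == 100 and
   BYTES_PER_ITER == 16 the loop copies 96 bytes in six iterations,

     L:	<copy 16 bytes straight-line>
	add	src_reg, src_reg, #16
	add	dest_reg, dest_reg, #16
	cmp	src_reg, final_src
	bne	L

   and the 4 left-over bytes are then copied straight-line.  */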
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */

static int
arm_movmemqi_unaligned (rtx *operands)
{
  HOST_WIDE_INT length = INTVAL (operands[2]);

  if (optimize_size)
    {
      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
	 or dst_aligned though: allow more interleaving in those cases since the
	 resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

      if (length > 12)
	arm_block_move_unaligned_loop (operands[0], operands[1], length,
				       interleave_factor, bytes_per_iter);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length,
					   interleave_factor);
    }
  else
    {
      /* Note that the loop created by arm_block_move_unaligned_loop may be
	 subject to loop unrolling, which makes tuning this condition a little
	 awkward.  */
      if (length > 32)
	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
      else
	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
    }

  return 1;
}
int
arm_gen_movmemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  int i;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)
    return 0;

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_movmemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  for (i = 0; in_words_to_go >= 2; i += 4)
    {
      if (in_words_to_go > 4)
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
					  TRUE, srcbase, &srcoffset));
      else
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
					  src, FALSE, srcbase,
					  &srcoffset));

      if (out_words_to_go)
	{
	  if (out_words_to_go > 4)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
					       TRUE, dstbase, &dstoffset));
	  else if (out_words_to_go != 1)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
					       out_words_to_go, dst,
					       (last_bytes == 0
						? FALSE : TRUE),
					       dstbase, &dstoffset));
	  else
	    {
	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
	      emit_move_insn (mem, gen_rtx_REG (SImode, 0));
	      if (last_bytes != 0)
		{
		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
		  dstoffset += 4;
		}
	    }
	}

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);

      gcc_assert (!in_words_to_go);	/* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
			      GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode,
					   plus_constant (Pmode, dst,
							  last_bytes - 1),
					   dstoffset + last_bytes - 1);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

	  if (--last_bytes)
	    {
	      tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
	      part_bytes_reg = tmp;
	    }
	}
    }
  else
    {
      if (last_bytes > 1)
	{
	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
	  last_bytes -= 2;
	  if (last_bytes)
	    {
	      rtx tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
	      part_bytes_reg = tmp;
	      dstoffset += 2;
	    }
	}

      if (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
	}
    }

  return 1;
}
/* Helper for gen_movmem_ldrd_strd.  Increase the address of memory rtx
   MEM by the size of its mode.  */

static rtx
next_consecutive_mem (rtx mem)
{
  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
}
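
/* For example (illustrative): applied to (mem:DI (reg:SI r0)) this
   returns (mem:DI (plus:SI (reg:SI r0) (const_int 8))); for an HImode
   mem the step is 2 bytes, since the increment is the mode size.  */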
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
bool
gen_movmem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
    return false;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  dst = operands[0];
  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src = operands[1];
  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  if (src_volatile || dst_volatile)
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_movmemqi (operands);

  src = adjust_address (src, DImode, 0);
  dst = adjust_address (dst, DImode, 0);
  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	emit_insn (gen_unaligned_loaddi (reg0, src));

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	emit_insn (gen_unaligned_storedi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
	emit_move_insn (reg0, src);
      else
	emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
	emit_move_insn (dst, reg0);
      else
	emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  if (len == 0)
    return true;

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);
      if (src_aligned)
	emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
	emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
	emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
      else
	emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 2;
      if (len == 0)
	return true;
    }

  dst = adjust_address (dst, QImode, 0);
  src = adjust_address (src, QImode, 0);
  reg0 = gen_reg_rtx (QImode);
  emit_move_insn (reg0, src);
  emit_move_insn (dst, reg0);
  return true;
}
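
/* A worked example (for exposition only): a 15-byte copy with both
   buffers word-aligned emits one LDRD/STRD pair (8 bytes), one word
   copy (4 bytes), one halfword copy (2 bytes) and one byte copy
   (1 byte), in that order.  */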
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */

machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
	  != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    std::swap (cond1, cond2);

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DEQmode;

      switch (cond2)
	{
	case EQ: return CC_DEQmode;
	case LE: return CC_DLEmode;
	case LEU: return CC_DLEUmode;
	case GE: return CC_DGEmode;
	case GEU: return CC_DGEUmode;
	default: gcc_unreachable ();
	}

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTmode;

      switch (cond2)
	{
	case LT:
	  return CC_DLTmode;
	case LE:
	  return CC_DLEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTmode;

      switch (cond2)
	{
	case GT:
	  return CC_DGTmode;
	case GE:
	  return CC_DGEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTUmode;

      switch (cond2)
	{
	case LTU:
	  return CC_DLTUmode;
	case LEU:
	  return CC_DLEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTUmode;

      switch (cond2)
	{
	case GTU:
	  return CC_DGTUmode;
	case GEU:
	  return CC_DGEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
	  || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
	  || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
	  || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == ROTATERT
	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      switch (op)
	{
	case EQ:
	case NE:
	  /* A DImode comparison against zero can be implemented by
	     or'ing the two halves together.  */
	  if (y == const0_rtx)
	    return CC_Zmode;

	  /* We can do an equality test in three Thumb instructions.  */
	  if (!TARGET_32BIT)
	    return CC_Zmode;

	  /* FALLTHROUGH */

	case LTU:
	case LEU:
	case GTU:
	case GEU:
	  /* DImode unsigned comparisons can be implemented by cmp +
	     cmpeq without a scratch register.  Not worth doing in
	     Thumb-2.  */
	  if (TARGET_32BIT)
	    return CC_CZmode;

	  /* FALLTHROUGH */

	case LT:
	case LE:
	case GT:
	case GE:
	  /* DImode signed and unsigned comparisons can be implemented
	     by cmp + sbcs with a scratch register, but that does not
	     set the Z flag - we must reverse GT/LE/GTU/LEU.  */
	  gcc_assert (op != EQ && op != NE);
	  return CC_NCVmode;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);

  return CCmode;
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */

rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
{
  machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
	 then compare against zero.  Not used for ARM mode; there
	 CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
	{
	  gcc_assert (!reload_completed);
	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
	  y = const0_rtx;
	}

      /* A scratch register is required.  */
      if (reload_completed)
	gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
      else
	scratch = gen_rtx_SCRATCH (SImode);

      clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
      set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095.  */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);
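
      /* Worked example (illustrative, not from the original source):
	 offset = 0x1234 splits into lo = 0x234 and hi = 0x1000, while
	 offset = -0x1234 splits into lo = -0x234 and hi = -0x1000; in
	 both cases hi + lo == offset and lo fits the signed 12-bit
	 byte-load offset range checked above.  */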
      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
				   gen_rtx_MEM (QImode,
						plus_constant (Pmode, base,
							       offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT
				(SImode,
				 gen_rtx_SUBREG (SImode, operands[0], 0),
				 GEN_INT (8)),
				scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
		   gen_rtx_IOR (SImode,
				gen_rtx_ASHIFT (SImode, scratch,
						GEN_INT (8)),
				gen_rtx_SUBREG (SImode, operands[0], 0)));
}
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (REG_P (ref))
    {
      /* We have a pseudo which has been spilt onto the stack; there
	 are two cases here: the first where there is a simple
	 stack-slot replacement and a second where the stack-slot is
	 out of range, or is used as a subreg.  */
      if (reg_equiv_mem (REGNO (ref)))
	{
	  ref = reg_equiv_mem (REGNO (ref));
	  base = find_replacement (&XEXP (ref, 0));
	}
      else
	/* The slot is out of range, or was dressed up in a SUBREG.  */
	base = reg_equiv_address (REGNO (ref));
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
	{
	  /* Updating base_plus might destroy outval, see if we can
	     swap the scratch and base_plus.  */
	  if (!reg_overlap_mentioned_p (scratch, outval))
	    std::swap (scratch, base_plus);
	  else
	    {
	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

	      /* Be conservative and copy OUTVAL into the scratch now,
		 this should only be necessary if outval is a subreg
		 of something larger than a word.  */
	      /* XXX Might this clobber base?  I can't see how it can,
		 since scratch is known to overlap with OUTVAL, and
		 must be wider than a word.  */
	      emit_insn (gen_movhi (scratch_hi, outval));
	      outval = scratch_hi;
	    }
	}

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095.  */
      lo = (offset >= 0
	    ? (offset & 0xfff)
	    : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
	 once we have added the additional 1 below, so bump the msb into the
	 pre-loading insn(s).  */
      if (lo == 4095)
	lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
	     ^ (HOST_WIDE_INT) 0x80000000)
	    - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
	{
	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

	  /* Be careful not to destroy OUTVAL.  */
	  if (reg_overlap_mentioned_p (base_plus, outval))
	    {
	      /* Updating base_plus might destroy outval, see if we
		 can swap the scratch and base_plus.  */
	      if (!reg_overlap_mentioned_p (scratch, outval))
		std::swap (scratch, base_plus);
	      else
		{
		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

		  /* Be conservative and copy outval into scratch now,
		     this should only be necessary if outval is a
		     subreg of something larger than a word.  */
		  /* XXX Might this clobber base?  I can't see how it
		     can, since scratch is known to overlap with
		     outval.  */
		  emit_insn (gen_movhi (scratch_hi, outval));
		  outval = scratch_hi;
		}
	    }

	  /* Get the base address; addsi3 knows how to handle constants
	     that require more than one insn.  */
	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
	  base = base_plus;
	  offset = lo;
	}
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
								offset)),
			    gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
			      gen_rtx_SUBREG (SImode, outval, 0),
			      GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
					 plus_constant (Pmode, base,
							offset + 1)),
			    gen_lowpart (QImode, scratch)));
    }
}
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */
bool
arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
bool
arm_pad_reg_upward (machine_mode mode,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
    {
      /* For AAPCS, small aggregates, small fixed-point types,
	 and small complex types are always padded upwards.  */
      if (type)
	{
	  if ((AGGREGATE_TYPE_P (type)
	       || TREE_CODE (type) == COMPLEX_TYPE
	       || FIXED_POINT_TYPE_P (type))
	      && int_size_in_bytes (type) <= 4)
	    return true;
	}
      else
	{
	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
	      && GET_MODE_SIZE (mode) <= 4)
	    return true;
	}
    }

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
   assuming that the address in the base register is word aligned.  */
bool
offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
{
  HOST_WIDE_INT max_offset;

  /* Offset must be a multiple of 4 in Thumb mode.  */
  if (TARGET_THUMB2 && ((offset & 3) != 0))
    return false;

  if (TARGET_THUMB2)
    max_offset = 1020;
  else if (TARGET_ARM)
    max_offset = 255;
  else
    return false;

  return ((offset <= max_offset) && (offset >= -max_offset));
}
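
/* Usage sketch (illustrative, and assuming the limits above of 1020 for
   Thumb-2 and 255 for ARM state): offset_ok_for_ldrd_strd (248) holds in
   both states, whereas offset_ok_for_ldrd_strd (1022) fails in ARM state
   (outside the +/-255 range) and in Thumb-2 state (not a multiple of 4).  */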
/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
   Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
   Assumes that the address in the base register RN is word aligned.  Pattern
   guarantees that both memory accesses use the same base register, that the
   offsets are constants within the range, and that the gap between the
   offsets is 4.  If reload is complete, then check that the registers are
   legal.  WBACK indicates whether the address is updated.  LOAD indicates
   whether the memory access is a load or a store.  */
bool
operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
		       bool wback, bool load)
{
  unsigned int t, t2, n;

  if (!reload_completed)
    return true;

  if (!offset_ok_for_ldrd_strd (offset))
    return false;

  t = REGNO (rt);
  t2 = REGNO (rt2);
  n = REGNO (rn);

  if ((TARGET_THUMB2)
      && ((wback && (n == t || n == t2))
	  || (t == SP_REGNUM)
	  || (t == PC_REGNUM)
	  || (t2 == SP_REGNUM)
	  || (t2 == PC_REGNUM)
	  || (!load && (n == PC_REGNUM))
	  || (load && (t == t2))
	  /* Triggers Cortex-M3 LDRD errata.  */
	  || (!wback && load && fix_cm3_ldrd && (n == t))))
    return false;

  if ((TARGET_ARM)
      && ((wback && (n == t || n == t2))
	  || (t2 == PC_REGNUM)
	  || (t % 2 != 0)   /* First destination register is not even.  */
	  || (t2 != t + 1)
	  /* PC can be used as base register (for offset addressing only),
	     but it is deprecated.  */
	  || (n == PC_REGNUM)))
    return false;

  return true;
}
/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
   operand MEM's address contains an immediate offset from the base
   register and has no side effects, in which case it sets BASE and
   OFFSET accordingly.  */
static bool
mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (base != NULL && offset != NULL);

  /* TODO: Handle more general memory operand patterns, such as
     PRE_DEC and PRE_INC.  */

  if (side_effects_p (mem))
    return false;

  /* Can't deal with subregs.  */
  if (GET_CODE (mem) == SUBREG)
    return false;

  gcc_assert (MEM_P (mem));

  *offset = const0_rtx;

  addr = XEXP (mem, 0);

  /* If addr isn't valid for DImode, then we can't handle it.  */
  if (!arm_legitimate_address_p (DImode, addr,
				 reload_in_progress || reload_completed))
    return false;

  if (REG_P (addr))
    {
      *base = addr;
      return true;
    }
  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return (REG_P (*base) && CONST_INT_P (*offset));
    }

  return false;
}
/* Called from a peephole2 to replace two word-size accesses with a
   single LDRD/STRD instruction.  Returns true iff we can generate a
   new instruction sequence.  That is, both accesses use the same base
   register and the gap between constant offsets is 4.  This function
   may reorder its operands to match ldrd/strd RTL templates.
   OPERANDS are the operands found by the peephole matcher;
   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
   corresponding memory operands.  LOAD indicates whether the access
   is load or store.  CONST_STORE indicates a store of constant
   integer values held in OPERANDS[4,5] and assumes that the pattern
   is of length 4 insn, for the purpose of checking dead registers.
   COMMUTE indicates that register operands may be reordered.  */
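/* An illustrative sketch of the rewrite this peephole performs (not taken
   from the original source): in ARM state

	ldr	r0, [r2]
	ldr	r1, [r2, #4]

   can be replaced by the single access

	ldrd	r0, r1, [r2]

   provided operands_ok_ldrd_strd accepts the register/offset
   combination.  */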
bool
gen_operands_ldrd_strd (rtx *operands, bool load,
			bool const_store, bool commute)
{
  int nops = 2;
  HOST_WIDE_INT offsets[2], offset;
  rtx base = NULL_RTX;
  rtx cur_base, cur_offset, tmp;
  int i, gap;
  HARD_REG_SET regset;

  gcc_assert (!const_store || !load);
  /* Check that the memory references are immediate offsets from the
     same base register.  Extract the base register, the destination
     registers, and the corresponding memory offsets.  */
  for (i = 0; i < nops; i++)
    {
      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
	return false;

      if (i == 0)
	base = cur_base;
      else if (REGNO (base) != REGNO (cur_base))
	return false;

      offsets[i] = INTVAL (cur_offset);
      if (GET_CODE (operands[i]) == SUBREG)
	{
	  tmp = SUBREG_REG (operands[i]);
	  gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
	  operands[i] = tmp;
	}
    }

  /* Make sure there is no dependency between the individual loads.  */
  if (load && REGNO (operands[0]) == REGNO (base))
    return false; /* RAW */

  if (load && REGNO (operands[0]) == REGNO (operands[1]))
    return false; /* WAW */

  /* If the same input register is used in both stores
     when storing different constants, try to find a free register.
     For example, the code
	mov r0, 0
	str r0, [r2]
	mov r0, 1
	str r0, [r2, #4]
     can be transformed into
	mov r1, 0
	mov r0, 1
	strd r1, r0, [r2]
     in Thumb mode assuming that r1 is free.  */
  if (const_store
      && REGNO (operands[0]) == REGNO (operands[1])
      && INTVAL (operands[4]) != INTVAL (operands[5]))
    {
      if (TARGET_THUMB2)
	{
	  CLEAR_HARD_REG_SET (regset);
	  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* Use the new register in the first load to ensure that
	     if the original input register is not dead after peephole,
	     then it will have the correct constant value.  */
	  operands[0] = tmp;
	}
      else if (TARGET_ARM)
	{
	  int regno = REGNO (operands[0]);
	  if (!peep2_reg_dead_p (4, operands[0]))
	    {
	      /* When the input register is even and is not dead after the
		 pattern, it has to hold the second constant but we cannot
		 form a legal STRD in ARM mode with this register as the second
		 register.  */
	      if (regno % 2 == 0)
		return false;

	      /* Is regno-1 free?  */
	      SET_HARD_REG_SET (regset);
	      CLEAR_HARD_REG_BIT (regset, regno - 1);
	      tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
	      if (tmp == NULL_RTX)
		return false;

	      operands[0] = tmp;
	    }
	  else
	    {
	      /* Find a DImode register.  */
	      CLEAR_HARD_REG_SET (regset);
	      tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	      if (tmp != NULL_RTX)
		{
		  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
		  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
		}
	      else
		{
		  /* Can we use the input register to form a DI register?  */
		  SET_HARD_REG_SET (regset);
		  CLEAR_HARD_REG_BIT (regset,
				      regno % 2 == 0 ? regno + 1 : regno - 1);
		  tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
		  if (tmp == NULL_RTX)
		    return false;
		  operands[regno % 2 == 1 ? 0 : 1] = tmp;
		}
	    }

	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
	}
    }

  /* Make sure the instructions are ordered with lower memory access first.  */
  if (offsets[0] > offsets[1])
    {
      gap = offsets[0] - offsets[1];
      offset = offsets[1];

      /* Swap the instructions such that lower memory is accessed first.  */
      std::swap (operands[0], operands[1]);
      std::swap (operands[2], operands[3]);
      if (const_store)
	std::swap (operands[4], operands[5]);
    }
  else
    {
      gap = offsets[1] - offsets[0];
      offset = offsets[0];
    }

  /* Make sure accesses are to consecutive memory locations.  */
  if (gap != 4)
    return false;

  /* Make sure we generate legal instructions.  */
  if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
			     false, load))
    return true;

  /* In Thumb state, where registers are almost unconstrained, there
     is little hope to fix it.  */
  if (TARGET_THUMB2)
    return false;

  if (load && commute)
    {
      /* Try reordering registers.  */
      std::swap (operands[0], operands[1]);
      if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
				 false, load))
	return true;
    }

  if (const_store)
    {
      /* If input registers are dead after this pattern, they can be
	 reordered or replaced by other registers that are free in the
	 current pattern.  */
      if (!peep2_reg_dead_p (4, operands[0])
	  || !peep2_reg_dead_p (4, operands[1]))
	return false;

      /* Try to reorder the input registers.  */
      /* For example, the code
	   mov r0, 0
	   mov r1, 1
	   str r1, [r2]
	   str r0, [r2, #4]
	 can be transformed into
	   mov r1, 0
	   mov r0, 1
	   strd r0, [r2]
      */
      if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
				 false, false))
	{
	  std::swap (operands[0], operands[1]);
	  return true;
	}

      /* Try to find a free DI register.  */
      CLEAR_HARD_REG_SET (regset);
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
      add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
      while (true)
	{
	  tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
	  if (tmp == NULL_RTX)
	    return false;

	  /* DREG must be an even-numbered register in DImode.
	     Split it into SI registers.  */
	  operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
	  operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
	  gcc_assert (operands[0] != NULL_RTX);
	  gcc_assert (operands[1] != NULL_RTX);
	  gcc_assert (REGNO (operands[0]) % 2 == 0);
	  gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));

	  return (operands_ok_ldrd_strd (operands[0], operands[1],
					 base, offset,
					 false, load));
	}
    }

  return false;
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX,
		     INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fputc (',', f);
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */
struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in the table.  */
  rtx value;
  /* The mode of the value.  */
  machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *            next;
  rtx_insn *        insn;
  HOST_WIDE_INT     address;
  rtx *             loc;
  machine_mode      mode;
  int               fix_size;
  rtx               value;
  Mnode *           minipool;
  HOST_WIDE_INT     forwards;
  HOST_WIDE_INT     backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
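
/* For instance (illustrative, not from the original source):
   MINIPOOL_FIX_SIZE (QImode) == 4, MINIPOOL_FIX_SIZE (SImode) == 4 and
   MINIPOOL_FIX_SIZE (DImode) == 8, so sub-word entries are padded and
   the pool always stays word aligned.  */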
static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx_code_label	*minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix * 		minipool_fix_head;
Mfix * 		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;

#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data *insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~(HOST_WIDE_INT)1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
/* Return the maximum amount of padding that will be inserted before
   label LABEL.  */
static HOST_WIDE_INT
get_label_padding (rtx label)
{
  HOST_WIDE_INT align, min_insn_size;

  align = 1 << label_to_alignment (label);
  min_insn_size = TARGET_THUMB ? 2 : 4;
  return align > min_insn_size ? align - min_insn_size : 0;
}
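
/* For example (illustrative, not from the original source): a label
   aligned to 8 bytes in Thumb state, where the minimum insn size is 2,
   can be preceded by up to 8 - 2 = 6 bytes of padding, so
   get_label_padding returns 6 for it.  */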
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
			       HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
	mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
	 mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
	mp->next->prev = mp->prev;
      else
	minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     can not be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
	  && mp->max_address > max_address)
	max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
	 we have not already found an insertion point, then
	 make sure that all such 8-byte aligned quantities are
	 placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
	  && max_mp == NULL
	  && fix->fix_size >= 8
	  && mp->fix_size < 8)
	{
	  max_mp = mp;
	  max_address = mp->max_address;
	}
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
				HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
	mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
	 mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
	mp->prev->next = mp->next;
      else
	minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
	offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT  min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
	  >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (!LABEL_P (fix->value)
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value)
	  /* Check that there is enough slack to move this entry to the
	     end of the table (this is conservative).  */
	  && (mp->max_address
	      > (minipool_barrier->address
		 + minipool_vector_tail->offset
		 + minipool_vector_tail->fix_size)))
	{
	  mp->refcount++;
	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
	}

      if (min_mp != NULL)
	mp->min_address += fix->fix_size;
      else
	{
	  /* Note the insertion point if necessary.  */
	  if (mp->min_address < min_address)
	    {
	      /* For now, we do not allow the insertion of 8-byte alignment
		 requiring nodes anywhere but at the start of the pool.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		min_mp = mp;
	    }
	  else if (mp->max_address
		   < minipool_barrier->address + mp->offset + fix->fix_size)
	    {
	      /* Inserting before this entry would push the fix beyond
		 its maximum address (which can happen if we have
		 re-located a forwards fix); force the new fix to come
		 after it.  */
	      if (ARM_DOUBLEWORD_ALIGN
		  && fix->fix_size >= 8 && mp->fix_size < 8)
		return NULL;
	      else
		{
		  min_mp = mp;
		  min_address = mp->min_address + fix->fix_size;
		}
	    }
	  /* Do not insert a non-8-byte aligned quantity before 8-byte
	     aligned quantities.  */
	  else if (ARM_DOUBLEWORD_ALIGN
		   && fix->fix_size < 8
		   && mp->fix_size >= 8)
	    {
	      min_mp = mp;
	      min_address = mp->min_address + fix->fix_size;
	    }
	}
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
	{
	  minipool_vector_tail = mp;
	  minipool_vector_label = gen_label_rtx ();
	}
      else
	mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
	mp->next->prev = mp;
      else
	minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
	mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
	mp->next->offset = mp->offset + mp->fix_size;
      else
	mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
/* Output the literal table.  */
static void
dump_minipool (rtx_insn *scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
	{
	  align64 = 1;
	  break;
	}

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address,
	     align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file,
		       ";;  Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      arm_print_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  switch (GET_MODE_SIZE (mp->mode))
	    {
#ifdef HAVE_consttable_1
	    case 1:
	      scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_2
	    case 2:
	      scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_4
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_8
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
	      break;
#endif
#ifdef HAVE_consttable_16
	    case 16:
	      scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
	      break;
#endif
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
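
/* The emitted pool looks roughly like this in the output assembly
   (an illustrative sketch, assuming a word-aligned pool with two live
   SImode entries):

	.align	2
	.L5:			@ minipool_vector_label
	.word	0x12345678	@ entry at offset 0
	.word	0x9abcdef0	@ entry at offset 4

   followed by a barrier so the table itself is never executed.  */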
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupted by this stage of the compilation.  */
  int base_cost = 50;
  rtx next = next_nonnote_insn (insn);

  if (next != NULL && LABEL_P (next))
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx_barrier *barrier;
  rtx_insn *from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx_jump_table_data *tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (!BARRIER_P (from));

      /* Count the length of this insn.  This must stay in sync with the
	 code that pushes minipool fixes.  */
      if (LABEL_P (from))
	count += get_label_padding (from);
      else
	count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &tmp))
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Make sure we do not split a call and its corresponding
     CALL_ARG_LOCATION note.  */
  if (CALL_P (selected))
    {
      rtx_insn *next = NEXT_INSN (selected);
      if (next && NOTE_P (next)
	  && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
	selected = next;
    }

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
		   machine_mode mode, rtx value)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return the maximum allowed cost of synthesizing a 64-bit constant VAL
   inline.  Returns the number of insns allowed, or 99 if we always want
   to synthesize the value.  */
int
arm_max_const_double_inline_cost ()
{
  /* Let the value get synthesized to avoid the use of literal pools.  */
  if (arm_disable_literal_pool)
    return 99;

  return ((optimize_size || arm_ld_sched) ? 3 : 4);
}

/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (lowpart));
  gcc_assert (CONST_INT_P (highpart));

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}

/* Cost of loading a SImode constant.  */
static inline int
arm_const_inline_cost (enum rtx_code code, rtx val)
{
  return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
			   NULL_RTX, NULL_RTX, 1, 0);
}
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}

/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (CONST_INT_P (part));

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
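
/* Worked example (illustrative, not from the original source): for the
   DImode constant 0x0000001100000004, the high part 0x11 and the low
   part 0x4 are both valid ARM data-processing immediates, so
   arm_const_double_by_immediates returns true.  For a value such as
   0x123456789abcdef0, neither half nor its complement is encodable as
   a rotated 8-bit immediate, so both helpers reject it (by_parts only
   when not optimizing for size and without load-delay scheduling).  */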
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  */
static void
note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
{
  int opno;

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)
    return;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (op_alt[opno].memory_ok)
	{
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	    }
	  else if (MEM_P (op)
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
		  rtx cop = avoid_constant_pool_reference (op);

		  /* Casting the address of something to a mode narrower
		     than a word can cause avoid_constant_pool_reference()
		     to return the pool reference itself.  That's no good to
		     us here.  Let's just hope that we can use the
		     constant pool value directly.  */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));

		  push_minipool_fix (insn, address,
				     recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}
	    }
	}
    }

  return;
}
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  */
static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx pat, op0, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
	insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
	continue;

      /* Get the register with which we are comparing.  */
      pat = PATTERN (insn);
      op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
	   (!insn_clobbered
	    && prev != BB_HEAD (bb)
	    && (NOTE_P (prev)
		|| DEBUG_INSN_P (prev)
		|| ((set = single_set (prev)) != NULL
		    && get_attr_conds (prev) == CONDS_NOCOND)));
	   prev = PREV_INSN (prev))
	{
	  if (reg_set_p (op0, prev))
	    insn_clobbered = true;
	}

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)
	continue;

      if (!set)
	continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
	  || !low_register_operand (src, SImode))
	continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
	 in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
	{
	  dest = copy_rtx (dest);
	  src = copy_rtx (src);
	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
	  PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
	  INSN_CODE (prev) = -1;
	  /* Set test register in INSN to dest.  */
	  XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
	  INSN_CODE (insn) = -1;
	}
    }
}
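
/* Illustrative sketch of the transformation (not from the original
   source): given Thumb-1 code such as

	mov	r1, r0
	cmp	r0, #0
	bne	.L2

   the move is rewritten as

	subs	r1, r0, #0

   which already sets the condition codes, so a later pass can remove
   the now-redundant compare against zero.  */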
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */
static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  enum Convert_Action {SKIP, CONV, SWAP_CONV};

  FOR_EACH_BB_FN (bb, cfun)
    {
      if (current_tune->disparage_flag_setting_t16_encodings
	  && optimize_bb_for_speed_p (bb))
	continue;

      rtx_insn *insn;
      Convert_Action action = SKIP;
      Convert_Action action_for_partial_flag_setting
	= (current_tune->disparage_partial_flag_setting_t16_encodings
	   && optimize_bb_for_speed_p (bb))
	  ? SKIP : CONV;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (NONJUMP_INSN_P (insn)
	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
	      && GET_CODE (PATTERN (insn)) == SET)
	    {
	      action = SKIP;
	      rtx pat = PATTERN (insn);
	      rtx dst = XEXP (pat, 0);
	      rtx src = XEXP (pat, 1);
	      rtx op0 = NULL_RTX, op1 = NULL_RTX;

	      if (!OBJECT_P (src))
		op0 = XEXP (src, 0);

	      if (BINARY_P (src))
		op1 = XEXP (src, 1);

	      if (low_register_operand (dst, SImode))
		{
		  switch (GET_CODE (src))
		    {
		    case PLUS:
		      /* Adding two registers and storing the result
			 in the first source is already a 16-bit
			 operation.  */
		      if (rtx_equal_p (dst, op0)
			  && register_operand (op1, SImode))
			break;

		      if (low_register_operand (op0, SImode))
			{
			  /* ADDS <Rd>,<Rn>,<Rm>  */
			  if (low_register_operand (op1, SImode))
			    action = CONV;
			  /* ADDS <Rdn>,#<imm8>  */
			  /* SUBS <Rdn>,#<imm8>  */
			  else if (rtx_equal_p (dst, op0)
				   && CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -255, 255))
			    action = CONV;
			  /* ADDS <Rd>,<Rn>,#<imm3>  */
			  /* SUBS <Rd>,<Rn>,#<imm3>  */
			  else if (CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -7, 7))
			    action = CONV;
			}
		      /* ADCS <Rd>, <Rn>  */
		      else if (GET_CODE (XEXP (src, 0)) == PLUS
			       && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
			       && low_register_operand (XEXP (XEXP (src, 0), 1),
							SImode)
			       && COMPARISON_P (op1)
			       && cc_register (XEXP (op1, 0), VOIDmode)
			       && maybe_get_arm_condition_code (op1) == ARM_CS
			       && XEXP (op1, 1) == const0_rtx)
			action = CONV;
		      break;

		    case MINUS:
		      /* RSBS <Rd>,<Rn>,#0
			 Not handled here: see NEG below.  */
		      /* SUBS <Rd>,<Rn>,#<imm3>
			 SUBS <Rdn>,#<imm8>
			 Not handled here: see PLUS above.  */
		      /* SUBS <Rd>,<Rn>,<Rm>  */
		      if (low_register_operand (op0, SImode)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      break;

		    case MULT:
		      /* MULS <Rdm>,<Rn>,<Rdm>
			 As an exception to the rule, this is only used
			 when optimizing for size since MULS is slow on all
			 known implementations.  We do not even want to use
			 MULS in cold code, if optimizing for speed, so we
			 test the global flag here.  */
		      if (!optimize_size)
			break;
		      /* else fall through.  */
		    case AND:
		    case IOR:
		    case XOR:
		      /* ANDS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      else if (rtx_equal_p (dst, op1)
			       && low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting == SKIP
				 ? SKIP : SWAP_CONV;
		      break;

		    case ASHIFTRT:
		    case ASHIFT:
		    case LSHIFTRT:
		      /* ASRS <Rdn>,<Rm> */
		      /* LSRS <Rdn>,<Rm> */
		      /* LSLS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      /* ASRS <Rd>,<Rm>,#<imm5> */
		      /* LSRS <Rd>,<Rm>,#<imm5> */
		      /* LSLS <Rd>,<Rm>,#<imm5> */
		      else if (low_register_operand (op0, SImode)
			       && CONST_INT_P (op1)
			       && IN_RANGE (INTVAL (op1), 0, 31))
			action = action_for_partial_flag_setting;
		      break;

		    case ROTATERT:
		      /* RORS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NOT:
		      /* MVNS <Rd>,<Rm>  */
		      if (low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NEG:
		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
		      if (low_register_operand (op0, SImode))
			action = CONV;
		      break;

		    case CONST_INT:
		      /* MOVS <Rd>,#<imm8>  */
		      if (CONST_INT_P (src)
			  && IN_RANGE (INTVAL (src), 0, 255))
			action = action_for_partial_flag_setting;
		      break;

		    case REG:
		      /* MOVS and MOV<c> with registers have different
			 encodings, so are not relevant here.  */
		      break;

		    default:
		      break;
		    }
		}

	      if (action != SKIP)
		{
		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
		  rtvec vec;

		  if (action == SWAP_CONV)
		    {
		      src = copy_rtx (src);
		      XEXP (src, 0) = op1;
		      XEXP (src, 1) = op0;
		      pat = gen_rtx_SET (VOIDmode, dst, src);
		      vec = gen_rtvec (2, pat, clobber);
		    }
		  else /* action == CONV */
		    vec = gen_rtvec (2, pat, clobber);

		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
		  INSN_CODE (insn) = -1;
		}
	    }

	  if (NONDEBUG_INSN_P (insn))
	    df_simulate_one_insn_backwards (bb, insn, &live);
	}
    }

  CLEAR_REG_SET (&live);
}
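
/* Illustrative example (not from the original source): rewriting

	(set (reg:SI r0) (plus:SI (reg:SI r0) (reg:SI r1)))

   into a PARALLEL that also clobbers CC lets the output pass pick the
   16-bit "adds r0, r0, r1" encoding instead of the 32-bit
   "add r0, r0, r1"; this is only legal because the backwards dataflow
   simulation above has proved that CC is dead at that point.  */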
17357 /* Gcc puts the pool in the wrong place for ARM, since we can only
17358 load addresses a limited distance around the pc. We do some
17359 special munging to move the constant pool values to the correct
17360 point in the code. */
17365 HOST_WIDE_INT address
= 0;
17370 else if (TARGET_THUMB2
)
17373 /* Ensure all insns that must be split have been split at this point.
17374 Otherwise, the pool placement code below may compute incorrect
17375 insn lengths. Note that when optimizing, all insns have already
17376 been split at this point. */
17378 split_all_insns_noflow ();
17380 minipool_fix_head
= minipool_fix_tail
= NULL
;
17382 /* The first insn must always be a note, or the code below won't
17383 scan it properly. */
17384 insn
= get_insns ();
17385 gcc_assert (NOTE_P (insn
));
17388 /* Scan all the insns and record the operands that will need fixing. */
17389 for (insn
= next_nonnote_insn (insn
); insn
; insn
       = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx_jump_table_data *table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if (tablejump_p (insn, NULL, &table))
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
      else if (LABEL_P (insn))
	/* Add the worst-case padding due to alignment.  We don't add
	   the _current_ padding because the minipool insertions
	   themselves might change it.  */
	address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix *ftmp;
      Mfix *fdel;
      Mfix *last_added_fix;
      Mfix *last_barrier = NULL;
      Mfix *this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (BARRIER_P (ftmp->insn))
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is first fix that we can't fit into this pool and
	     there no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (!BARRIER_P (ftmp->insn)
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (!BARRIER_P (this_fix->insn))
	  {
	    rtx addr
	      = plus_constant (Pmode,
			       gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  cfun->machine->after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
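
/* Illustrative note (added; not part of the original source): after this
   pass, a constant that no single instruction can materialize, e.g.

       ldr     r0, =0x12345678

   has been rewritten as a PC-relative load from a minipool dumped at a
   nearby barrier, conceptually:

       ldr     r0, [pc, #<offset>]   @ load from the minipool
       ...
       .word   0x12345678            @ minipool entry

   The offset must stay within the addressing ranges checked above.  */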
/* Routines to output assembly language.  */

/* Return string representation of passed in real value.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
			 bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);

  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
	gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
    {
      /* Output pop (not stmfd) because it has a shorter encoding.  */
      gcc_assert (update);
      sprintf (pattern, "pop%s\t{", conditional);
    }
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
	 It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
	sprintf (pattern, "ldm%sfd\t", conditional);
      else if (TARGET_UNIFIED_ASM)
	sprintf (pattern, "ldmia%s\t", conditional);
      else
	sprintf (pattern, "ldm%sia\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
	strcat (pattern, "!, {");
      else
	strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
	  reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
	      reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
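
/* Illustrative sketch (added; not from the original source): for a pop of
   {r4, r5, pc} with SP as the base register and writeback, under unified
   syntax the code above builds and emits:

       pop     {r4, r5, pc}

   whereas with a non-SP base register such as r7 it falls back to a form
   along the lines of:

       ldmia   r7!, {r4, r5, pc}

   The "^" suffix is appended only when returning from an interrupt.  */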
/* Output the assembly for a store multiple.  */

const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
		   ? XEXP (operands[0], 0)
		   : XEXP (XEXP (operands[0], 0), 0);
  bool push_p = REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }
  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
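
/* Illustrative example (added; not from the original source): storing
   d8-d10 with SP as the base starts from "vpush%?.64\t{%P1" and is then
   extended register by register, giving assembly such as:

       vpush.64        {d8, d9, d10}

   For a non-SP base the vstmdb form with writeback is used instead.  */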
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
		   gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(Pmode, stack_pointer_rtx,
					 - (count * 8)))),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (VOIDmode,
		     gen_frame_mem (DFmode, stack_pointer_rtx),
		     reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (VOIDmode,
			 gen_frame_mem (DFmode,
					plus_constant (Pmode,
						       stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
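
/* Worked example (added for illustration): vfp_emit_fstmd (base_reg, 3)
   stores three D registers and returns 3 * 8 = 24 bytes.  A request for
   exactly two pairs on a pre-v6 core is bumped to three by the ARM10 VFPr1
   workaround above, so the caller sees 24 rather than 16 bytes pushed.  */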
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
	 linker.  We need to add an IP clobber to allow setting
	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
	 is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
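
/* Illustrative sketch (added; not from the original source): on a pre-v5
   core with interworking, a call through r2 emitted by the routine above
   looks like:

       mov     lr, pc
       bx      r2

   and without interworking or v4t:

       mov     lr, pc
       mov     pc, r2

   The mov supplies the return address, since pre-v5 has no blx <reg>.  */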
/* Output a 'call' insn that is a reference in memory.  This is
   disabled for ARMv5 and we prefer a blx instead because otherwise
   there's a significant performance overhead.  */
const char *
output_call_mem (rtx *operands)
{
  gcc_assert (!arm_arch5);
  if (TARGET_INTERWORK)
    {
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("bx%?\t%|ip", operands);
    }
  else if (regno_use_in (LR_REGNUM, operands[0]))
    {
      /* LR is used in the memory address.  We load the address in the
	 first instruction.  It's safe to use IP as the target of the
	 load since the call will kill it anyway.  */
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      if (arm_arch4t)
	output_asm_insn ("bx%?\t%|ip", operands);
      else
	output_asm_insn ("mov%?\t%|pc, %|ip", operands);
    }
  else
    {
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("ldr%?\t%|pc, %0", operands);
    }

  return "";
}
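
/* Illustrative example (added; not from the original source): for a call
   through a memory operand that does not mention LR, the default branch of
   the routine above emits:

       mov     lr, pc
       ldr     pc, <mem>

   If the address does use LR, the target is first loaded into IP, which is
   call-clobbered anyway.  */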
/* Output a move from arm registers to arm registers of a long double
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}
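
/* Worked example (added for illustration): moving a 3-word value from
   r1-r3 into the overlapping range r2-r4 must copy backwards (r4 = r3,
   r3 = r2, r2 = r1); a copy downwards, e.g. into r0-r2, runs forwards.  */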
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					     GEN_INT (16)),
		       GEN_INT ((val >> 16) & 0x0000ffff));
      return;
    }
  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
}
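
/* Worked example (added for illustration): for src = 0x12345678 the code
   above first sets the low half and then inserts the high half, which final
   assembly renders as a movw/movt pair:

       movw    rD, #0x5678
       movt    rD, #0x1234

   A constant with a zero high half, e.g. 0x00001234, needs only the movw.  */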
/* Output a move between double words.  It must be REG<-MEM
   or MEM<-REG.  */
const char *
output_move_double (rtx *operands, bool emit, int *count)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];
  if (count)
    *count = 1;

  /* The only case when this might happen is when
     you are looking at the length of a DImode instruction
     that has an invalid constant in it.  */
  if (code0 == REG && code1 != MEM)
    {
      gcc_assert (!emit);
      *count = 2;
      return "";
    }

  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (TARGET_LDRD
		  && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
		output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
	      else
		output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
	      else
		output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
	      else
		output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  /* Autoincrement addressing modes should never have overlapping
	     base and destination registers, and overlapping index registers
	     are already prohibited, so this doesn't need to worry about
	     fix_cm3_ldrd.  */
	  otherops[0] = operands[0];
	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
	    {
	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
		{
		  /* Registers overlap so split out the increment.  */
		  if (emit)
		    {
		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
		      output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  /* Use a single insn if we can.
		     FIXME: IWMMXT allows offsets larger than ldrd can
		     handle, fix these up with a pair of ldr.  */
		  if (TARGET_THUMB2
		      || !CONST_INT_P (otherops[2])
		      || (INTVAL (otherops[2]) > -256
			  && INTVAL (otherops[2]) < 256))
		    {
		      if (emit)
			output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
		    }
		  else
		    {
		      if (emit)
			{
			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
			}
		      if (count)
			*count = 2;
		    }
		}
	    }
	  else
	    {
	      /* Use a single insn if we can.
		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
		 fix these up with a pair of ldr.  */
	      if (TARGET_THUMB2
		  || !CONST_INT_P (otherops[2])
		  || (INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256))
		{
		  if (emit)
		    output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  break;

	case LABEL_REF:
	case CONST:
	  /* We might be able to use ldrd %0, %1 here.  However the range is
	     different to ldr/adr, and it is broken on some ARMv7-M
	     implementations.  */
	  /* Use the second register of the pair to avoid problematic
	     conditional execution.  */
	  otherops[1] = operands[1];
	  if (emit)
	    output_asm_insn ("adr%?\t%0, %1", otherops);
	  operands[1] = otherops[0];
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
	      else
		output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
	    }

	  if (count)
	    *count = 2;
	  break;

	  /* ??? This needs checking for thumb2.  */
	default:
	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
	    {
	      otherops[0] = operands[0];
	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);

	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
		{
		  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
		    {
		      switch ((int) INTVAL (otherops[2]))
			{
			case -8:
			  if (emit)
			    output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
			  return "";
			case -4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
			  return "";
			case 4:
			  if (TARGET_THUMB2)
			    break;
			  if (emit)
			    output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
			  return "";
			}
		    }
		  otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
		  operands[1] = otherops[0];
		  if (TARGET_LDRD
		      && (REG_P (otherops[2])
			  || TARGET_THUMB2
			  || (CONST_INT_P (otherops[2])
			      && INTVAL (otherops[2]) > -256
			      && INTVAL (otherops[2]) < 256)))
		    {
		      if (reg_overlap_mentioned_p (operands[0],
						   otherops[2]))
			{
			  /* Swap base and index registers over to
			     avoid a conflict.  */
			  std::swap (otherops[1], otherops[2]);
			}
		      /* If both registers conflict, it will usually
			 have been fixed by a splitter.  */
		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
			{
			  if (emit)
			    {
			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
			      output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
			    }
			  if (count)
			    *count = 2;
			}
		      else
			{
			  otherops[0] = operands[0];
			  if (emit)
			    output_asm_insn ("ldr%(d%)\t%0, [%1, %2]",
					     otherops);
			}
		      return "";
		    }

		  if (CONST_INT_P (otherops[2]))
		    {
		      if (emit)
			{
			  if (!(const_ok_for_arm (INTVAL (otherops[2]))))
			    output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
			  else
			    output_asm_insn ("add%?\t%0, %1, %2", otherops);
			}
		    }
		  else if (emit)
		    output_asm_insn ("add%?\t%0, %1, %2", otherops);
		}
	      else if (emit)
		output_asm_insn ("sub%?\t%0, %1, %2", otherops);

	      if (count)
		*count = 2;

	      if (TARGET_LDRD)
		return "ldr%(d%)\t%0, [%1]";

	      return "ldm%(ia%)\t%1, %M0";
	    }
	  else
	    {
	      otherops[1] = adjust_address (operands[1], SImode, 4);
	      /* Take care of overlapping base/data reg.  */
	      if (reg_mentioned_p (operands[0], operands[1]))
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("ldr%?\t%0, %1", operands);
		      output_asm_insn ("ldr%?\t%0, %1", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	}
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
		  || (TARGET_ARM && TARGET_LDRD));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
	      else
		output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
	    }
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
	      else
		output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
	    }
	  break;

	case POST_INC:
	  if (emit)
	    {
	      if (TARGET_LDRD)
		output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
	      else
		output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
	    }
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  if (emit)
	    output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  otherops[0] = operands[1];
	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

	  /* IWMMXT allows offsets larger than ldrd can handle,
	     fix these up with a pair of ldr.  */
	  if (!TARGET_THUMB2
	      && CONST_INT_P (otherops[2])
	      && (INTVAL (otherops[2]) <= -256
		  || INTVAL (otherops[2]) >= 256))
	    {
	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	      else
		{
		  if (emit)
		    {
		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
		    }
		  if (count)
		    *count = 2;
		}
	    }
	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
	    {
	      if (emit)
		output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
	    }
	  else
	    {
	      if (emit)
		output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
	    }
	  break;

	case PLUS:
	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
	  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
	    {
	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
		{
		case -8:
		  if (emit)
		    output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
		  return "";

		case -4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
		  return "";

		case 4:
		  if (TARGET_THUMB2)
		    break;
		  if (emit)
		    output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
		  return "";
		}
	    }
	  if (TARGET_LDRD
	      && (REG_P (otherops[2])
		  || TARGET_THUMB2
		  || (CONST_INT_P (otherops[2])
		      && INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256)))
	    {
	      otherops[0] = operands[1];
	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
	      if (emit)
		output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
	      return "";
	    }
	  /* Fall through */

	default:
	  otherops[0] = adjust_address (operands[0], SImode, 4);
	  otherops[1] = operands[1];
	  if (emit)
	    {
	      output_asm_insn ("str%?\t%1, %0", operands);
	      output_asm_insn ("str%?\t%H1, %0", otherops);
	    }
	  if (count)
	    *count = 2;
	}
    }

  return "";
}
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */

      if (MEM_P (operands[1]))
	{
	  switch (GET_CODE (XEXP (operands[1], 0)))
	    {
	    case REG:
	      output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
	      break;

	    case LABEL_REF:
	    case CONST:
	      output_asm_insn ("adr%?\t%0, %1", operands);
	      output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else
	{
	  rtx ops[2];
	  int dest, src, i;

	  gcc_assert (REG_P (operands[1]));

	  dest = REGNO (operands[0]);
	  src = REGNO (operands[1]);

	  /* This seems pretty dumb, but hopefully GCC won't try to do it
	     very often.  */
	  if (dest < src)
	    for (i = 0; i < 4; i++)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	  else
	    for (i = 3; i >= 0; i--)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	}
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return "";
}
/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert (mode == SFmode
	      || mode == DFmode
	      || mode == SImode
	      || mode == DImode
	      || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "v%sr%%?.%s\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
	   load ? "ld" : "st",
	   dp ? "64" : "32",
	   dp ? "P" : "",
	   integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM instruction.
   GCC RTL defines element ordering based on in-memory order.
   This can be different from the architectural ordering of elements
   within a NEON register.  The intrinsics defined in arm_neon.h use the
   NEON register element ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */
const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, nregs, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  nregs = HARD_REGNO_NREGS (regno, mode) / 2;
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
	      || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
	      || VALID_NEON_QREG_MODE (mode)
	      || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 4)
	{
	  templ = "v%smia%%?\t%%0!, %%h1";
	  ops[0] = XEXP (addr, 0);
	}
      else
	{
	  templ = "v%s1.64\t%%h1, %%A0";
	  ops[0] = mem;
	}
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* We have to use vldm / vstm in this case, since there is no
	 pre-decrement form of the vld1 / vst1 instructions.  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case REG:
      /* We have to use vldm / vstm for too-large modes.  */
      if (nregs > 1)
	{
	  if (nregs > 4)
	    templ = "v%smia%%?\t%%m0, %%h1";
	  else
	    templ = "v%s1.64\t%%h1, %%A0";

	  ops[0] = mem;
	  ops[1] = reg;
	  break;
	}
      /* Fall through.  */
    case LABEL_REF:
    case PLUS:
      {
	int i;
	int overlap = -1;
	for (i = 0; i < nregs; i++)
	  {
	    /* We're only using DImode here because it's a convenient size.  */
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
	    ops[1] = adjust_address (mem, DImode, 8 * i);
	    if (reg_overlap_mentioned_p (ops[0], mem))
	      {
		gcc_assert (overlap == -1);
		overlap = i;
	      }
	    else
	      {
		sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
		output_asm_insn (buff, ops);
	      }
	  }
	if (overlap != -1)
	  {
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
	    sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
	    output_asm_insn (buff, ops);
	  }

	return "";
      }

    default:
      gcc_unreachable ();
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx_insn *insn)
{
  rtx reg, mem, addr;
  int load;
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case EImode:
	case OImode:
	  return 8;
	case CImode:
	  return 12;
	case XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  mode = GET_MODE (reg);
  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
      return insns * 4;
    }
  else
    return 4;
}
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return 0.  */
int
arm_address_offset_is_imm (rtx_insn *insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr)
      || (GET_CODE (addr) == PLUS
	  && REG_P (XEXP (addr, 0))
	  && CONST_INT_P (XEXP (addr, 1))))
    return 1;
  else
    return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
	{
	  if (n & (3 << i))
	    {
	      operands[immed_op] = GEN_INT (n & (255 << i));
	      output_asm_insn (instr, operands);
	      instr = instr2;
	      i += 6;
	    }
	}
    }

  return "";
}
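
/* Worked example (added for illustration): adding n = 0x12345 is split into
   ARM-encodable 8-bit chunks at even rotations, so output_add_immediate
   emits a sequence along the lines of:

       add     r0, r1, #69        @ 0x00045
       add     r0, r0, #8960      @ 0x02300
       add     r0, r0, #65536     @ 0x10000

   Each immediate is n masked with 255 << i for some even i, as above.  */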
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem (enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      abort ();
    }
}

/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem (GET_CODE (op));

    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift amount otherwise.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (code)
    {
    case ROTATE:
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      code = ROTATERT;
      *amountp = 32 - INTVAL (XEXP (op, 1));
      mnem = "ror";
      break;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem (code);
      if (CONST_INT_P (XEXP (op, 1)))
	{
	  *amountp = INTVAL (XEXP (op, 1));
	}
      else if (REG_P (XEXP (op, 1)))
	{
	  *amountp = -1;
	  return mnem;
	}
      else
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
	 power of 2, since this case can never be reloaded from a reg.  */
      if (!CONST_INT_P (XEXP (op, 1)))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;

      /* Amount must be a power of two.  */
      if (*amountp & (*amountp - 1))
	{
	  output_operand_lossage ("invalid shift operand");
	  return NULL;
	}

      *amountp = int_log2 (*amountp);
      return ARM_LSL_NAME;

    default:
      output_operand_lossage ("invalid shift operand");
      return NULL;
    }

  /* This is not 100% correct, but follows from the desire to merge
     multiplication by a power of 2 with the recognizer for a
     shift.  >=32 is not a valid shift for "lsl", so we must try and
     output a shift that produces the correct arithmetical result.
     Using lsr #32 is identical except for the fact that the carry bit
     is not set correctly if we set the flags; but we never use the
     carry bit from such an operation, so we can ignore that.  */
  if (code == ROTATERT)
    /* Rotate is just modulo 32.  */
    *amountp &= 31;
  else if (*amountp != (*amountp & 31))
    {
      if (code == ASHIFT)
	mnem = "lsr";
      *amountp = 32;
    }

  /* Shifts of 0 are no-ops.  */
  if (*amountp == 0)
    return NULL;

  return mnem;
}
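
/* Worked example (added for illustration): a MULT by 8 reaching shift_op
   above is rewritten as lsl #3 (int_log2 (8) == 3), and an ASHIFT by 32 or
   more is emitted as lsr #32, which still yields the correct arithmetic
   result (zero), as the comment above explains.  */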
/* Obtain the shift from the POWER of two.  */

static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT power)
{
  HOST_WIDE_INT shift = 0;

  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
    {
      gcc_assert (shift <= 31);
      shift++;
    }

  return shift;
}
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
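
/* Illustrative example (added; not from the original source): the string
   hi"there followed by a newline would be emitted with the quote and the
   non-printable byte escaped, roughly:

       .ascii  "hi\"there\012"

   and a fresh .ascii directive is started whenever a line reaches
   MAX_ASCII_LEN characters, to stay within /bin/as limits.  */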
/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_reg_mask.  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
	 even call clobbered ones.  If this is a leaf function
	 we can just examine the registers used by the RTL, but
	 otherwise we have to assume that whatever function is
	 called might clobber anything, and so we have to save
	 all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
	/* FIQ handlers have registers r8 - r12 banked, so
	   we only need to check r0 - r7, Normal ISRs only
	   bank r14 and r15, so we must check up to r12.
	   r13 is the stack pointer which is always preserved,
	   so we do not need to consider it here.  */
	max_reg = 7;
      else
	max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
	if (df_regs_ever_live_p (reg)
	    || (! crtl->is_leaf && call_used_regs[reg]))
	  save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (flag_pic
	  && !TARGET_SINGLE_PIC_BASE
	  && arm_pic_register != INVALID_REGNUM
	  && crtl->uses_pic_offset_table)
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE (func_type))
    {
      /* For noreturn functions we historically omitted register saves
	 altogether.  However this really messes up debugging.  As a
	 compromise save just the frame pointers.  Combined with the link
	 register saved elsewhere this should be sufficient to get
	 a backtrace.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
	 which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
	if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
	  save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
	 don't stack it even though it may be live.  */
      if (flag_pic
	  && !TARGET_SINGLE_PIC_BASE
	  && arm_pic_register != INVALID_REGNUM
	  && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
	      || crtl->uses_pic_offset_table))
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
	save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
	{
	  reg = EH_RETURN_DATA_REGNO (i);
	  if (reg == INVALID_REGNUM)
	    break;
	  save_reg_mask |= 1 << reg;
	}
    }

  return save_reg_mask;
}
/* Return true if r3 is live at the start of the function.  */

static bool
arm_r3_live_at_start_p (void)
{
  /* Just look at cfg info, which is still close enough to correct at this
     point.  This gives false positives for broken functions that might use
     uninitialized data that happens to be allocated in r3, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
}

/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  /* See the defining assertion in arm_expand_prologue.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
      && IS_NESTED (arm_current_func_type ())
      && arm_r3_live_at_start_p ()
      && crtl->args.pretend_args_size == 0)
    return 4;

  return 0;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.
   This is used by arm_get_frame_offsets, which may add extra registers.  */

static unsigned long
arm_compute_save_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
	  && optimize_size
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
			   arm_compute_static_chain_stack_bytes ())
	   ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
	 onto the stack is odd.  We need to ensure that the stack
	 is 64-bit aligned before we start to save iWMMXt registers,
	 and also before we start to create locals.  (A local variable
	 might be a double or long long which we will load/store using
	 an iWMMXt instruction).  Therefore we need to push another
	 ARM register, so that the stack will be 64-bit aligned.  We
	 try to avoid using the arg registers (r0 -r3) as they might be
	 used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
	if ((save_reg_mask & (1 << reg)) == 0)
	  break;

      if (reg <= 12)
	save_reg_mask |= (1 << reg);
      else
	{
	  cfun->machine->sibcall_blocked = 1;
	  save_reg_mask |= (1 << 3);
	}
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
	save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg++)
    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
      mask |= 1 << reg;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
	 we will use.  If the register is live then we will
	 have to push it.  Use LAST_LO_REGNUM as our fallback
	 choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
	 not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
	reg = LAST_LO_REGNUM;

      if (! call_used_regs[reg])
	mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
	 determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
	if (mask & (1 << reg))
	  break;

      if (reg > LAST_LO_REGNUM)
	{
	  /* Make sure we have a register available for stack decrement.  */
	  mask |= 1 << LAST_LO_REGNUM;
	}
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
	   regno < LAST_VFP_REGNUM;
	   regno += 2)
	{
	  if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
	      && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
	    {
	      if (count > 0)
		{
		  /* Workaround ARM10 VFPr1 bug.  */
		  if (count == 2 && !arm_arch6)
		    count++;
		  saved += count * 8;
		}
	      count = 0;
	    }
	  else
	    count++;
	}
      if (count > 0)
	{
	  if (count == 2 && !arm_arch6)
	    count++;
	  saved += count * 8;
	}
    }
  return saved;
}
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If simple_return is true,
   then do not output epilogue, because it has already been emitted in RTL.  */
const char *
output_return_instruction (rtx operand, bool really_return, bool reverse,
			   bool simple_return)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
	 found a tail call, then we have to trust that the called
	 function won't return.  */
      if (really_return)
	{
	  rtx ops[2];

	  /* Otherwise, trap an attempted return by aborting.  */
	  ops[0] = operand;
	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
				       : "abort");
	  assemble_external_libcall (ops[1]);
	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
	}

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (!simple_return && live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
	 (e.g. interworking) then we can load the return address
	 directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
	return_reg = reg_names[PC_REGNUM];
      else
	return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
	{
	  /* There are three possible reasons for the IP register
	     being saved.  1) a stack frame was created, in which case
	     IP contains the old stack pointer, or 2) an ISR routine
	     corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     restore IP.  */
	  if (frame_pointer_needed)
	    {
	      live_regs_mask &= ~ (1 << IP_REGNUM);
	      live_regs_mask |= (1 << SP_REGNUM);
	    }
	  else
	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
	}

      /* On some ARM architectures it is faster to use LDR rather than
	 LDM to load a single register.  On other architectures, the
	 cost is the same.  In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
	if (live_regs_mask == (1U << reg))
	  break;

      if (reg <= LAST_ARM_REGNUM
	  && (reg != LR_REGNUM
	      || ! really_return
	      || ! IS_INTERRUPT (func_type)))
	{
	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
	}
      else
	{
	  char *p;
	  int first = 1;

	  /* Generate the load multiple instruction to restore the
	     registers.  Note we can get here, even if
	     frame_pointer_needed is true, but only if sp already
	     points to the base of the saved core registers.  */
	  if (live_regs_mask & (1 << SP_REGNUM))
	    {
	      unsigned HOST_WIDE_INT stack_adjust;

	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);

	      if (stack_adjust && arm_arch5 && TARGET_ARM)
		{
		  if (TARGET_UNIFIED_ASM)
		    sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
		  else
		    sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
		}
	      else
		{
		  /* If we can't use ldmib (SA110 bug),
		     then try to pop r3 instead.  */
		  if (stack_adjust)
		    live_regs_mask |= 1 << 3;

		  if (TARGET_UNIFIED_ASM)
		    sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
		  else
		    sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
		}
	    }
	  else
	    {
	      if (TARGET_UNIFIED_ASM)
		sprintf (instr, "pop%s\t{", conditional);
	      else
		sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
	    }

	  p = instr + strlen (instr);

	  for (reg = 0; reg <= SP_REGNUM; reg++)
	    if (live_regs_mask & (1 << reg))
	      {
		int l = strlen (reg_names[reg]);

		if (first)
		  first = 0;
		else
		  {
		    memcpy (p, ", ", 2);
		    p += 2;
		  }

		memcpy (p, "%|", 2);
		memcpy (p + 2, reg_names[reg], l);
		p += l + 2;
	      }

	  if (live_regs_mask & (1 << LR_REGNUM))
	    {
	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
	      /* If returning from an interrupt, restore the CPSR.  */
	      if (IS_INTERRUPT (func_type))
		strcat (p, "^");
	    }
	  else
	    strcpy (p, "}");
	}

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
	 perform the actual function return.  */
      if (really_return
	  && func_type != ARM_FT_INTERWORKED
	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
	{
	  /* The return has already been handled
	     by loading the LR into the PC.  */
	  return "";
	}
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
	{
	case ARM_FT_ISR:
	case ARM_FT_FIQ:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
	  break;

	case ARM_FT_INTERWORKED:
	  sprintf (instr, "bx%s\t%%|lr", conditional);
	  break;

	case ARM_FT_EXCEPTION:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
	  break;

	default:
	  /* Use bx if it's available.  */
	  if (arm_arch5 || arm_arch4t)
	    sprintf (instr, "bx%s\t%%|lr", conditional);
	  else
	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
	  break;
	}

      output_asm_insn (instr, & operand);
    }

  return "";
}
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
{
  unsigned long func_type;

  /* ??? Do we want to print some of the below anyway?  */
  if (TARGET_THUMB1)
    return;

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
	       crtl->args.size,
	       crtl->args.pretend_args_size, frame_size);

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
	       frame_pointer_needed,
	       cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			      HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
	 of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
	{
	  rtx label = cfun->machine->call_via[regno];

	  if (label != NULL)
	    {
	      switch_to_section (function_section (current_function_decl));
	      targetm.asm_out.internal_label (asm_out_file, "L",
					      CODE_LABEL_NUMBER (label));
	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	    }
	}

      /* ??? Probably not safe to set this here, since it assumes that a
	 function will be emitted as assembly immediately after we generate
	 RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
		  || (cfun->machine->return_used_this_function != 0)
		  || offsets->saved_regs == offsets->outgoing_args
		  || frame_pointer_needed);
    }
}
/* Generate and emit a sequence of insns equivalent to PUSH, but using
   STR and STRD.  If an even number of registers are being pushed, one
   or more STRD patterns are created for each register pair.  If an
   odd number of registers are pushed, emit an initial STR followed by
   as many STRD instructions as are needed.  This works best when the
   stack is initially 64-bit aligned (the normal case), since it
   ensures that each STRD is also 64-bit aligned.  */
static void
thumb2_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i;
  unsigned regno;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp;

  num_regs = bit_count (saved_regs_mask);

  /* Must be at least one register to save, and can't save SP or PC.  */
  gcc_assert (num_regs > 0 && num_regs <= 14);
  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));

  /* Create sequence for DWARF info.  All the frame-related data for
     debugging is held in this wrapper.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Describe the stack adjustment.  */
  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Find the first register.  */
  for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
    ;

  i = 0;

  /* If there's an odd number of registers to push.  Start off by
     pushing a single register.  This ensures that subsequent strd
     operations are dword aligned (assuming that SP was originally
     64-bit aligned).  */
  if ((num_regs & 1) != 0)
    {
      rtx reg, mem, insn;

      reg = gen_rtx_REG (SImode, regno);
      if (num_regs == 1)
	mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
						     stack_pointer_rtx));
      else
	mem = gen_frame_mem (Pmode,
			     gen_rtx_PRE_MODIFY
			     (Pmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     -4 * num_regs)));

      tmp = gen_rtx_SET (VOIDmode, mem, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      insn = emit_insn (tmp);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      i++;
      regno++;
      XVECEXP (dwarf, 0, i) = tmp;
    }

  while (i < num_regs)
    if (saved_regs_mask & (1 << regno))
      {
	rtx reg1, reg2, mem1, mem2;
	rtx tmp0, tmp1, tmp2;
	int regno2;

	/* Find the register to pair with this one.  */
	for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
	     regno2++)
	  ;

	reg1 = gen_rtx_REG (SImode, regno);
	reg2 = gen_rtx_REG (SImode, regno2);

	if (i == 0)
	  {
	    rtx insn;

	    /* The first pair also allocates the stack space for all
	       saved registers, via writeback.  */
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * num_regs));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							-4 * (num_regs - 1)));
	    tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				plus_constant (Pmode, stack_pointer_rtx,
					       -4 * num_regs));
	    tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
	    tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp0) = 1;
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
	    XVECEXP (par, 0, 0) = tmp0;
	    XVECEXP (par, 0, 1) = tmp1;
	    XVECEXP (par, 0, 2) = tmp2;
	    insn = emit_insn (par);
	    RTX_FRAME_RELATED_P (insn) = 1;
	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  }
	else
	  {
	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * i));
	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
							stack_pointer_rtx,
							4 * (i + 1)));
	    tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
	    tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
	    RTX_FRAME_RELATED_P (tmp1) = 1;
	    RTX_FRAME_RELATED_P (tmp2) = 1;
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    XVECEXP (par, 0, 0) = tmp1;
	    XVECEXP (par, 0, 1) = tmp2;
	    emit_insn (par);
	  }

	/* Create unwind information.  This is an approximation.  */
	tmp1 = gen_rtx_SET (VOIDmode,
			    gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * i)),
			    reg1);
	tmp2 = gen_rtx_SET (VOIDmode,
			    gen_frame_mem (Pmode,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  4 * (i + 1))),
			    reg2);

	RTX_FRAME_RELATED_P (tmp1) = 1;
	RTX_FRAME_RELATED_P (tmp2) = 1;
	XVECEXP (dwarf, 0, i + 1) = tmp1;
	XVECEXP (dwarf, 0, i + 2) = tmp2;
	i += 2;
	regno = regno2 + 1;
      }
    else
      regno++;

  return;
}
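
/* Illustrative sketch (added; not from the original source): pushing the
   odd-sized set {r4, r5, r6} with the routine above first stores one
   register with writeback and then one dword-aligned pair, roughly:

       str     r4, [sp, #-12]!
       strd    r5, r6, [sp, #4]

   while the DWARF note still describes a single 12-byte SP decrement plus
   the three individual stores.  */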
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more freedom
   for scheduling and can be turned into an STM by peephole optimizations.  */
static void
arm_emit_strd_push (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j, dwarf_index = 0;
  int offset = 0;
  rtx dwarf = NULL_RTX;
  rtx insn = NULL_RTX;
  rtx tmp, mem;

  /* TODO: A more efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
  gcc_assert (num_regs > 0);

  /* Create sequence for DWARF info.  */
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));

  /* For dwarf info, we generate explicit stack update.  */
  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, dwarf_index++) = tmp;

  /* Save registers.  */
  offset = - 4 * num_regs;
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2 == 0)
	    && (saved_regs_mask & (1 << (j + 1))))
	  {
	    /* Current register and previous register form register pair for
	       which STRD can be generated.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (DImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset + 4));
	    tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 8;
	    j += 2;
	  }
	else
	  {
	    /* Emit a single word store.  */
	    if (offset < 0)
	      {
		/* Allocate stack space for all saved registers.  */
		tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
		tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
		mem = gen_frame_mem (SImode, tmp);
		offset = 0;
	      }
	    else if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    tmp = emit_insn (tmp);

	    /* Record the first store insn.  */
	    if (dwarf_index == 1)
	      insn = tmp;

	    /* Generate dwarf info.  */
	    mem = gen_frame_mem (SImode,
				 plus_constant (Pmode,
						stack_pointer_rtx,
						offset));
	    tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
	    RTX_FRAME_RELATED_P (tmp) = 1;
	    XVECEXP (dwarf, 0, dwarf_index++) = tmp;

	    offset += 4;
	    j += 1;
	  }
      }
    else
      j++;

  /* Attach dwarf info to the first insn we generate.  */
  gcc_assert (insn != NULL_RTX);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
   MASK for registers that should be annotated for DWARF2 frame unwind
   information.  */
static rtx
emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
{
  int num_regs = 0;
  int num_dwarf_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  /* We don't record the PC in the dwarf frame information.  */
  dwarf_regs_mask &= ~(1 << PC_REGNUM);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	num_regs++;
      if (dwarf_regs_mask & (1 << i))
	num_dwarf_regs++;
    }

  gcc_assert (num_regs && num_regs <= 16);
  gcc_assert ((dwarf_regs_mask & ~mask) == 0);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
	   (set (mem:BLK (pre_modify:SI (reg:SI sp)
					(const_int:SI <num>)))
		(unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
	   (use (reg:SI XX))
	   (use (reg:SI YY))
	   ...
	])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
	   (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
	   (set (mem:SI (reg:SI sp)) (reg:SI r4))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
	   ...
	])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, 0)
	    = gen_rtx_SET (VOIDmode,
			   gen_frame_mem
			   (BLKmode,
			    gen_rtx_PRE_MODIFY (Pmode,
						stack_pointer_rtx,
						plus_constant
						(Pmode, stack_pointer_rtx,
						 -4 * num_regs))),
			   gen_rtx_UNSPEC (BLKmode,
					   gen_rtvec (1, reg),
					   UNSPEC_PUSH_MULT));

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp = gen_rtx_SET (VOIDmode,
				 gen_frame_mem (SImode, stack_pointer_rtx),
				 reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  break;
	}
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

	  if (dwarf_regs_mask & (1 << i))
	    {
	      tmp
		= gen_rtx_SET (VOIDmode,
			       gen_frame_mem
			       (SImode,
				plus_constant (Pmode, stack_pointer_rtx,
					       4 * j)),
			       reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  j++;
	}
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   SAVED_REGS_MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
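/* For instance (illustrative shape, not quoted from arm.md): popping
   {r4, r5, pc} is emitted as one parallel of roughly the form

     (parallel [(return)
		(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
		(set (reg:SI r4) (mem:SI (reg:SI sp)))
		(set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
		(set (reg:SI pc) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))])

   where the leading (return) is only present when PC is in the mask.  */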
static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;
  bool return_in_pc;
  int offset_adj;
  int emit_update;

  return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
  offset_adj = return_in_pc ? 1 : 0;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* If SP is in reglist, then we don't emit SP update insn.  */
  emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode,
			  rtvec_alloc (num_regs + emit_update + offset_adj));

  if (return_in_pc)
    {
      tmp = ret_rtx;
      XVECEXP (par, 0, 0) = tmp;
    }

  if (emit_update)
    {
      /* Increment the stack pointer, based on there being
	 num_regs 4-byte registers to restore.  */
      tmp = gen_rtx_SET (VOIDmode,
			 stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					4 * num_regs));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, offset_adj) = tmp;
    }

  /* Now restore every reg, which may include PC.  */
  for (j = 0, i = 0; j < num_regs; i++)
    if (saved_regs_mask & (1 << i))
      {
	reg = gen_rtx_REG (SImode, i);
	if ((num_regs == 1) && emit_update && !return_in_pc)
	  {
	    /* Emit single load with writeback.  */
	    tmp = gen_frame_mem (SImode,
				 gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));
	    tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
	    return;
	  }

	tmp = gen_rtx_SET (VOIDmode,
			   reg,
			   gen_frame_mem
			   (SImode,
			    plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
	RTX_FRAME_RELATED_P (tmp) = 1;
	XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;

	/* We need to maintain a sequence for DWARF info too.  As dwarf info
	   should not have PC, skip PC.  */
	if (i != PC_REGNUM)
	  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	j++;
      }

  if (return_in_pc)
    par = emit_jump_insn (par);
  else
    par = emit_insn (par);

  REG_NOTES (par) = dwarf;
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
				 stack_pointer_rtx, stack_pointer_rtx);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi
   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
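/* A single call typically becomes one load-multiple instruction, e.g.
   popping d8-d15 from the stack pointer (register range illustrative):

	vldmia	sp!, {d8-d15}

   annotated with one REG_CFA_RESTORE note per D-register.  */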
static void
arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
  int i, j;
  rtx par;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg;

  gcc_assert (num_regs && num_regs <= 32);

  /* Workaround ARM10 VFPr1 bug.  */
  if (num_regs == 2 && !arm_arch6)
    {
      if (first_reg == 15)
	first_reg--;

      num_regs++;
    }

  /* We can emit at most 16 D-registers in a single pop_multi instruction, and
     there could be up to 32 D-registers to restore.
     If there are more than 16 D-registers, make two recursive calls,
     each of which emits one pop_multi instruction.  */
  if (num_regs > 16)
    {
      arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
      arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
      return;
    }

  /* The parallel needs to hold num_regs SETs
     and one SET for the stack update.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  /* Increment the stack pointer, based on there being
     num_regs 8-byte registers to restore.  */
  tmp = gen_rtx_SET (VOIDmode,
		     base_reg,
		     plus_constant (Pmode, base_reg, 8 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (par, 0, 0) = tmp;

  /* Now show every reg that will be restored, using a SET for each.  */
  for (j = 0, i = first_reg; j < num_regs; i += 2)
    {
      reg = gen_rtx_REG (DFmode, i);

      tmp = gen_rtx_SET (VOIDmode,
			 reg,
			 gen_frame_mem
			 (DFmode,
			  plus_constant (Pmode, base_reg, 8 * j)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, j + 1) = tmp;

      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      j++;
    }

  par = emit_insn (par);
  REG_NOTES (par) = dwarf;

  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
  if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
    {
      RTX_FRAME_RELATED_P (par) = 1;
      add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
    }
  else
    arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
				 base_reg, base_reg);
}
/* Generate and emit a pattern that will be recognized as an LDRD pattern.
   If an even number of registers is being popped, multiple LDRD patterns
   are created for all register pairs.  If an odd number of registers is
   popped, the last register is loaded by using the LDR pattern.  */
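/* Illustration (mask chosen for the example): popping {r4, r5, r6}
   without PC is expected to come out roughly as

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4

   i.e. pairs via offset addressing, one stack adjustment, then a final
   post-incrementing LDR for the odd register.  */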
static void
thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int num_regs = 0;
  int i, j;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, reg, tmp1;
  bool return_in_pc;

  return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
     to be popped.  So, if num_regs is even, now it will become odd,
     and we can generate pop with PC.  If num_regs is odd, it will be
     even now, and ldr with return can be generated for PC.  */
  if (return_in_pc)
    num_regs--;

  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));

  /* Var j iterates over all the registers to gather all the registers in
     saved_regs_mask.  Var i gives index of saved registers in stack frame.
     A PARALLEL RTX of register-pair is created here, so that pattern for
     LDRD can be matched.  As PC is always last register to be popped, and
     we have already decremented num_regs if PC, we don't have to worry
     about PC in this loop.  */
  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
    if (saved_regs_mask & (1 << j))
      {
	/* Create RTX for memory load.  */
	reg = gen_rtx_REG (SImode, j);
	tmp = gen_rtx_SET (SImode,
			   reg,
			   gen_frame_mem (SImode,
					  plus_constant (Pmode,
							 stack_pointer_rtx,
							 4 * i)));
	RTX_FRAME_RELATED_P (tmp) = 1;

	if (i % 2 == 0)
	  {
	    /* When saved-register index (i) is even, the RTX to be emitted is
	       yet to be created.  Hence create it first.  The LDRD pattern we
	       are generating is :
		 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
		   (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
	       where target registers need not be consecutive.  */
	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	    dwarf = NULL_RTX;
	  }

	/* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
	   added as 0th element and if i is odd, reg_i is added as 1st element
	   of LDRD pattern shown above.  */
	XVECEXP (par, 0, (i % 2)) = tmp;
	dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

	if ((i % 2) == 1)
	  {
	    /* When saved-register index (i) is odd, RTXs for both the registers
	       to be loaded are generated in above given LDRD pattern, and the
	       pattern can be emitted now.  */
	    par = emit_insn (par);
	    REG_NOTES (par) = dwarf;
	    RTX_FRAME_RELATED_P (par) = 1;
	  }

	i++;
      }

  /* If the number of registers popped is odd and return_in_pc is false, or
     the number popped is even and return_in_pc is true, the last register
     must be popped using LDR.  It can be PC as well.  Hence, adjust the
     stack first and then emit an LDR with post increment.  */

  /* Increment the stack pointer, based on there being
     num_regs 4-byte registers to restore.  */
  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
  RTX_FRAME_RELATED_P (tmp) = 1;
  tmp = emit_insn (tmp);
  if (!return_in_pc)
    arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
				 stack_pointer_rtx, stack_pointer_rtx);

  dwarf = NULL_RTX;

  if (((num_regs % 2) == 1 && !return_in_pc)
      || ((num_regs % 2) == 0 && return_in_pc))
    {
      /* Scan for the single register to be popped.  Skip until the saved
	 register is found.  */
      for (; (saved_regs_mask & (1 << j)) == 0; j++);

      /* Gen LDR with post increment here.  */
      tmp1 = gen_rtx_MEM (SImode,
			  gen_rtx_POST_INC (SImode,
					    stack_pointer_rtx));
      set_mem_alias_set (tmp1, get_frame_alias_set ());

      reg = gen_rtx_REG (SImode, j);
      tmp = gen_rtx_SET (SImode, reg, tmp1);
      RTX_FRAME_RELATED_P (tmp) = 1;
      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);

      if (return_in_pc)
	{
	  /* If return_in_pc, j must be PC_REGNUM.  */
	  gcc_assert (j == PC_REGNUM);
	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = tmp;
	  par = emit_jump_insn (par);
	}
      else
	{
	  par = emit_insn (tmp);
	  REG_NOTES (par) = dwarf;
	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}
    }
  else if ((num_regs % 2) == 1 && return_in_pc)
    {
      /* There are 2 registers to be popped.  So, generate the pattern
	 pop_multiple_with_stack_update_and_return to pop in PC.  */
      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
    }

  return;
}
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.
   TODO: Add a peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
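/* Illustration (mask chosen for the example): popping {r4, r5, r6, r7, lr}
   is expected to produce roughly

	ldrd	r4, r5, [sp]
	ldrd	r6, r7, [sp, #8]
	ldr	lr, [sp, #16]
	add	sp, sp, #20

   offset addressing throughout, with the single stack update last.  */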
static void
arm_emit_ldrd_pop (unsigned long saved_regs_mask)
{
  int j = 0;
  int offset = 0;
  rtx par = NULL_RTX;
  rtx dwarf = NULL_RTX;
  rtx tmp, mem;

  /* Restore saved registers.  */
  gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
  j = 0;
  while (j <= LAST_ARM_REGNUM)
    if (saved_regs_mask & (1 << j))
      {
	if ((j % 2) == 0
	    && (saved_regs_mask & (1 << (j + 1)))
	    && (j + 1) != PC_REGNUM)
	  {
	    /* Current register and next register form register pair for which
	       LDRD can be generated.  PC is always the last register popped, and
	       we handle it separately.  */
	    if (offset > 0)
	      mem = gen_frame_mem (DImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (DImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j),
				    NULL_RTX);
	    dwarf = alloc_reg_note (REG_CFA_RESTORE,
				    gen_rtx_REG (SImode, j + 1),
				    dwarf);

	    REG_NOTES (tmp) = dwarf;

	    offset += 8;
	    j += 2;
	  }
	else if (j != PC_REGNUM)
	  {
	    /* Emit a single word load.  */
	    if (offset > 0)
	      mem = gen_frame_mem (SImode,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  offset));
	    else
	      mem = gen_frame_mem (SImode, stack_pointer_rtx);

	    tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
	    tmp = emit_insn (tmp);
	    RTX_FRAME_RELATED_P (tmp) = 1;

	    /* Generate dwarf info.  */
	    REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
					      gen_rtx_REG (SImode, j),
					      NULL_RTX);

	    offset += 4;
	    j += 1;
	  }
	else /* j == PC_REGNUM */
	  j++;
      }
    else
      j++;

  /* Update the stack.  */
  if (offset > 0)
    {
      tmp = gen_rtx_SET (Pmode,
			 stack_pointer_rtx,
			 plus_constant (Pmode,
					stack_pointer_rtx,
					offset));
      tmp = emit_insn (tmp);
      arm_add_cfa_adjust_cfa_note (tmp, offset,
				   stack_pointer_rtx, stack_pointer_rtx);
      offset = 0;
    }

  if (saved_regs_mask & (1 << PC_REGNUM))
    {
      /* Only PC is to be popped.  */
      par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
      XVECEXP (par, 0, 0) = ret_rtx;
      tmp = gen_rtx_SET (SImode,
			 gen_rtx_REG (SImode, PC_REGNUM),
			 gen_frame_mem (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx)));
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (par, 0, 1) = tmp;
      par = emit_jump_insn (par);

      /* Generate dwarf info.  */
      dwarf = alloc_reg_note (REG_CFA_RESTORE,
			      gen_rtx_REG (SImode, PC_REGNUM),
			      dwarf);
      REG_NOTES (par) = dwarf;
      arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
				   stack_pointer_rtx, stack_pointer_rtx);
    }
}
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!leaf_function_p ()
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know if r3 will be available because
   there is an indirect tail call happening in this
   particular case.  */
static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
	rtx call = BB_END (e->src);
	if (!CALL_P (call))
	  call = prev_nonnote_nondebug_insn (call);
	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
	if (find_regno_fusage (call, USE, 3)
	    || is_indirect_tailcall_p (call))
	  return true;
      }
  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
			     ----
			    |    | \
			    |    |   saved arguments for
			    |    |   vararg functions
			    |    | /
			      --
   hard FP & arg pointer -> |    | \
			    |    |   stack frame
			    |    | /
			      --
			    |    | \
			    |    |   call saved registers
      soft frame pointer -> |    | /
			      --
			    |    | \
			    |    |   local variables
     locals base pointer -> |    | /
			      --
			    |    | \
			    |    |   outgoing arguments
   current stack pointer -> |    | /
			      --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue() and arm_compute_save_reg_mask().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */
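/* A worked example (values illustrative only): ARM state, no frame
   pointer, pretend_args_size == 0, {r4-r7, lr} saved (20 bytes), 16
   bytes of locals, no outgoing arguments and a zero interworking slot
   gives saved_args = 0, saved_regs = 20, soft_frame = 24 (20 rounded up
   for doubleword alignment), locals_base = outgoing_args = 40.  */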
static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.

     However, we only need to know about leaf functions once reload has
     completed, and the frame size cannot be changed after that time, so
     we can safely use the cached value.  */

  if (reload_completed)
    return offsets;

  /* Initially this is the size of the local variables.  It will be translated
     into an offset once we have determined the size of preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  leaf = leaf_function_p ();

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       + (frame_pointer_needed ? 4 : 0));

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
	 preserve that condition at any subroutine call.  We also require the
	 soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
	{
	  /* Check for the call-saved iWMMXt registers.  */
	  for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 8;
	}

      func_type = arm_current_func_type ();
      /* Space for saved VFP registers.  */
      if (! IS_VOLATILE (func_type)
	  && TARGET_HARD_FLOAT && TARGET_VFP)
	saved += arm_get_vfp_saved_size ();
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs
    = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0
      /* However if it calls alloca(), we have a dynamically allocated
	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
      && ! cfun->calls_alloca)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
	 when there is a stack frame as the alignment will be rolled into
	 the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* Register r3 is caller-saved.  Normally it does not need to be
	     saved on entry by the prologue.  However if we choose to save
	     it for padding then we may confuse the compiler into thinking
	     a prologue sequence is required when in fact it is not.  This
	     will occur when shrink-wrapping if r3 is used as a scratch
	     register and there are no other callee-saved writes.

	     This situation can be avoided when other callee-saved registers
	     are available and r3 is not mandatory if we choose a callee-saved
	     register for padding.  */
	  bool prefer_callee_reg_p = false;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (! any_sibcall_could_use_r3 ()
	      && arm_size_return_regs () <= 12
	      && (offsets->saved_regs_mask & (1 << 3)) == 0
	      && (TARGET_THUMB2
		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
	    {
	      reg = 3;
	      if (!TARGET_THUMB2)
		prefer_callee_reg_p = true;
	    }
	  if (reg == -1
	      || prefer_callee_reg_p)
	    {
	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
		{
		  /* Avoid fixed registers; they may be changed at
		     arbitrary times so it's unsafe to restore them
		     during the epilogue.  */
		  if (!fixed_regs[i]
		      && (offsets->saved_regs_mask & (1 << i)) == 0)
		    {
		      reg = i;
		      break;
		    }
		}
	    }

	  if (reg != -1)
	    {
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
			    + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
	offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case FRAME_POINTER_REGNUM:
	  /* This is the reverse of the soft frame pointer
	     to hard frame pointer elimination below.  */
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* This is only non-zero in the case where the static chain register
	     is stored above the frame.  */
	  return offsets->frame - offsets->saved_args - 4;

	case STACK_POINTER_REGNUM:
	  /* If nothing has been pushed on the stack at all
	     then this will return -4.  This *is* correct!  */
	  return offsets->outgoing_args - (offsets->saved_args + 4);

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer to the bottom entry
	     in the stack frame.  If there is no stack frame at all,
	     then they are identical.  */
	  return offsets->frame - offsets->soft_frame;

	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
	 In theory you could eliminate from the hard frame
	 pointer to the stack pointer, but this will never
	 happen, since if a stack frame is not needed the
	 hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */

bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
	  (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
	  (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
	  (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
	   true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	insn = gen_rtx_MEM (V2SImode, insn);
	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	RTX_FRAME_RELATED_P (insn) = 1;
	saved_size += 8;
      }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	{
	  if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
	      && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
	    {
	      if (start_reg != reg)
		saved_size += vfp_emit_fstmd (start_reg,
					      (reg - start_reg) / 2);
	      start_reg = reg + 2;
	    }
	}
      if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }
  return saved_size;
}
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
	 expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx,
					hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					hard_frame_pointer_rtx,
					stack_pointer_rtx));
	}
      dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx r0, r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov sp, r0
	  bx lr

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, 0);
      r1 = gen_rtx_REG (SImode, 1);

      insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_REGISTER, NULL);

      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));

      /* ??? The CFA changes here, which may cause GDB to conclude that it
	 has entered a different function.  That said, the unwind info is
	 correct, individually, before and after this instruction because
	 we've described the save of SP, which will override the default
	 handling of SP as restoring from the CFA.  */
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* For APCS frames, if IP register is clobbered
     when creating frame, save that register in a special
     way.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
	{
	  /* Interrupt functions must not corrupt any registers.
	     Creating a frame pointer however, corrupts the IP
	     register, so we must push it first.  */
	  emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);

	  /* Do not set RTX_FRAME_RELATED_P on this insn.
	     The dwarf stack unwinding code only wants to see one
	     stack decrement per function, and this is not it.  If
	     this instruction is labeled as being part of the frame
	     creation sequence then dwarf2out_frame_debug_expr will
	     die when it encounters the assignment of IP to FP
	     later on, since the use of SP here establishes SP as
	     the CFA register and not IP.

	     Anyway this instruction is not really part of the stack
	     frame creation although it is part of the prologue.  */
	}
      else if (IS_NESTED (func_type))
	{
	  /* The static chain register is the same as the IP register
	     used as a scratch register during stack frame creation.
	     To get around this need to find somewhere to store IP
	     whilst the frame is being created.  We try the following
	     places in order:

	       1. The last argument register r3 if it is available.
	       2. A slot on the stack above the frame if there are no
		  arguments to push onto the stack.
	       3. Register r3 again, after pushing the argument registers
		  onto the stack, if this is a varargs function.
	       4. The last slot on the stack created for the arguments to
		  push, if this isn't a varargs function.

	     Note - we only need to tell the dwarf2 backend about the SP
	     adjustment in the second variant; the static chain register
	     doesn't need to be unwound, as it doesn't contain a value
	     inherited from the caller.  */

	  if (!arm_r3_live_at_start_p ())
	    insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	  else if (args_to_push == 0)
	    {
	      rtx addr, dwarf;

	      gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
	      saved_regs += 4;

	      addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
	      insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
	      fp_offset = 4;

	      /* Just tell the dwarf backend that we adjusted SP.  */
	      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx,
						  -fp_offset));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	    }
	  else
	    {
	      /* Store the args on the stack.  */
	      if (cfun->machine->uses_anonymous_args)
		{
		  insn
		    = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
					   (0xf0 >> (args_to_push / 4)) & 0xf);
		  emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
		  saved_pretend_args = 1;
		}
	      else
		{
		  rtx addr, dwarf;

		  if (args_to_push == 4)
		    addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
		  else
		    addr
		      = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
					    plus_constant (Pmode,
							   stack_pointer_rtx,
							   -args_to_push));

		  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);

		  /* Just tell the dwarf backend that we adjusted SP.  */
		  dwarf
		    = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx,
						  -args_to_push));
		  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
		}

	      RTX_FRAME_RELATED_P (insn) = 1;
	      fp_offset = args_to_push;
	      args_to_push = 0;
	    }
	}

      insn = emit_set_insn (ip_rtx,
			    plus_constant (Pmode, stack_pointer_rtx,
					   fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
	insn = emit_multi_reg_push
	  ((0xf0 >> (args_to_push / 4)) & 0xf,
	   (0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn
	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
		       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra
     push of IP (needed when the frame is needed and the frame layout is
     APCS), subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (SImode, lr, -4));
    }

  if (live_regs_mask)
    {
      unsigned long dwarf_regs_mask = live_regs_mask;

      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
	  && saved_regs == offsets->saved_regs - offsets->saved_args)
	{
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
	  int n;
	  int frame;
	  n = 0;
	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
	}

      if (TARGET_LDRD
	  && current_tune->prefer_ldrd_strd
	  && !optimize_function_for_size_p (cfun))
	{
	  gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
	  if (TARGET_THUMB2)
	    thumb2_emit_strd_push (live_regs_mask);
	  else if (TARGET_ARM
		   && !TARGET_APCS_FRAME
		   && !IS_INTERRUPT (func_type))
	    arm_emit_strd_push (live_regs_mask);
	  else
	    {
	      insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
      else
	{
	  insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
	{
	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (IS_NESTED (func_type))
	    {
	      /* Recover the static chain register.  */
	      if (!arm_r3_live_at_start_p () || saved_pretend_args)
		insn = gen_rtx_REG (SImode, 3);
	      else
		{
		  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
		  insn = gen_frame_mem (SImode, insn);
		}
	      emit_set_insn (ip_rtx, insn);
	      /* Add a USE to stop propagate_one_insn() from barfing.  */
	      emit_insn (gen_force_register_use (ip_rtx));
	    }
	}
      else
	{
	  insn = GEN_INT (saved_regs - 4);
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  if (flag_stack_usage_info)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
	 need to get tricky.  */
      rtx_insn *last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
			- offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
	 will prevent the scheduler from moving stores to the frame
	 before the stack adjustment.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
					 hard_frame_pointer_rtx));
    }

  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
	mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
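/* Putting it together (illustrative, plain ARM, no frame pointer): a
   function saving {r4, r5, lr} with 8 bytes of outgoing arguments is
   expected to expand to roughly

	push	{r4, r5, lr}
	sub	sp, sp, #8

   where the push carries a REG_FRAME_RELATED_EXPR note spelling out the
   12-byte SP decrement and the three individual stores.  */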
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}
      if (current_insn_predicate != NULL)
	{
	  output_operand_lossage
	    ("predicated instruction in conditional sequence");
	  return;
	}

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
   Letters previously used, but now deprecated/obsolete: sVWXYZ.

   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.

   If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '(':
      /* Nothing in unified syntax, otherwise the current condition code.  */
      if (!TARGET_UNIFIED_ASM)
	arm_print_condition (stream);
      break;

    case ')':
      /* The current condition code in unified syntax, otherwise nothing.  */
      if (TARGET_UNIFIED_ASM)
	arm_print_condition (stream);
      break;

    case '.':
      /* The current condition code for a condition code setting instruction.
	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      if (TARGET_UNIFIED_ASM)
	{
	  fputc('s', stream);
	  arm_print_condition (stream);
	}
      else
	{
	  arm_print_condition (stream);
	  fputc('s', stream);
	}
      return;

    case '!':
      /* If the instruction is conditionally executed then print
	 the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
      if (current_insn_predicate)
	arm_print_condition (stream);
      else
	fputc('s', stream);
      break;

    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
	REAL_VALUE_TYPE r;
	REAL_VALUE_FROM_CONST_DOUBLE (r, x);
	r = real_value_negate (&r);
	fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (stream, x);
	  break;

	case CONST:
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	    {
	      output_addr_const (stream, x);
	      break;
	    }
	  /* Fall through.  */

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    /* An integer that we want to print in HEX.  */
    case 'x':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
	  break;

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      return;

    case 'B':
      if (CONST_INT_P (x))
	{
	  HOST_WIDE_INT val;
	  val = ARM_SIGN_EXTEND (~INTVAL (x));
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	{
	  putc ('~', stream);
	  output_addr_const (stream, x);
	}
      return;

    case 'b':
      /* Print the log2 of a CONST_INT.  */
      {
	HOST_WIDE_INT val;

	if (!CONST_INT_P (x)
	    || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	else
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
      }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
	HOST_WIDE_INT val;
	const char *shift;

	shift = shift_op (x, &val);

	if (shift)
	  {
	    fprintf (stream, ", %s ", shift);
	    if (val == -1)
	      arm_print_operand (stream, XEXP (x, 1), 0);
	    else
	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
	  }
      }
      return;
      /* An explanation of the 'Q', 'R' and 'H' register operands:

	 In a pair of registers containing a DI or DF value the 'Q'
	 operand returns the register number of the register containing
	 the least significant part of the value.  The 'R' operand returns
	 the register number of the register containing the most
	 significant part of the value.

	 The 'H' operand returns the higher of the two register numbers.
	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
	 same as the 'Q' operand, since the most significant part of the
	 value is held in the lower number register.  The reverse is true
	 on systems where WORDS_BIG_ENDIAN is false.

	 The purpose of these operands is to distinguish between cases
	 where the endian-ness of the values is important (for example
	 when they are added together), and cases where the endian-ness
	 is irrelevant, but the order of register operations is important.
	 For example when loading a value from memory into a register
	 pair, the endian-ness does not matter.  Provided that the value
	 from the lower memory address is put into the lower numbered
	 register, and the value from the higher address is put into the
	 higher numbered register, the load will work regardless of whether
	 the value being loaded is big-wordian or little-wordian.  The
	 order of the two register loads can matter however, if the address
	 of the memory location is actually held in one of the registers
	 being overwritten by the load.

	 The 'Q' and 'R' constraints are also available for 64-bit
	 constants.  */
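      /* For example (illustrative template, not quoted from arm.md), a
	 DImode addition can be written as

	     "adds\t%Q0, %Q1, %Q2\;adc\t%R0, %R1, %R2"

	 so the carry propagates from the least to the most significant
	 half whichever register of the pair holds which half.  */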
    case 'Q':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  rtx part = gen_lowpart (SImode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  rtx part;

	  if (mode == VOIDmode)
	    mode = DImode;
	  part = gen_highpart_mode (SImode, mode, x);
	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
	  return;
	}

      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
		   REG_P (XEXP (x, 0))
		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
		   REGNO (x),
		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
	int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
	int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
	if (numregs == 1)
	  asm_fprintf (stream, "{d%d}", regno);
	else
	  asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
	return;

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[get_arm_condition_code (x)],
	     stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
	 want to do that.  */
      if (x == const_true_rtx)
	{
	  output_operand_lossage ("instruction never executed");
	  return;
	}
      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
				 (get_arm_condition_code (x))],
	     stream);
      return;

    case 's':
    case 'V':
    case 'W':
    case 'X':
    case 'Y':
    case 'Z':
      /* Former Maverick support, removed after GCC-4.7.  */
      output_operand_lossage ("obsolete Maverick format code '%c'", code);
      return;

    case 'U':
      if (!REG_P (x)
	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
	/* Bad value for wCG register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}
      else
	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

      /* Print an iWMMXt control register name.  */
    case 'w':
      if (!CONST_INT_P (x)
	  || INTVAL (x) < 0
	  || INTVAL (x) >= 16)
	/* Bad value for wC register number.  */
	{
	  output_operand_lossage ("invalid operand for code '%c'", code);
	  return;
	}
      else
	{
	  static const char * wc_reg_names [16] =
	    {
	      "wCID",  "wCon",  "wCSSF", "wCASF",
	      "wC4",   "wC5",   "wC6",   "wC7",
	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
	      "wC12",  "wC13",  "wC14",  "wC15"
	    };

	  fputs (wc_reg_names [INTVAL (x)], stream);
	}
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
	machine_mode mode = GET_MODE (x);
	int is_quad = (code == 'q');
	int regno;

	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (!REG_P (x)
	    || !IS_VFP_REGNUM (REGNO (x)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
	    || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
		 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if ((GET_MODE_SIZE (mode) != 16
	     && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!NEON_REGNO_OK_FOR_QUAD (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	if (GET_MODE_SIZE (mode) == 16)
	  fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
				  + (code == 'f' ? 1 : 0));
	else
	  fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
				  + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
	int index = vfp3_const_double_index (x);
	gcc_assert (index != -1);
	fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */

    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
	HOST_WIDE_INT bits = INTVAL (x);
	fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
	rtx addr;
	bool postinc = FALSE;
	rtx postinc_reg = NULL;
	unsigned align, memsize, align_bits;

	gcc_assert (MEM_P (x));
	addr = XEXP (x, 0);
	if (GET_CODE (addr) == POST_INC)
	  {
	    postinc = 1;
	    addr = XEXP (addr, 0);
	  }
	if (GET_CODE (addr) == POST_MODIFY)
	  {
	    postinc_reg = XEXP( XEXP (addr, 1), 1);
	    addr = XEXP (addr, 0);
	  }
	asm_fprintf (stream, "[%r", REGNO (addr));

	/* We know the alignment of this access, so we can emit a hint in the
	   instruction (for some alignments) as an aid to the memory subsystem
	   of the target.  */
	align = MEM_ALIGN (x) >> 3;
	memsize = MEM_SIZE (x);

	/* Only certain alignment specifiers are supported by the hardware.  */
	if (memsize == 32 && (align % 32) == 0)
	  align_bits = 256;
	else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
	  align_bits = 128;
	else if (memsize >= 8 && (align % 8) == 0)
	  align_bits = 64;
	else
	  align_bits = 0;

	if (align_bits != 0)
	  asm_fprintf (stream, ":%d", align_bits);

	asm_fprintf (stream, "]");

	if (postinc)
	  fputs("!", stream);
	if (postinc_reg)
	  asm_fprintf (stream, ", %r", REGNO (postinc_reg));
      }
      return;
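    /* E.g. a 16-byte vld1/vst1 access known to be 128-bit aligned prints
       its address operand as "[r0:128]" (register illustrative), letting
       the hardware use the faster aligned form.  */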
22044 gcc_assert (MEM_P (x
));
22045 addr
= XEXP (x
, 0);
22046 gcc_assert (REG_P (addr
));
22047 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22051 /* Translate an S register number into a D register number and element index. */
22054 machine_mode mode
= GET_MODE (x
);
22057 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22059 output_operand_lossage ("invalid operand for code '%c'", code
);
22064 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22066 output_operand_lossage ("invalid operand for code '%c'", code
);
22070 regno
= regno
- FIRST_VFP_REGNUM
;
22071 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
22076 gcc_assert (CONST_DOUBLE_P (x
));
22078 result
= vfp3_const_double_for_fract_bits (x
);
22080 result
= vfp3_const_double_for_bits (x
);
22081 fprintf (stream
, "#%d", result
);
    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
	machine_mode mode = GET_MODE (x);
	int regno;

	if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = REGNO (x);
	if (!VFP_REGNO_OK_FOR_SINGLE (regno))
	  {
	    output_operand_lossage ("invalid operand for code '%c'", code);
	    return;
	  }

	regno = regno - FIRST_VFP_REGNUM;
	fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
      }
      return;
    default:
      if (x == 0)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (stream, "%r", REGNO (x));
	  break;

	case MEM:
	  output_memory_reference_mode = GET_MODE (x);
	  output_address (XEXP (x, 0));
	  break;

	case CONST_DOUBLE:
	  {
	    char fpstr[20];
	    real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
			     sizeof (fpstr), 0, 1);
	    fprintf (stream, "#%s", fpstr);
	  }
	  break;

	default:
	  gcc_assert (GET_CODE (x) != NEG);
	  fputc ('#', stream);
	  if (GET_CODE (x) == HIGH)
	    {
	      fputs (":lower16:", stream);
	      x = XEXP (x, 0);
	    }

	  output_addr_const (stream, x);
	  break;
	}
    }
}
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
	{
	  rtx base = XEXP (x, 0);
	  rtx index = XEXP (x, 1);
	  HOST_WIDE_INT offset = 0;

	  if (!REG_P (base)
	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
	    {
	      /* Ensure that BASE is a register.  */
	      /* (one of them must be).  */
	      /* Also ensure the SP is not used as an index register.  */
	      std::swap (base, index);
	    }

	  switch (GET_CODE (index))
	    {
	    case CONST_INT:
	      offset = INTVAL (index);
	      if (is_minus)
		offset = -offset;
	      asm_fprintf (stream, "[%r, #%wd]",
			   REGNO (base), offset);
	      break;

	    case REG:
	      asm_fprintf (stream, "[%r, %s%r]",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (index));
	      break;

	    case MULT:
	    case ASHIFTRT:
	    case LSHIFTRT:
	    case ASHIFT:
	    case ROTATERT:
	      asm_fprintf (stream, "[%r, %s%r",
			   REGNO (base), is_minus ? "-" : "",
			   REGNO (XEXP (index, 0)));
	      arm_print_operand (stream, index, 'S');
	      fputs ("]", stream);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
	{
	  extern machine_mode output_memory_reference_mode;

	  gcc_assert (REG_P (XEXP (x, 0)));

	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
	    asm_fprintf (stream, "[%r, #%s%d]!",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == PRE_DEC ? "-" : "",
			 GET_MODE_SIZE (output_memory_reference_mode));
	  else
	    asm_fprintf (stream, "[%r], #%s%d",
			 REGNO (XEXP (x, 0)),
			 GET_CODE (x) == POST_DEC ? "-" : "",
			 GET_MODE_SIZE (output_memory_reference_mode));
	}
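      /* Illustrative examples for a word-sized access through r0:
	 pre-indexed forms print as "[r0, #4]!", while post-indexed forms
	 print as "[r0], #4".  */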
      else if (GET_CODE (x) == PRE_MODIFY)
	{
	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd]!",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r]!",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else if (GET_CODE (x) == POST_MODIFY)
	{
	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
	    asm_fprintf (stream, "#%wd",
			 INTVAL (XEXP (XEXP (x, 1), 1)));
	  else
	    asm_fprintf (stream, "%r",
			 REGNO (XEXP (XEXP (x, 1), 1)));
	}
      else
	output_addr_const (stream, x);
    }
  else
    {
      if (REG_P (x))
	asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
	{
	  gcc_assert (REG_P (XEXP (x, 0)));
	  if (CONST_INT_P (XEXP (x, 1)))
	    asm_fprintf (stream, "[%r, #%wd]",
			 REGNO (XEXP (x, 0)),
			 INTVAL (XEXP (x, 1)));
	  else
	    asm_fprintf (stream, "[%r, %r]",
			 REGNO (XEXP (x, 0)),
			 REGNO (XEXP (x, 1)));
	}
      else
	output_addr_const (stream, x);
    }
}
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
	  || code == '(' || code == ')' || code == '#'
	  || (TARGET_32BIT && (code == '?'))
	  || (TARGET_THUMB2 && (code == '!'))
	  || (TARGET_THUMB && (code == '_')));
}
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
	 .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
	{
	  /* See legitimize_pic_address for an explanation of the
	     TARGET_VXWORKS_RTP check.  */
	  if (!arm_pic_data_is_text_relative
	      || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
	    fputs ("(GOT)", asm_out_file);
	  else
	    fputs ("(GOTOFF)", asm_out_file);
	}
      fputc ('\n', asm_out_file);
      return true;
    }
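  /* Illustrative example: with -fPIC, a constant-table word naming a
     non-local symbol is emitted as "\t.word\tsym(GOT)", while a local,
     text-relative one becomes "\t.word\tsym(GOTOFF)".  */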
  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_SIZE (GET_MODE_INNER (mode));

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_integer
	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
	  }
      else
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    REAL_VALUE_TYPE rval;

	    REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);

	    assemble_real
	      (rval, GET_MODE_INNER (mode),
	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
	  }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
	       is_ctor ? ".init_array" : ".fini_array",
	       priority);
      s = get_section (buf, SECTION_WRITE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}
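/* Illustrative example: a constructor with priority 65 lands in a section
   named ".init_array.00065" (the "%.5u" format above zero-pads to five
   digits), which lets the linker sort entries by priority.  */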
/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes is:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
	  (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
	  (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */
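/* Illustrative example of the transformation this FSM enables: a short
   forward branch such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can be output instead as

	cmp	r0, #0
	addne	r1, r1, #1

   deleting the branch (and, if nothing else uses it, the label).  */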
/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
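/* For example (illustrative): three consecutive COND_EXEC insns predicated
   EQ, EQ and NE combine into a single block headed by "itte eq"; the
   't'/'e' letters record which insns take the condition and which take
   its inverse.  */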
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.  */
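/* For example (illustrative): given `(eq (reg:CC CC_REGNUM) (const_int 0))'
   this returns ARM_EQ, the index of "eq" in arm_condition_codes.  */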
static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
			   XEXP (comparison, 1));

  switch (mode)
    {
    case CC_DNEmode: code = ARM_NE; goto dominance;
    case CC_DEQmode: code = ARM_EQ; goto dominance;
    case CC_DGEmode: code = ARM_GE; goto dominance;
    case CC_DGTmode: code = ARM_GT; goto dominance;
    case CC_DLEmode: code = ARM_LE; goto dominance;
    case CC_DLTmode: code = ARM_LT; goto dominance;
    case CC_DGEUmode: code = ARM_CS; goto dominance;
    case CC_DGTUmode: code = ARM_HI; goto dominance;
    case CC_DLEUmode: code = ARM_LS; goto dominance;
    case CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
	return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
	return code;
      return ARM_NV;

    case CC_NOOVmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_PL;
	case LT: return ARM_MI;
	default: return ARM_NV;
	}

    case CC_Zmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	default: return ARM_NV;
	}

    case CC_Nmode:
      switch (comp_code)
	{
	case NE: return ARM_MI;
	case EQ: return ARM_PL;
	default: return ARM_NV;
	}

    case CCFPEmode:
    case CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LS;
	case LT: return ARM_MI;
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case ORDERED: return ARM_VC;
	case UNORDERED: return ARM_VS;
	case UNLT: return ARM_LT;
	case UNLE: return ARM_LE;
	case UNGT: return ARM_HI;
	case UNGE: return ARM_PL;
	/* UNEQ and LTGT do not have a representation.  */
	case UNEQ: /* Fall through.  */
	case LTGT: /* Fall through.  */
	default: return ARM_NV;
	}

    case CC_SWPmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_LE;
	case GT: return ARM_LT;
	case LE: return ARM_GE;
	case LT: return ARM_GT;
	case GEU: return ARM_LS;
	case GTU: return ARM_CC;
	case LEU: return ARM_CS;
	case LTU: return ARM_HI;
	default: return ARM_NV;
	}

    case CC_Cmode:
      switch (comp_code)
	{
	case LTU: return ARM_CS;
	case GEU: return ARM_CC;
	default: return ARM_NV;
	}

    case CC_CZmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case CC_NCVmode:
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    case CCmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    default: gcc_unreachable ();
    }
}
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (ANY_RETURN_P (body))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    start_insn = next_nonnote_insn (start_insn);
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	      return_code = GET_CODE (body);
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;
      rtx label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && BARRIER_P (this_insn))
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == return_code
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == return_code)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}

      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (!BARRIER_P (this_insn)
				  && !LABEL_P (this_insn)));
		}
	      if (!this_insn)
		{
		  /* Oh, dear! we ran off the end.. give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.c assumes that it remains intact
	 across this call).  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf (stream, "i%s\t%s\n\t", buff,
		   arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
/* Returns true if REGNO is a valid register
   for holding a quantity of type MODE.  */
int
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_HARD_FLOAT && TARGET_VFP
		&& regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return 0;

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && TARGET_VFP
      && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      /* VFP registers can hold HFmode values, but there is no point in
	 putting them there unless we have hardware conversion insns.  */
      if (mode == HFmode)
	return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	       || (VALID_NEON_QREG_MODE (mode)
		   && NEON_REGNO_OK_FOR_QUAD (regno))
	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return FALSE;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  Do not allow very large Neon structure
     opaque modes in general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
	return FALSE;

      if (TARGET_THUMB2)
	return TRUE;

      return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return FALSE;
}
/* Implement MODES_TIEABLE_P.  */

bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode1)
	  || VALID_NEON_QREG_MODE (mode1)
	  || VALID_NEON_STRUCT_MODE (mode1))
      && (VALID_NEON_DREG_MODE (mode2)
	  || VALID_NEON_QREG_MODE (mode2)
	  || VALID_NEON_STRUCT_MODE (mode2)))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */

enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
	  && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1)))
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));

	  break;
	}
    }

  if (value == 0)
    {
      debug_rtx (addr);
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}

/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}

/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions
   between __fp16 and double to do an intermediate conversion to float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
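/* For example (illustrative): given `__fp16 h;', the C expression
   `(double) h' is lowered by this hook as `(double) (float) h', and for a
   double D, `(__fp16) d' becomes `(__fp16) (float) d'.  */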
23274 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23275 This simply adds HFmode as a supported mode; even though we don't
23276 implement arithmetic on this type directly, it's supported by
23277 optabs conversions, much the way the double-word arithmetic is
23278 special-cased in the default hook. */
23281 arm_scalar_mode_supported_p (machine_mode mode
)
23283 if (mode
== HFmode
)
23284 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
23285 else if (ALL_FIXED_POINT_MODE_P (mode
))
23288 return default_scalar_mode_supported_p (mode
);
/* Emit code to reinterpret one Neon type as another, without altering bits.  */
void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
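/* For example (illustrative): decomposing a copy of d1-d4 into d2-d5 must
   proceed from the last component to the first, otherwise the first move
   would clobber d2 while it is still needed as a source.  The REGNO
   comparison above selects that backward order.  */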
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
      rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

inline static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
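/* E.g. (illustrative) a MASK of 0x14 (binary 10100) yields 2.  */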
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */

static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (VOIDmode, tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
	 thumb_exit.  */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
	{
	  asm_fprintf (f, "%r", regno);

	  if ((lo_mask & ~1) != 0)
	    fprintf (f, ", ");

	  pushed_words++;
	}
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
	  || crtl->calls_eh_return)
	{
	  /* The PC is never popped directly, instead
	     it is popped into r3 and then BX is used.  */
	  fprintf (f, "}\n");

	  thumb_exit (f, -1);

	  return;
	}
      else
	{
	  if (mask & 0xFF)
	    fprintf (f, ", ");

	  asm_fprintf (f, "%r", PC_REGNUM);
	}
    }

  fprintf (f, "}\n");
}
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer.  */
static void
thumb_exit (FILE *f, int reg_containing_return_addr)
{
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  int pops_needed;
  unsigned available;
  unsigned required;
  machine_mode mode;
  int size;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop.  */
  regs_to_pop = 0;
  pops_needed = 0;

  if (reg_containing_return_addr == -1)
    {
      regs_to_pop |= 1 << LR_REGNUM;
      ++pops_needed;
    }

  if (TARGET_BACKTRACE)
    {
      /* Restore the (ARM) frame pointer and stack pointer.  */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
      pops_needed += 2;
    }

  /* If there is nothing to pop then just emit the BX instruction and
     return.  */
  if (pops_needed == 0)
    {
      if (crtl->calls_eh_return)
	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
      return;
    }
  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC.  */
  else if (!TARGET_INTERWORK
	   && !TARGET_BACKTRACE
	   && !is_called_in_ARM_mode (current_function_decl)
	   && !crtl->calls_eh_return)
    {
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
      return;
    }

  /* Find out how many of the (return) argument registers we can corrupt.  */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return.  */
  if (crtl->calls_eh_return)
    size = 12;
  else
    {
      /* If we can deduce the registers used from the function's
	 return value, do so.  This is more reliable than examining
	 df_regs_ever_live_p () because that will be set if the register is
	 ever used in the function, not just if the register is used
	 to hold a return value.  */
      if (crtl->return_rtx != 0)
	mode = GET_MODE (crtl->return_rtx);
      else
	mode = DECL_MODE (DECL_RESULT (current_function_decl));

      size = GET_MODE_SIZE (mode);

      if (size == 0)
	{
	  /* In a void function we can use any argument register.
	     In a function that returns a structure on the stack
	     we can use the second and third argument registers.  */
	  if (mode == VOIDmode)
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (1))
	      | (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	  else
	    regs_available_for_popping =
	      (1 << ARG_REGISTER (2))
	      | (1 << ARG_REGISTER (3));
	}
      else if (size <= 4)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (2))
	  | (1 << ARG_REGISTER (3));
      else if (size <= 8)
	regs_available_for_popping =
	  (1 << ARG_REGISTER (3));
    }

  /* Match registers to be popped with registers into which we pop them.  */
  for (available = regs_available_for_popping,
       required  = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required  &= ~(required  & - required))
    -- pops_needed;

  /* If we have any popping registers left over, remove them.  */
  if (available > 0)
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register.  */
  else if (pops_needed)
    {
      /* If we have not found any free argument registers and
	 reg a4 contains the return address, we must move it.  */
      if (regs_available_for_popping == 0
	  && reg_containing_return_addr == LAST_ARG_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}
      else if (size > 12)
	{
	  /* Register a4 is being used to hold part of the return value,
	     but we have dire need of a free, low register.  */
	  restore_a4 = TRUE;

	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
	}

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
	{
	  /* The fourth argument register is available.  */
	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;

	  --pops_needed;
	}
    }

  /* Pop as many registers as we can.  */
  thumb_pop (f, regs_available_for_popping);

  /* Process the registers we popped.  */
  if (reg_containing_return_addr == -1)
    {
      /* The return address was popped into the lowest numbered register.  */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
	number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
	 the return address will not be corrupted by further pops.  */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);
    }

  /* If we popped other registers then handle them here.  */
  if (regs_available_for_popping)
    {
      int frame_pointer;

      /* Work out which register currently contains the frame pointer.  */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place.  */
      asm_fprintf (f, "\tmov\t%r, %r\n",
		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers.  */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
	{
	  int stack_pointer;

	  /* We popped the stack pointer as well,
	     find the register that contains it.  */
	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);

	  /* Move it into the stack register.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

	  /* At this point we have popped all necessary registers, so
	     do not worry about restoring regs_available_for_popping
	     to its correct value:

	     assert (pops_needed == 0)
	     assert (regs_available_for_popping == (1 << frame_pointer))
	     assert (regs_to_pop == (1 << STACK_POINTER))  */
	}
      else
	{
	  /* Since we have just moved the popped value into the frame
	     pointer, the popping register is available for reuse, and
	     we know that we still have the stack pointer left to pop.  */
	  regs_available_for_popping |= (1 << frame_pointer);
	}
    }

  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it.  */
  if (regs_available_for_popping == 0 && pops_needed > 0)
    {
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
		   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;
    }

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP.  */
  if (pops_needed > 0)
    {
      int popped_into;
      int move_to;

      thumb_pop (f, regs_available_for_popping);

      /* We have popped either FP or SP.
	 Move whichever one it is into the correct register.  */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

      --pops_needed;
    }

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP.  */
  if (pops_needed > 0)
    {
      int popped_into;

      thumb_pop (f, regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
      /*
	assert (regs_to_pop == (1 << STACK_POINTER))
	assert (pops_needed == 1)
      */
    }

  /* If necessary restore the a4 register.  */
  if (restore_a4)
    {
      if (reg_containing_return_addr != LR_REGNUM)
	{
	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
	  reg_containing_return_addr = LR_REGNUM;
	}

      asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
    }

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller.  */
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
}
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern.  */
void
thumb1_final_prescan_insn (rtx_insn *insn)
{
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
		 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch.  */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
    {
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
	{
	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
	    CC_STATUS_INIT;
	}
      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
	{
	  rtx set = single_set (insn);
	  cfun->machine->thumb1_cc_insn = insn;
	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
	  cfun->machine->thumb1_cc_op1 = const0_rtx;
	  cfun->machine->thumb1_cc_mode = CC_NOOVmode;
	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
	    {
	      rtx src1 = XEXP (SET_SRC (set), 1);
	      if (src1 == const0_rtx)
		cfun->machine->thumb1_cc_mode = CCmode;
	    }
	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
	    {
	      /* Record the src register operand instead of dest because
		 cprop_hardreg pass propagates src.  */
	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
	    }
	}
      else if (conds != CONDS_NOCOND)
	cfun->machine->thumb1_cc_insn = NULL_RTX;
    }

  /* Check if unexpected far jump is used.  */
  if (cfun->machine->lr_save_eliminated
      && get_attr_far_jump (insn) == FAR_JUMP_YES)
    internal_error ("Unexpected thumb1 far jump");
}
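/* Return nonzero if VAL is a constant that can be built by shifting an
   8-bit immediate left: e.g. 0x00ff0000 == 0xff << 16.  */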
static int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
/* Returns nonzero if the current function contains,
   or might contain a far jump.  */
static int
thumb_far_jump_used_p (void)
{
  rtx_insn *insn;
  bool far_jump = false;
  unsigned int func_size = 0;

  /* This test is only important for leaf functions.  */
  /* assert (!leaf_function_p ()); */

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used.  Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it.  */
  if (cfun->machine->far_jump_used)
    return 1;

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro.  */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
    {
      /* In this case we know that we are being asked about the elimination
	 of the arg pointer register.  If that register is not being used,
	 then there are no arguments on the stack, and we do not have to
	 worry that a far jump might force the prologue to push the link
	 register, changing the stack offsets.  In this case we can just
	 return false, since the presence of far jumps in the function will
	 not affect stack offsets.

	 If the arg pointer is live (or if it was live, but has now been
	 eliminated and so set to dead) then we do have to test to see if
	 the function might contain a far jump.  This test can lead to some
	 false negatives, since before reload is completed, the length of
	 branch instructions is not known, so gcc defaults to returning their
	 longest length, which in turn sets the far jump attribute to true.

	 A false negative will not result in bad code being generated, but it
	 will result in a needless push and pop of the link register.  We
	 hope that this does not occur too often.

	 If we need doubleword stack alignment this could affect the other
	 elimination offsets so we can't risk getting it wrong.  */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
	cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)
	return 0;
    }

  /* We should not change far_jump_used during or after reload, as there is
     no chance to change stack frame layout.  */
  if (reload_in_progress || reload_completed)
    return 0;

  /* Check to see if the function contains a branch
     insn with the far jump attribute set.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
	far_jump = true;

      func_size += get_attr_length (insn);
    }

  /* Attribute far_jump will always be true for thumb1 before
     shorten_branch pass.  So checking far_jump attribute before
     shorten_branch isn't very useful.

     Following heuristic tries to estimate more accurately if a far jump
     may finally be used.  The heuristic is very conservative as there is
     no chance to roll-back the decision of not to use far jump.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
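  /* For example (illustrative): a function whose insn lengths sum to 700
     bytes gives 700 * 3 = 2100 >= 2048, so a detected far jump is believed;
     at 600 bytes (1800 < 2048) branches are assumed reachable with a short
     branch.  */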
  if (far_jump)
    {
      if ((func_size * 3) >= 2048)
	{
	  /* Record the fact that we have decided that
	     the function does use far jumps.  */
	  cfun->machine->far_jump_used = 1;
	  return 1;
	}
    }

  return 0;
}
/* Return nonzero if FUNC must be entered in ARM mode.  */
int
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;

  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
}
/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP.  For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating.  */
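/* For example (illustrative): if the frame needs 8 bytes beyond the normal
   register save and two extra registers are free, pushing them along with
   the normal "push" allocates that space and saves a separate
   "sub sp, #8".  (Register choice here is arbitrary.)  */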
static int
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
{
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instruction.  */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed.  */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base, size;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
  else
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed.  */
  if (!optimize_size && amount != 512)
    return 0;

  /* Can't do this if there are high registers to push.  */
  if (high_regs_pushed != 0)
    return 0;

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all.  In the epilogue, also allow it if we'll have
     a pop insn for the PC.  */
  if (l_mask == 0
      && (for_prologue
	  || TARGET_BACKTRACE
	  || (live_regs_mask & 1 << LR_REGNUM) == 0
	  || TARGET_INTERWORK
	  || crtl->args.pretend_args_size != 0))
    return 0;

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation.  */
  if (for_prologue
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
    return 0;

  reg_base = 0;
  n_free = 0;
  if (!for_prologue)
    {
      size = arm_size_return_regs ();
      reg_base = ARM_NUM_INTS (size);
      live_regs_mask >>= reg_base;
    }

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
	 && (for_prologue || call_used_regs[reg_base + n_free]))
    {
      live_regs_mask >>= 1;
      n_free++;
    }

  if (n_free == 0)
    return 0;
  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
    return amount / 4;
  return 0;
}
/* The bits which aren't usefully expanded as rtl.  */
const char *
thumb1_unexpanded_epilogue (void)
{
  arm_stack_offsets *offsets;
  int regno;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int extra_pop;
  int had_to_push_lr;
  int size;

  if (cfun->machine->return_used_this_function != 0)
    return "";

  if (IS_NAKED (arm_current_func_type ()))
    return "";

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* If we can, deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
  if (extra_pop > 0)
    {
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
    }

  /* The prolog may have pushed some high registers to use as
     work registers.  e.g. the testsuite file:
     gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
     compiles to produce:
	push	{r4, r5, r6, r7, lr}
	mov	r7, r9
	mov	r6, r8
	push	{r6, r7}
     as part of the prolog.  We have to undo that pushing here.  */

  if (high_regs_pushed)
    {
      unsigned long mask = live_regs_mask & 0xff;
      int next_hi_reg;

      /* The available low registers depend on the size of the value we are
	 returning.  */
      if (size <= 12)
	mask |= 1 << 3;
      if (size <= 8)
	mask |= 1 << 2;

      if (mask == 0)
	/* Oh dear!  We have no low registers into which we can pop
	   high registers!  */
	internal_error
	  ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      while (high_regs_pushed)
	{
	  /* Find lo register(s) into which the high register(s) can
	     be popped.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		high_regs_pushed--;
	      if (high_regs_pushed == 0)
		break;
	    }

	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */

	  /* Pop the values into the low register(s).  */
	  thumb_pop (asm_out_file, mask);

	  /* Move the value(s) into the high registers.  */
	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
	    {
	      if (mask & (1 << regno))
		{
		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
			       regno);

		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
		    if (live_regs_mask & (1 << next_hi_reg))
		      break;
		}
	    }
	}
      live_regs_mask &= ~0x0f00;
    }

  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
    {
      /* Pop the return address into the PC.  */
      if (had_to_push_lr)
	live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
	 structure was created which includes an adjusted stack
	 pointer, so just pop everything.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      /* We have either just popped the return address into the
	 PC or it was kept in LR for the entire function.
	 Note that thumb_pop has already called thumb_exit if the
	 PC was in the list.  */
      if (!had_to_push_lr)
	thumb_exit (asm_out_file, LR_REGNUM);
    }
  else
    {
      /* Pop everything but the return address.  */
      if (live_regs_mask)
	thumb_pop (asm_out_file, live_regs_mask);

      if (had_to_push_lr)
	{
	  if (size > 12)
	    {
	      /* We have no free low regs, so save one.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
			   LAST_ARG_REGNUM);
	    }

	  /* Get the return address into a temporary register.  */
	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);

	  if (size > 12)
	    {
	      /* Move the return address to lr.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
			   LAST_ARG_REGNUM);
	      /* Restore the low register.  */
	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
			   IP_REGNUM);
	      regno = LR_REGNUM;
	    }
	  else
	    regno = LAST_ARG_REGNUM;
	}
      else
	regno = LR_REGNUM;

      /* Remove the argument registers that were pushed onto the stack.  */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
		   SP_REGNUM, SP_REGNUM,
		   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
    }

  return "";
}
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Like arm_compute_initial_elimination offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->saved_args;

	case FRAME_POINTER_REGNUM:
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->saved_args;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->saved_args;

	default:
	  gcc_unreachable ();
	}
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  return offsets->saved_regs - offsets->soft_frame;

	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return offsets->locals_base - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }
}
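/* A worked example of the eliminations above, assuming a frame where
   saved_args = 0, saved_regs = 16, soft_frame = 16, locals_base = 24
   and outgoing_args = 32 (all values hypothetical): eliminating
   ARG_POINTER_REGNUM to STACK_POINTER_REGNUM yields 32 - 0 = 32, while
   FRAME_POINTER_REGNUM to STACK_POINTER_REGNUM yields 32 - 16 = 16.  */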
/* Generate the function's prologue.  */

void
thumb1_expand_prologue (void)
{
  rtx_insn *insn;

  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  int regno;
  unsigned long live_regs_mask;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  if (IS_INTERRUPT (func_type))
    {
      error ("interrupt Service Routines cannot be coded in Thumb mode");
      return;
    }

  if (is_called_in_ARM_mode (current_function_decl))
    emit_insn (gen_prologue_thumb1_interwork ());

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  /* Extract a mask of the ones we can give to the Thumb's push instruction. */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed.  */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  if (crtl->args.pretend_args_size)
    {
      rtx x = GEN_INT (-crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
	{
	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
	  unsigned long mask;

	  mask = 1ul << (LAST_ARG_REGNUM + 1);
	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);

	  insn = thumb1_emit_multi_reg_push (mask, 0);
	}
      else
	insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				      stack_pointer_rtx, x));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (TARGET_BACKTRACE)
    {
      HOST_WIDE_INT offset = 0;
      unsigned work_register;
      rtx work_reg, x, arm_hfp_rtx;

      /* We have been asked to create a stack backtrace structure.
	 The code looks like this:

	  0   sub   SP, #16         Reserve space for 4 registers.
	  2   push  {R7}            Push low registers.
	  4   add   R7, SP, #20     Get the stack pointer before the push.
	  6   str   R7, [SP, #8]    Store the stack pointer
				      (before reserving the space).
	  8   mov   R7, PC          Get hold of the start of this code + 12.
	 10   str   R7, [SP, #16]   Store it.
	 12   mov   R7, FP          Get hold of the current frame pointer.
	 14   str   R7, [SP, #4]    Store it.
	 16   mov   R7, LR          Get hold of the current return address.
	 18   str   R7, [SP, #12]   Store it.
	 20   add   R7, SP, #16     Point at the start of the
				      backtrace structure.
	 22   mov   FP, R7          Put this value into the frame pointer.  */

      work_register = thumb_find_work_register (live_regs_mask);
      work_reg = gen_rtx_REG (SImode, work_register);
      arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    stack_pointer_rtx, GEN_INT (-16)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (l_mask)
	{
	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  offset = bit_count (l_mask) * UNITS_PER_WORD;
	}

      x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      /* Make sure that the instruction fetching the PC is in the right place
	 to calculate "start of backtrace creation code + 12".  */
      /* ??? The stores using the common WORK_REG ought to be enough to
	 prevent the scheduler from doing anything weird.  Failing that
	 we could always move all of the following into an UNSPEC_VOLATILE.  */
      if (l_mask)
	{
	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}
      else
	{
	  emit_move_insn (work_reg, arm_hfp_rtx);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);

	  x = gen_rtx_REG (SImode, PC_REGNUM);
	  emit_move_insn (work_reg, x);

	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
	  x = gen_frame_mem (SImode, x);
	  emit_move_insn (x, work_reg);
	}

      x = gen_rtx_REG (SImode, LR_REGNUM);
      emit_move_insn (work_reg, x);

      x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
      x = gen_frame_mem (SImode, x);
      emit_move_insn (x, work_reg);

      x = GEN_INT (offset + 12);
      emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));

      emit_move_insn (arm_hfp_rtx, work_reg);
    }
  /* Optimization:  If we are not pushing any low registers but we are going
     to push some high registers then delay our first push.  This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
  else if ((l_mask & 0xff) != 0
	   || (high_regs_pushed == 0 && l_mask))
    {
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      insn = thumb1_emit_multi_reg_push (mask, mask);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (high_regs_pushed)
    {
      unsigned pushable_regs;
      unsigned next_hi_reg;
      unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
						 : crtl->args.info.nregs;
      unsigned arg_regs_mask = (1 << arg_regs_num) - 1;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
	if (live_regs_mask & (1 << next_hi_reg))
	  break;

      /* Here we need to mask out registers used for passing arguments
	 even if they can be pushed.  This is to avoid using them to stash
	 the high registers.  Such a stash may clobber the use of
	 arguments.  */
      pushable_regs = l_mask & (~arg_regs_mask) & 0xff;

      if (pushable_regs == 0)
	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
	{
	  unsigned long real_regs_mask = 0;

	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
	    {
	      if (pushable_regs & (1 << regno))
		{
		  emit_move_insn (gen_rtx_REG (SImode, regno),
				  gen_rtx_REG (SImode, next_hi_reg));

		  high_regs_pushed--;
		  real_regs_mask |= (1 << next_hi_reg);

		  if (high_regs_pushed)
		    {
		      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
			   next_hi_reg--)
			if (live_regs_mask & (1 << next_hi_reg))
			  break;
		    }
		  else
		    {
		      pushable_regs &= ~((1 << regno) - 1);
		      break;
		    }
		}
	    }

	  /* If we had to find a work register and we have not yet
	     saved the LR then add it to the list of regs to push.  */
	  if (l_mask == (1 << LR_REGNUM))
	    {
	      pushable_regs |= l_mask;
	      real_regs_mask |= l_mask;
	      l_mask = 0;
	    }

	  insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register.  */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
		    stack_pointer_rtx);

  if (flag_stack_usage_info)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
  if (amount)
    {
      if (amount < 512)
	{
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
					GEN_INT (- amount)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  rtx reg, dwarf;

	  /* The stack decrement is too big for an immediate value in a single
	     insn.  In theory we could issue multiple subtracts, but after
	     three of them it becomes more space efficient to place the full
	     value in the constant pool and load into a register.  (Also the
	     ARM debugger really likes to see only one stack decrement per
	     function).  So instead we look for a scratch register into which
	     we can load the decrement, and then we subtract this from the
	     stack pointer.  Unfortunately on the thumb the only available
	     scratch registers are the argument registers, and we cannot use
	     these as they may hold arguments to the function.  Instead we
	     attempt to locate a call preserved register which is used by this
	     function.  If we can find one, then we know that it will have
	     been pushed at the start of the prologue and so we can corrupt
	     it now.  */
	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
	    if (live_regs_mask & (1 << regno))
	      break;

	  gcc_assert (regno <= LAST_LO_REGNUM);

	  reg = gen_rtx_REG (SImode, regno);

	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));

	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					stack_pointer_rtx, reg));

	  dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			       plus_constant (Pmode, stack_pointer_rtx,
					      -amount));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
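  /* Illustrative output for the constant-pool path above, assuming a
     4096-byte frame and r4 as the call-saved register found (both
     hypothetical):

	ldr	r4, .LCn	@ .LCn: .word -4096
	add	sp, sp, r4

     The REG_FRAME_RELATED_EXPR note records the pair as the single
     adjustment "sp = sp - 4096" for the unwinder.  */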
  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
	  && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
}
/* Generate pattern *pop_multiple_with_stack_update_and_return if single
   POP instruction can be generated.  LR should be replaced by PC.  All
   the checks required are already done by USE_RETURN_INSN ().  Hence,
   all we really need to check here is if a single register or multiple
   registers are to be returned.  */
void
thumb2_expand_return (bool simple_return)
{
  int i, num_regs;
  unsigned long saved_regs_mask;
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      num_regs++;

  if (!simple_return && saved_regs_mask)
    {
      if (num_regs == 1)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
	  rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
	  rtx addr = gen_rtx_MEM (SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  XVECEXP (par, 0, 0) = ret_rtx;
	  XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
	  RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
	  emit_jump_insn (par);
	}
      else
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  arm_emit_multi_reg_pop (saved_regs_mask);
	}
    }
  else
    emit_jump_insn (simple_return_rtx);
}
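/* The PARALLEL built above has the shape of a single "pop {pc}" return
   (a sketch of the RTL, not compiler dump output):

     (parallel [(return)
		(set (reg:SI PC) (mem:SI (post_inc:SI (reg:SI SP))))])  */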
void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
  if (amount)
    {
      emit_insn (gen_blockage ());

      if (amount < 512)
	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (amount)));
      else
	{
	  /* r3 is always free in the epilogue.  */
	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
	}
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_force_register_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));
}
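/* Illustrative expansion of the r3 path above for AMOUNT = 640 (value
   hypothetical; gen_movsi may itself expand to a literal pool load):

	ldr	r3, .LCm	@ .LCm: .word 640
	add	sp, sp, r3  */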
/* Epilogue code for APCS frame.  */
static void
arm_expand_epilogue_apcs_frame (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int floats_from_frame = 0;
  arm_stack_offsets *offsets;

  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
  func_type = arm_current_func_type ();

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  /* Find the offset of the floating-point save area in the frame.  */
  floats_from_frame
    = (offsets->saved_args
       + arm_compute_static_chain_stack_bytes ()
       - offsets->frame);

  /* Compute how many core registers saved and how far away the floats are.  */
  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (saved_regs_mask & (1 << i))
      {
	num_regs++;
	floats_from_frame += 4;
      }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      int start_reg;
      rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

      /* The offset is from IP_REGNUM.  */
      int saved_size = arm_get_vfp_saved_size ();
      if (saved_size > 0)
	{
	  rtx_insn *insn;
	  floats_from_frame += saved_size;
	  insn = emit_insn (gen_addsi3 (ip_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (-floats_from_frame)));
	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
				       ip_rtx, hard_frame_pointer_rtx);
	}

      /* Generate VFP register multi-pop.  */
      start_reg = FIRST_VFP_REGNUM;

      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    if (start_reg != i)
	      arm_emit_vfp_multi_reg_pop (start_reg,
					  (i - start_reg) / 2,
					  gen_rtx_REG (SImode,
						       IP_REGNUM));
	    start_reg = i + 2;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (start_reg != i)
	arm_emit_vfp_multi_reg_pop (start_reg,
				    (i - start_reg) / 2,
				    gen_rtx_REG (SImode, IP_REGNUM));
    }

  if (TARGET_IWMMXT)
    {
      /* The frame pointer is guaranteed to be non-double-word aligned, as
	 it is set to double-word-aligned old_stack_pointer - 4.  */
      rtx_insn *insn;
      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);

      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
	if (df_regs_ever_live_p (i) && !call_used_regs[i])
	  {
	    rtx addr = gen_frame_mem (V2SImode,
				      plus_constant (Pmode,
						     hard_frame_pointer_rtx,
						     - lrm_count * 4));
	    insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					       gen_rtx_REG (V2SImode, i),
					       NULL_RTX);
	    lrm_count += 2;
	  }
    }

  /* saved_regs_mask should contain IP which contains old stack pointer
     at the time of activation creation.  Since SP and IP are adjacent registers,
     we can restore the value directly into SP.  */
  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
  saved_regs_mask &= ~(1 << IP_REGNUM);
  saved_regs_mask |= (1 << SP_REGNUM);

  /* There are two registers left in saved_regs_mask - LR and PC.  We
     only need to restore LR (the return address), but to
     save time we can load it directly into PC, unless we need a
     special function exit sequence, or we are not really returning.  */
  if (really_return
      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
      && !crtl->calls_eh_return)
    /* Delete LR from the register mask, so that LR on
       the stack is loaded into the PC in the register mask.  */
    saved_regs_mask &= ~(1 << LR_REGNUM);
  else
    saved_regs_mask &= ~(1 << PC_REGNUM);

  num_regs = bit_count (saved_regs_mask);
  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
    {
      rtx_insn *insn;
      emit_insn (gen_blockage ());
      /* Unwind the stack to just below the saved registers.  */
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
				    hard_frame_pointer_rtx,
				    GEN_INT (- 4 * num_regs)));

      arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
				   stack_pointer_rtx, hard_frame_pointer_rtx);
    }

  arm_emit_multi_reg_pop (saved_regs_mask);

  if (IS_INTERRUPT (func_type))
    {
      /* Interrupt handlers will have pushed the
	 IP onto the stack, so restore it now.  */
      rtx_insn *insn;
      rtx addr = gen_rtx_MEM (SImode,
			      gen_rtx_POST_INC (SImode,
						stack_pointer_rtx));
      set_mem_alias_set (addr, get_frame_alias_set ());
      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					 gen_rtx_REG (SImode, IP_REGNUM),
					 NULL_RTX);
    }

  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));

  emit_jump_insn (simple_return_rtx);
}
/* Generate RTL to represent ARM epilogue.  Really_return is true if the
   function is not a sibcall.  */
void
arm_expand_epilogue (bool really_return)
{
  unsigned long func_type;
  unsigned long saved_regs_mask;
  int num_regs = 0;
  int i;
  int amount;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have epilogues.  Hence, generate return pattern, and
     let output_return_instruction take care of instruction emission if any.  */
  if (IS_NAKED (func_type)
      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
    {
      if (really_return)
	emit_jump_insn (simple_return_rtx);
      return;
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      arm_expand_epilogue_apcs_frame (really_return);
      return;
    }

  /* Get frame offsets for ARM.  */
  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;
  num_regs = bit_count (saved_regs_mask);

  if (frame_pointer_needed)
    {
      rtx_insn *insn;
      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM)
	{
	  /* In ARM mode, frame pointer points to first saved register.
	     Restore stack pointer to last saved register.  */
	  amount = offsets->frame - offsets->saved_regs;

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
					hard_frame_pointer_rtx,
					GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (insn, amount,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);

	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
      else
	{
	  /* In Thumb-2 mode, the frame pointer points to the last saved
	     register.  */
	  amount = offsets->locals_base - offsets->saved_regs;
	  if (amount)
	    {
	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					    hard_frame_pointer_rtx,
					    GEN_INT (amount)));
	      arm_add_cfa_adjust_cfa_note (insn, amount,
					   hard_frame_pointer_rtx,
					   hard_frame_pointer_rtx);
	    }

	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
				       hard_frame_pointer_rtx));
	  arm_add_cfa_adjust_cfa_note (insn, 0,
				       stack_pointer_rtx,
				       hard_frame_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
	     deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }
  else
    {
      /* Pop off outgoing args and local frame to adjust stack pointer to
	 last saved register.  */
      amount = offsets->outgoing_args - offsets->saved_regs;
      if (amount)
	{
	  rtx_insn *tmp;
	  /* Force out any pending memory operations that reference stacked data
	     before stack de-allocation occurs.  */
	  emit_insn (gen_blockage ());
	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (amount)));
	  arm_add_cfa_adjust_cfa_note (tmp, amount,
				       stack_pointer_rtx, stack_pointer_rtx);
	  /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
	     not deleted.  */
	  emit_insn (gen_force_register_use (stack_pointer_rtx));
	}
    }

  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* Generate VFP register multi-pop.  */
      int end_reg = LAST_VFP_REGNUM + 1;

      /* Scan the registers in reverse order.  We need to match
	 any groupings made in the prologue and generate matching
	 vldm operations.  The need to match groups is because,
	 unlike pop, vldm can only do consecutive regs.  */
      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
	/* Look for a case where a reg does not need restoring.  */
	if ((!df_regs_ever_live_p (i) || call_used_regs[i])
	    && (!df_regs_ever_live_p (i + 1)
		|| call_used_regs[i + 1]))
	  {
	    /* Restore the regs discovered so far (from reg+2 to
	       end_reg).  */
	    if (end_reg > i + 2)
	      arm_emit_vfp_multi_reg_pop (i + 2,
					  (end_reg - (i + 2)) / 2,
					  stack_pointer_rtx);
	    end_reg = i;
	  }

      /* Restore the remaining regs that we have discovered (or possibly
	 even all of them, if the conditional in the for loop never
	 fired).  */
      if (end_reg > i + 2)
	arm_emit_vfp_multi_reg_pop (i + 2,
				    (end_reg - (i + 2)) / 2,
				    stack_pointer_rtx);
    }

  if (TARGET_IWMMXT)
    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
      if (df_regs_ever_live_p (i) && !call_used_regs[i])
	{
	  rtx_insn *insn;
	  rtx addr = gen_rtx_MEM (V2SImode,
				  gen_rtx_POST_INC (SImode,
						    stack_pointer_rtx));
	  set_mem_alias_set (addr, get_frame_alias_set ());
	  insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
	  REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
					     gen_rtx_REG (V2SImode, i),
					     NULL_RTX);
	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
				       stack_pointer_rtx, stack_pointer_rtx);
	}

  if (saved_regs_mask)
    {
      rtx insn;
      bool return_in_pc = false;

      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
	  && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !crtl->calls_eh_return)
	{
	  saved_regs_mask &= ~(1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  return_in_pc = true;
	}

      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
	{
	  for (i = 0; i <= LAST_ARM_REGNUM; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx addr = gen_rtx_MEM (SImode,
					gen_rtx_POST_INC (SImode,
							  stack_pointer_rtx));
		set_mem_alias_set (addr, get_frame_alias_set ());

		if (i == PC_REGNUM)
		  {
		    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
		    XVECEXP (insn, 0, 0) = ret_rtx;
		    XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
							gen_rtx_REG (SImode, i),
							addr);
		    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
		    insn = emit_jump_insn (insn);
		  }
		else
		  {
		    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
						 addr));
		    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
						       gen_rtx_REG (SImode, i),
						       NULL_RTX);
		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
						 stack_pointer_rtx,
						 stack_pointer_rtx);
		  }
	      }
	}
      else
	{
	  if (TARGET_LDRD
	      && current_tune->prefer_ldrd_strd
	      && !optimize_function_for_size_p (cfun))
	    {
	      if (TARGET_THUMB2)
		thumb2_emit_ldrd_pop (saved_regs_mask);
	      else if (TARGET_ARM && !IS_INTERRUPT (func_type))
		arm_emit_ldrd_pop (saved_regs_mask);
	      else
		arm_emit_multi_reg_pop (saved_regs_mask);
	    }
	  else
	    arm_emit_multi_reg_pop (saved_regs_mask);
	}

      if (return_in_pc == true)
	return;
    }

  if (crtl->args.pretend_args_size)
    {
      int i, j;
      rtx dwarf = NULL_RTX;
      rtx_insn *tmp =
	emit_insn (gen_addsi3 (stack_pointer_rtx,
			       stack_pointer_rtx,
			       GEN_INT (crtl->args.pretend_args_size)));

      RTX_FRAME_RELATED_P (tmp) = 1;

      if (cfun->machine->uses_anonymous_args)
	{
	  /* Restore pretend args.  Refer to arm_expand_prologue on how
	     pretend_args are saved on the stack.  */
	  int num_regs = crtl->args.pretend_args_size / 4;
	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
	  for (j = 0, i = 0; j < num_regs; i++)
	    if (saved_regs_mask & (1 << i))
	      {
		rtx reg = gen_rtx_REG (SImode, i);
		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
		j++;
	      }
	  REG_NOTES (tmp) = dwarf;
	}
      arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
				   stack_pointer_rtx, stack_pointer_rtx);
    }

  if (!really_return)
    return;

  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx,
			   stack_pointer_rtx,
			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));

  if (IS_STACKALIGN (func_type))
    /* Restore the original stack pointer.  Before prologue, the stack was
       realigned and the original stack pointer saved in r0.  For details,
       see comment in arm_expand_prologue.  */
    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));

  emit_jump_insn (simple_return_rtx);
}
/* Implementation of insn prologue_thumb1_interwork.  This is the first
   "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
const char *
thumb1_output_interwork (void)
{
  const char *name;
  FILE *f = asm_out_file;

  gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
  gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
	      == SYMBOL_REF);
  name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

  /* Generate code sequence to switch us into Thumb mode.  */
  /* The .code 32 directive has already been emitted by
     ASM_DECLARE_FUNCTION_NAME.  */
  asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
  asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

  /* Generate a label, so that the debugger will notice the
     change in instruction sets.  This label is also used by
     the assembler to bypass the ARM code when this function
     is called from a Thumb encoded function elsewhere in the
     same file.  Hence the definition of STUB_NAME here must
     agree with the definition in gas/config/tc-arm.c.  */

#define STUB_NAME ".real_start_of"

  fprintf (f, "\t.code\t16\n");
#ifdef ARM_PE
  if (arm_dllexport_name_p (name))
    name = arm_strip_name_encoding (name);
#endif
  asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
  fprintf (f, "\t.thumb_func\n");
  asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  return "";
}
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (REG_P (operands[0]));
  gcc_assert (MEM_P (operands[1]));

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (REG_P (base));

      /* Catch the case of <address> = <reg> + <reg> */
      if (REG_P (offset))
	{
	  int reg_offset = REGNO (offset);
	  int reg_base = REGNO (base);
	  int reg_dest = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  rtx tmp;

  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{
	  tmp = operands[4];
	  operands[4] = operands[5];
	  operands[5] = tmp;
	}
      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
	std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
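/* The three conditional swaps in the 3-register case above form a
   sorting network: e.g. (hypothetical) operands r5, r2, r4 become
   r2, r4, r5 after the swaps, which is required because ldmia/stmia
   register lists must be in ascending order.  */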
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
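/* The emitted call therefore looks like "bl .L<n>", where .L<n> is the
   per-register internal label; arm_file_end (below) emits the shared
   stubs, one per register used, e.g. (label number hypothetical):

   .L42:
	bx	r3  */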
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
								offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (Pmode, in,
							     offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
								offset)),
			    reg));
    }
}
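/* For example, a 15-byte copy (size hypothetical) expands to one
   12-byte block move (gen_movmem12b post-increments IN and OUT),
   then a halfword copy at offset 0 and a byte copy at offset 2 of
   the remaining tail.  */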
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Handle reading a half-word from memory during reload.  */
void
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
      ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (Eg gas prior to 2.20).  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */

void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
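/* For example, arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1)
   emits

	.eabi_attribute 28, 1	@ Tag_ABI_VFP_args

   where the trailing comment appears only when -fverbose-asm or
   -dA is in effect.  */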
static void
arm_file_start (void)
{
  int val;

  if (TARGET_UNIFIED_ASM)
    asm_fprintf (asm_out_file, "\t.syntax unified\n");

  if (TARGET_BPABI)
    {
      const char *fpu_name;
      if (arm_selected_arch)
	{
	  /* armv7ve doesn't support any extensions.  */
	  if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
	    {
	      /* Keep backward compatibility for assemblers
		 which don't support armv7ve.  */
	      asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
	      asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
	    }
	  else
	    {
	      const char* pos = strchr (arm_selected_arch->name, '+');
	      if (pos)
		{
		  char buf[15];
		  gcc_assert (strlen (arm_selected_arch->name)
			      <= sizeof (buf) / sizeof (*pos));
		  strncpy (buf, arm_selected_arch->name,
			   (pos - arm_selected_arch->name) * sizeof (*pos));
		  buf[pos - arm_selected_arch->name] = '\0';
		  asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
		  asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
		}
	      else
		asm_fprintf (asm_out_file, "\t.arch %s\n",
			     arm_selected_arch->name);
	    }
	}
      else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
      else
	{
	  const char* truncated_name
	    = arm_rewrite_selected_cpu (arm_selected_cpu->name);
	  asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
	}

      if (TARGET_SOFT_FLOAT)
	fpu_name = "softvfp";
      else
	{
	  fpu_name = arm_fpu_desc->name;
	  if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
	    {
	      if (TARGET_HARD_FLOAT)
		arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
	      if (TARGET_HARD_FLOAT_ABI)
		arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
	    }
	}
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      if (flag_rounding_math)
	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);

      if (!flag_unsafe_math_optimizations)
	{
	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
	}
      if (flag_signaling_nans)
	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);

      arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
			       flag_finite_math_only ? 1 : 3);

      arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
      arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
      arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
			       flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);

      arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
			       unaligned_access);

      if (arm_fp16_format)
	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
				 (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook();
    }

  default_file_start ();
}
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		     HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
		     tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
				       function)
		    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  final_start_function (emit_barrier (), file, 1);

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	fputs ("\tldr\tr12, ", file);
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	{
	  /* Thumb1 unified syntax requires s suffix in instruction name when
	     one of the operands is immediate.  */
	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
		       mi_op, this_regno, this_regno,
		       mi_delta);
	}
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
	     pipeline offset is four rather than eight.  Adjust the offset
	     accordingly.  */
	  tem = plus_constant (GET_MODE (tem), tem,
			       TARGET_THUMB1_ONLY ? -3 : -7);
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }

  final_end_function ();
}
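/* Sketch of a simple non-PIC, ARM-mode thunk with DELTA = 8 (value
   hypothetical; "(PLT)" is appended only when NEED_PLT_RELOC holds):

	add	r0, r0, #8
	b	target(PLT)  */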
int
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  REAL_VALUE_TYPE r;
  long bits;

  REAL_VALUE_FROM_CONST_DOUBLE (r, c);
  bits = real_to_target (NULL, &r, HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
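/* E.g. on a little-endian target the HFmode constant 1.0 (bit pattern
   0x3c00) is emitted as its two data bytes followed by two zero bytes,
   so the word-sized ldr that loads it reads the value in the low
   half.  */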
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (!MEM_P (operands[1])
      || GET_CODE (sum = XEXP (operands[1], 0)) != PLUS
      || !REG_P (reg = XEXP (sum, 0))
      || !CONST_INT_P (offset = XEXP (sum, 1))
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
			    machine_mode mode,
			    tree type,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
	nregs++;
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
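/* Sketch: for "int f (int a, ...)" under AAPCS (prototype hypothetical),
   one core register is named, so *pretend_size becomes
   (4 - 1) * UNITS_PER_WORD = 12 and the prologue stores r1-r3 below the
   remaining anonymous arguments.  */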
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}
static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}


/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}


/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}


/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}


/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}


/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}


/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (Pmode, addr, delta);
	}
      /* The store needs to be marked as frame related in order to prevent
	 DSE from deleting it as dead if it is based on fp.  */
      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
    }
}

void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  if (TARGET_THUMB1)
	    limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, addr, delta);

      /* The store needs to be marked as frame related in order to prevent
	 DSE from deleting it as dead if it is based on fp.  */
      rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
		      || mode == V4HFmode || mode == V16QImode
		      || mode == V4SFmode || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
			  || mode == V2UHQmode || mode == V2HQmode
			  || mode == V2UHAmode || mode == V2HAmode))
    return true;

  return false;
}

/* Implements target hook array_mode_supported_p.  */

static bool
arm_array_mode_supported_p (machine_mode mode,
			    unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_NEON
      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static machine_mode
arm_preferred_simd_mode (machine_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case SFmode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
      case SImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
      case HImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
      case QImode:
	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
      case DImode:
	if (!TARGET_NEON_VECTORIZE_DOUBLE)
	  return V2DImode;
	break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case SImode:
	return V2SImode;
      case HImode:
	return V4HImode;
      case QImode:
	return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}
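/* Worked examples of the mapping above: s5 (FIRST_VFP_REGNUM + 5) maps
   to DWARF register 64 + 5 = 69 in the legacy range, while d20, which
   has no single-precision alias, maps to 256 + 20 = 276.  */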
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
	if (TARGET_BIG_END)
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
	  }
	else
	  {
	    parts[i] = gen_rtx_REG (SImode, regno + i);
	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
	  }
    }
  else
    {
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
	parts[i] = gen_rtx_REG (DImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked into unwind information, or for -Os
   sometimes pushes some dummy registers before the first register that needs
   to be tracked in unwind information; such dummy registers are there just
   to avoid separate stack adjustment, and will not be restored in the
   epilogue.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  unsigned padfirst = 0, padlast = 0;
  rtx e;

  e = XVECEXP (p, 0, 0);
  gcc_assert (GET_CODE (e) == SET);

  /* First insn will adjust the stack pointer.  */
  gcc_assert (GET_CODE (e) == SET
	      && REG_P (SET_DEST (e))
	      && REGNO (SET_DEST (e)) == SP_REGNUM
	      && GET_CODE (SET_SRC (e)) == PLUS);

  offset = -INTVAL (XEXP (SET_SRC (e), 1));
  nregs = XVECLEN (p, 0) - 1;
  gcc_assert (nregs);

  reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
  if (reg < 16)
    {
      /* For -Os dummy registers can be pushed at the beginning to
	 avoid separate stack pointer adjustment.  */
      e = XVECEXP (p, 0, 1);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padfirst = INTVAL (XEXP (e, 1));
      gcc_assert (padfirst == 0 || optimize_size);
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      e = XVECEXP (p, 0, nregs);
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
      else
	padlast = offset - 4;
      gcc_assert (padlast == 0 || padlast == 4);
      if (padlast == 4)
	fprintf (asm_out_file, "\t.pad #4\n");
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else
    /* Unknown register type.  */
    gcc_unreachable ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  gcc_assert (offset == padfirst + nregs * reg_size + padlast);

  offset = padfirst;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      gcc_assert (GET_CODE (e) == SET
		  && MEM_P (SET_DEST (e))
		  && REG_P (SET_SRC (e)));

      reg = REGNO (SET_SRC (e));
      gcc_assert (reg >= lastreg);

      if (i != 1)
	fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (SET_DEST (e), 0);
      if (GET_CODE (e) == PLUS)
	gcc_assert (REG_P (XEXP (e, 0))
		    && REGNO (XEXP (e, 0)) == SP_REGNUM
		    && CONST_INT_P (XEXP (e, 1))
		    && offset == INTVAL (XEXP (e, 1)));
      else
	gcc_assert (i == 1
		    && REG_P (e)
		    && REGNO (e) == SP_REGNUM);
      offset += reg_size;
#endif
    }
  fprintf (asm_out_file, "}\n");
  if (padfirst)
    fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
}
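/* For example, a prologue push represented as (parallel [sp = sp - 12,
   mem[sp] = r4, mem[sp + 4] = r5, mem[sp + 8] = lr]) would produce the
   unwind directive ".save {r4, r5, lr}".  This is a sketch of the expected
   shape, not output copied from a build.  */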
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || !REG_P (XEXP (XEXP (e0, 0), 0))
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf(asm_out_file, "{d%d}\n",
		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || !REG_P (XEXP (e1, 0))
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || !CONST_INT_P (XEXP (e1, 1)))
	    abort ();

	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (!REG_P (XEXP (e1, 0))
		  || !CONST_INT_P (XEXP (e1, 1)))
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   offset);
	    }
	  else if (REG_P (e1))
	    {
	      reg = REGNO (e1);
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && REG_P (XEXP (e1, 0))
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && CONST_INT_P (XEXP (e1, 1)))
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
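/* Examples of the directives emitted above (illustrative only): a stack
   adjustment "sp = sp - 16" becomes "\t.pad #16"; establishing the frame
   pointer with "fp = sp + 8" becomes "\t.setfp fp, sp, #8"; and a single
   VFP push produces something like "\t.save {d8}".  */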
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
{
  rtx note, pat;
  bool handled_one = false;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
    return;

  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
	{
	case REG_FRAME_RELATED_EXPR:
	  pat = XEXP (note, 0);
	  goto found;

	case REG_CFA_REGISTER:
	  pat = XEXP (note, 0);
	  if (pat == NULL)
	    {
	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	    }

	  /* Only emitted for IS_STACKALIGN re-alignment.  */
	  {
	    rtx dest, src;
	    unsigned reg;

	    src = SET_SRC (pat);
	    dest = SET_DEST (pat);

	    gcc_assert (src == stack_pointer_rtx);
	    reg = REGNO (dest);
	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
			 reg + 0x90, reg);
	  }
	  handled_one = true;
	  break;

	/* The INSN is generated in epilogue.  It is set as RTX_FRAME_RELATED_P
	   to get correct dwarf information for shrink-wrap.  We should not
	   emit unwind information for it because these are used either for
	   pretend arguments or notes to adjust sp and restore registers from
	   stack.  */
	case REG_CFA_DEF_CFA:
	case REG_CFA_ADJUST_CFA:
	case REG_CFA_RESTORE:
	  return;

	case REG_CFA_EXPRESSION:
	case REG_CFA_OFFSET:
	  /* ??? Only handling here what we actually emit.  */
	  gcc_unreachable ();

	default:
	  break;
	}
    }
  if (handled_one)
    return;
  pat = PATTERN (insn);
 found:

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
}
#endif /* ARM_UNWIND_INFO */
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
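/* The resulting bracketing looks roughly like this (illustrative): a
   nothrow function with no unwind tables requested is wrapped as

	.fnstart
	...function body...
	.cantunwind
	.fnend

   while an unwindable function instead carries .save/.pad/.setfp
   annotations between .fnstart and .fnend.  */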
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    case TLS_DESCSEQ:
      fputs ("(tlsdesc)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
    case TLS_DESCSEQ:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
      fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
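/* For example, a general-dynamic TLS reference to SYM would be printed
   roughly as "sym(tlsgd) + (. - .LPIC0 - 8)": the relocation annotation
   followed by the PC-relative correction encoded by operands 2 and 3.
   Exact label names and offsets depend on the surrounding code; this is
   only the general shape.  */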
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op(operands[3], &val);
      if (shift)
	{
	  if (val != -1)
	    operands[2] = GEN_INT(val);
	  sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
	}
      else
	sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
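/* As an illustration (a sketch, not verified output): for operands
   describing "r0 = r1 << 2" with SET_FLAGS == 2, unified syntax yields the
   pattern "lsl%!\t%0, %1, %2", which lets the assembler pick the 16-bit
   flag-setting "lsls r0, r1, #2" on Thumb-2 when that form is smaller.  */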
/* Output assembly for a WMMX immediate shift instruction.  */
const char *
arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
{
  int shift = INTVAL (operands[2]);
  char templ[50];
  machine_mode opmode = GET_MODE (operands[0]);

  gcc_assert (shift >= 0);

  /* If the shift value in the register versions is > 63 (for D qualifier),
     31 (for W qualifier) or 15 (for H qualifier), the shift is out of range
     and must be emulated: wror/wsra by a clamped amount, anything else by
     clearing the destination.  */
  if (((opmode == V4HImode) && (shift > 15))
      || ((opmode == V2SImode) && (shift > 31))
      || ((opmode == DImode) && (shift > 63)))
    {
      if (wror_or_wsra)
	{
	  sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
	  output_asm_insn (templ, operands);
	  if (opmode == DImode)
	    {
	      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
	      output_asm_insn (templ, operands);
	    }
	}
      else
	{
	  /* The destination register will contain all zeros.  */
	  sprintf (templ, "wzero\t%%0");
	  output_asm_insn (templ, operands);
	}
      return "";
    }

  if ((opmode == DImode) && (shift > 32))
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
      output_asm_insn (templ, operands);
      sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
      output_asm_insn (templ, operands);
    }
  else
    {
      sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
      output_asm_insn (templ, operands);
    }
  return "";
}
/* Output assembly for a WMMX tinsr instruction.  */
const char *
arm_output_iwmmxt_tinsr (rtx *operands)
{
  int mask = INTVAL (operands[3]);
  int i;
  char templ[50];
  int units = mode_nunits[GET_MODE (operands[0])];
  gcc_assert ((mask & (mask - 1)) == 0);
  for (i = 0; i < units; ++i)
    {
      if ((mask & 0x01) == 1)
	break;

      mask >>= 1;
    }
  gcc_assert (i < units);
  {
    switch (GET_MODE (operands[0]))
      {
      case V8QImode:
	sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
	break;
      case V4HImode:
	sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
	break;
      case V2SImode:
	sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
	break;
      default:
	gcc_unreachable ();
	break;
      }
    output_asm_insn (templ, operands);
  }
  return "";
}
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */

static int
arm_issue_rate (void)
static const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
	  || SUBTARGET_FRAME_POINTER_REQUIRED
	  || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}
/* Thumb-1 is the only variant that lacks conditional execution, so
   return true whenever the target is not Thumb-1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
/* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));

  if (TARGET_AAPCS_BASED)
    align = MIN (align, 64);

  return align;
}
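/* For instance, a 16-byte vector such as int32x4_t starts out with its
   natural 128-bit (size-based) alignment, but under AAPCS the value
   computed above is capped at 64 bits; an 8-byte vector keeps its natural
   64-bit alignment.  */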
static unsigned int
arm_autovectorize_vector_sizes (void)
{
  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
}
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}
static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
    {
      /* VFPv3 registers are disabled when earlier VFP
	 versions are selected due to the definition of
	 LAST_VFP_REGNUM.  */
      for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; ++regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
	    || regno >= FIRST_VFP_REGNUM + 32;
	}
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10] = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
/* Compute the attribute "length" of insn "*push_multi".
   So this function MUST be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;
  /* Thumb1 mode.  */
  if (TARGET_THUMB1)
    return 2;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}
/* Compute the number of instructions emitted by output_move_double.  */
int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];

  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}
int
vfp3_const_double_for_fract_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
  if (exact_real_inverse (DFmode, &r0))
    {
      if (exact_real_truncate (DFmode, &r0))
	{
	  HOST_WIDE_INT value = real_to_integer (&r0);
	  value = value & 0xffffffff;
	  if ((value != 0) && ( (value & (value - 1)) == 0))
	    return int_log2 (value);
	}
    }
  return 0;
}
int
vfp3_const_double_for_bits (rtx operand)
{
  REAL_VALUE_TYPE r0;

  if (!CONST_DOUBLE_P (operand))
    return 0;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
  if (exact_real_truncate (DFmode, &r0))
    {
      HOST_WIDE_INT value = real_to_integer (&r0);
      value = value & 0xffffffff;
      if ((value != 0) && ( (value & (value - 1)) == 0))
	return int_log2 (value);
    }

  return 0;
}
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
/* Emit the load-exclusive and store-exclusive instructions.
   Use acquire and release versions if necessary.  */

static void
arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
{
  rtx (*gen) (rtx, rtx);

  if (acq)
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
	case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
	case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
	case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_load_exclusiveqi; break;
	case HImode: gen = gen_arm_load_exclusivehi; break;
	case SImode: gen = gen_arm_load_exclusivesi; break;
	case DImode: gen = gen_arm_load_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (rval, mem));
}
static void
arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
			  rtx mem, bool rel)
{
  rtx (*gen) (rtx, rtx, rtx);

  if (rel)
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_store_release_exclusiveqi; break;
	case HImode: gen = gen_arm_store_release_exclusivehi; break;
	case SImode: gen = gen_arm_store_release_exclusivesi; break;
	case DImode: gen = gen_arm_store_release_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode: gen = gen_arm_store_exclusiveqi; break;
	case HImode: gen = gen_arm_store_exclusivehi; break;
	case SImode: gen = gen_arm_store_exclusivesi; break;
	case DImode: gen = gen_arm_store_exclusivedi; break;
	default:
	  gcc_unreachable ();
	}
    }

  emit_insn (gen (bval, rval, mem));
}
/* Mark the previous jump instruction as unlikely.  */

static void
emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}
/* Expand a compare and swap pattern.  */

void
arm_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (TARGET_HAVE_LDACQ
      && INTVAL (mod_f) == MEMMODEL_ACQUIRE
      && INTVAL (mod_s) == MEMMODEL_RELEASE)
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For narrow modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* FALLTHRU */

    case SImode:
      /* Force the value into a register if needed.  We waited until after
	 the zero-extension above to do this properly.  */
      if (!arm_add_operand (oldval, SImode))
	oldval = force_reg (SImode, oldval);
      break;

    case DImode:
      if (!cmpdi_operand (oldval, mode))
	oldval = force_reg (mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  */
  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
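/* Illustrative shape of what this expansion ultimately becomes for an
   SImode strong compare-and-swap, once the insn is split after reload
   (register numbers are arbitrary and this is a sketch, not verified
   output):

	1:	ldrex	r0, [r1]
		cmp	r0, r2
		bne	2f
		strex	r3, r4, [r1]
		cmp	r3, #0
		bne	1b
	2:

   DMB barriers bracket the loop when load-acquire/store-release are not
   available, and success is tested afterwards via the Z flag as arranged
   above.  */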
/* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
   another memory store between the load-exclusive and store-exclusive can
   reset the monitor from Exclusive to Open state.  This means we must wait
   until after reload to split the pattern, lest we get a register spill in
   the middle of the atomic sequence.  */

void
arm_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  machine_mode mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  mod_s = (enum memmodel) INTVAL (operands[5]);
  mod_f = (enum memmodel) INTVAL (operands[6]);
  scratch = operands[7];
  mode = GET_MODE (mem);

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(mod_s == MEMMODEL_RELAXED
			  || mod_s == MEMMODEL_CONSUME
			  || mod_s == MEMMODEL_RELEASE);

  bool use_release = TARGET_HAVE_LDACQ
		     && !(mod_s == MEMMODEL_RELAXED
			  || mod_s == MEMMODEL_CONSUME
			  || mod_s == MEMMODEL_ACQUIRE);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (mod_s);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  arm_emit_load_exclusive (mode, rval, mem, use_acquire);

  cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);

  /* Weak or strong, we want EQ to be true for success, so that we
     match the flags that we got from the compare above.  */
  cond = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, cond, x));

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }

  if (mod_f != MEMMODEL_RELAXED)
    emit_label (label2);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_post_atomic_barrier (mod_s);

  if (mod_f == MEMMODEL_RELAXED)
    emit_label (label2);
}
void
arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
		     rtx value, rtx model_rtx, rtx cond)
{
  enum memmodel model = (enum memmodel) INTVAL (model_rtx);
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  rtx x;

  bool use_acquire = TARGET_HAVE_LDACQ
		     && !(model == MEMMODEL_RELAXED
			  || model == MEMMODEL_CONSUME
			  || model == MEMMODEL_RELEASE);

  bool use_release = TARGET_HAVE_LDACQ
		     && !(model == MEMMODEL_RELAXED
			  || model == MEMMODEL_CONSUME
			  || model == MEMMODEL_ACQUIRE);

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_pre_atomic_barrier (model);

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  arm_emit_load_exclusive (mode, old_out, mem, use_acquire);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* FALLTHRU */

    case PLUS:
      if (mode == DImode)
	{
	  /* DImode plus/minus need to clobber flags.  */
	  /* The adddi3 and subdi3 patterns are incorrectly written so that
	     they require matching operands, even when we could easily support
	     three operands.  Thankfully, this can be fixed up post-splitting,
	     as the individual add+adc patterns do accept three operands and
	     post-reload cprop can make these moves go away.  */
	  emit_move_insn (new_out, old_out);
	  if (code == PLUS)
	    x = gen_adddi3 (new_out, new_out, value);
	  else
	    x = gen_subdi3 (new_out, new_out, value);
	  emit_insn (x);
	  break;
	}
      /* FALLTHRU */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
			    use_release);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));

  /* Checks whether a barrier is needed and emits one accordingly.  */
  if (!(use_acquire || use_release))
    arm_post_atomic_barrier (model);
}
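/* For example, an SImode atomic add generated through this splitter has
   roughly the following form (a sketch under the usual register
   conventions, not verified output):

	1:	ldrex	r0, [r2]	@ old_out
		add	r1, r0, r3	@ new_out = old_out + value
		strex	ip, r1, [r2]
		cmp	ip, #0
		bne	1b

   with barriers or acquire/release instruction variants chosen from MODEL
   as above.  */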
#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_NEON);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
      else
	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
	  pair = gen_lowpart (TImode, pair);
	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
	}
    }
}
void
arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The VTBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  arm_expand_vec_perm_1 (target, op0, op1, sel);
}
/* Generate or test for an insn that supports a constant permutation.  */

/* Recognize patterns for the VUZP insns.  */

static bool
arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
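/* Example of a selector this recognizer accepts: for V8QImode with two
   input vectors, { 0, 2, 4, 6, 8, 10, 12, 14 } selects the even-indexed
   bytes (odd == 0) and matches VUZP's first output, while
   { 1, 3, 5, 7, 9, 11, 13, 15 } matches the second (odd == 1).  */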
/* Recognize patterns for the VZIP insns.  */

static bool
arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (high)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VREV insns.  */

static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen)(rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev64v16qi; break;
	case V8QImode:  gen = gen_neon_vrev64v8qi;  break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev32v16qi; break;
	case V8QImode:  gen = gen_neon_vrev32v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev64v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev64v4hi;  break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_neon_vrev16v16qi; break;
	case V8QImode:  gen = gen_neon_vrev16v8qi;  break;
	case V8HImode:  gen = gen_neon_vrev32v8hi;  break;
	case V4HImode:  gen = gen_neon_vrev32v4hi;  break;
	case V4SImode:  gen = gen_neon_vrev64v4si;  break;
	case V2SImode:  gen = gen_neon_vrev64v2si;  break;
	case V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
	case V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt ; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}
/* Recognize patterns for the VTRN insns.  */

static bool
arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out0, out1, in0, in1, x;
  rtx (*gen)(rtx, rtx, rtx, rtx);

  if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
    return false;

  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
    case V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
    case V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
    case V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
    case V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
    case V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
    case V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
    case V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
    default:
      gcc_unreachable ();
    }

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }

  out0 = d->target;
  out1 = gen_reg_rtx (d->vmode);
  if (odd)
    x = out0, out0 = out1, out1 = x;

  emit_insn (gen (out0, in0, in1, out1));
  return true;
}
/* Recognize patterns for the VEXT insns.  */

static bool
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location;

  unsigned int next = d->perm[0] + 1;

  /* TODO: Handle GCC's numbering of elements for big-endian.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  /* Check if the extracted indices are increasing by one.  */
  for (i = 1; i < nelt; next++, i++)
    {
      /* If we hit the most significant element of the 2nd vector in
	 the previous iteration, no need to test further.  */
      if (next == 2 * nelt)
	return false;

      /* If we are operating on only one vector: it could be a
	 rotation.  If there are only two elements of size < 64, let
	 arm_evpc_neon_vrev catch it.  */
      if (d->one_vector_p && (next == nelt))
	{
	  if ((nelt == 2) && (d->vmode != V2DImode))
	    return false;

	  next = 0;
	}

      if (d->perm[i] != next)
	return false;
    }

  location = d->perm[0];

  switch (d->vmode)
    {
    case V16QImode: gen = gen_neon_vextv16qi; break;
    case V8QImode: gen = gen_neon_vextv8qi; break;
    case V4HImode: gen = gen_neon_vextv4hi; break;
    case V8HImode: gen = gen_neon_vextv8hi; break;
    case V2SImode: gen = gen_neon_vextv2si; break;
    case V4SImode: gen = gen_neon_vextv4si; break;
    case V2SFmode: gen = gen_neon_vextv2sf; break;
    case V4SFmode: gen = gen_neon_vextv4sf; break;
    case V2DImode: gen = gen_neon_vextv2di; break;
    default:
      return false;
    }

  /* Success! */
  if (d->testing_p)
    return true;

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
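/* Example: with V8QImode operands, the selector { 3, 4, 5, 6, 7, 8, 9, 10 }
   consists of consecutive indices starting at 3, so it maps onto something
   like "vext.8 d0, d1, d2, #3"; a one-vector rotation such as
   { 2, 3, 4, 5, 6, 7, 0, 1 } is accepted via the wrap-around case above.  */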
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */

static bool
arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Check if the input mask matches vext before reordering the
     operands.  */
  if (TARGET_NEON)
    if (arm_evpc_neon_vext (d))
      return true;

  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_NEON)
    {
      if (arm_evpc_neon_vuzp (d))
	return true;
      if (arm_evpc_neon_vzip (d))
	return true;
      if (arm_evpc_neon_vrev (d))
	return true;
      if (arm_evpc_neon_vtrn (d))
	return true;
      return arm_evpc_neon_vtbl (d);
    }

  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return arm_expand_vec_perm_const_1 (&d);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
arm_vectorize_vec_perm_const_ok (machine_mode vmode,
				 const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Categorize the set of elements in the selector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = arm_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
static bool
arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
{
  /* If we are soft float and we do not have ldrd
     then all auto increment forms are ok.  */
  if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
    return true;

  switch (code)
    {
      /* Post increment and Pre Decrement are supported for all
	 instruction forms except for vector forms.  */
    case ARM_POST_INC:
    case ARM_PRE_DEC:
      if (VECTOR_MODE_P (mode))
	{
	  if (code != ARM_PRE_DEC)
	    return true;
	  else
	    return false;
	}
      return true;

    case ARM_POST_DEC:
    case ARM_PRE_INC:
      /* Without LDRD and mode size greater than
	 word size, there is no point in auto-incrementing
	 because ldm and stm will not have these forms.  */
      if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
	return false;

      /* Vector and floating point modes do not support
	 these auto increment forms.  */
      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
	return false;

      return true;

    default:
      return false;
    }

  return false;
}
/* The default expansion of general 64-bit shifts in core-regs is suboptimal
   on ARM, since we know that shifts by negative amounts are no-ops.
   Additionally, the default expansion code is not available or suitable
   for post-reload insn splits (this can occur when the register allocator
   chooses not to do a shift in NEON).

   This function is used in both initial expand and post-reload splits, and
   handles all kinds of 64-bit shifts.

   Input requirements:
    - It is safe for the input and output to be the same register, but
      early-clobber rules apply for the shift amount and scratch registers.
    - Shift by register requires both scratch registers.  In all other cases
      the scratch registers may be NULL.
    - Ashiftrt by a register also clobbers the CC register.  */
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
			       rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
	in = the register pair containing the input value.
	out = the destination register pair.
	up = the high- or low-part of each pair.
	down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
	      && (REG_P (out) || GET_CODE (out) == SUBREG)
	      && GET_MODE (out) == DImode);
  gcc_assert (in
	      && (REG_P (in) || GET_CODE (in) == SUBREG)
	      && GET_MODE (in) == DImode);
  gcc_assert (amount
	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
		   && GET_MODE (amount) == SImode)
		  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
	      || (GET_CODE (scratch1) == SCRATCH)
	      || (GET_MODE (scratch1) == SImode
		  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
	      || (GET_CODE (scratch2) == SCRATCH)
	      || (GET_MODE (scratch2) == SImode
		  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
	      || !HARD_REGISTER_P (out)
	      || (REGNO (out) != REGNO (amount)
		  && REGNO (out) + 1 != REGNO (amount)));
28409 #define SUB_32(DEST,SRC) \
28410 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28411 #define RSB_32(DEST,SRC) \
28412 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28413 #define SUB_S_32(DEST,SRC) \
28414 gen_addsi3_compare0 ((DEST), (SRC), \
28416 #define SET(DEST,SRC) \
28417 gen_rtx_SET (SImode, (DEST), (SRC))
28418 #define SHIFT(CODE,SRC,AMOUNT) \
28419 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28420 #define LSHIFT(CODE,SRC,AMOUNT) \
28421 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28422 SImode, (SRC), (AMOUNT))
28423 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28424 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28425 SImode, (SRC), (AMOUNT))
28427 gen_rtx_IOR (SImode, (A), (B))
28428 #define BRANCH(COND,LABEL) \
28429 gen_arm_cond_branch ((LABEL), \
28430 gen_rtx_ ## COND (CCmode, cc_reg, \
  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result an ARM instruction in a
	 shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behaviour", in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up,
				    GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
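  /* Worked example of the constant case above: for a left shift by 40,
     adj_amount is 40 - 32 = 8, so the emitted sequence is equivalent to

	out_down = in_up << 8;	(result high word = input low word << 8)
	out_up   = 0;		(result low word = 0)

     remembering that for ASHIFT "up" names the low part and "down" the
     high part of each pair.  */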
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
	 Swapping them around now allows the later code to be more general.  */
      switch (code)
	{
	case ASHIFT:
	  emit_insn (SUB_32 (scratch1, amount));
	  emit_insn (RSB_32 (scratch2, amount));
	  break;
	case ASHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  /* Also set CC = amount > 32.  */
	  emit_insn (SUB_S_32 (scratch2, amount));
	  break;
	case LSHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  emit_insn (SUB_32 (scratch2, amount));
	  break;
	default:
	  gcc_unreachable ();
	}
      /* Emit code like this:

	 ASHIFT:
	 out_down = in_down << amount;
	 out_down = (in_up << (amount - 32)) | out_down;
	 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	 out_up = in_up << amount;

	 ASHIFTRT:
	 out_down = in_down >> amount;
	 out_down = (in_up << (32 - amount)) | out_down;
	 if (amount >= 32)
	   out_down = ((signed)in_up >> (amount - 32)) | out_down;
	 out_up = in_up >> amount;

	 LSHIFTRT:
	 out_down = in_down >> amount;
	 out_down = (in_up << (32 - amount)) | out_down;
	 if (amount >= 32)
	   out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	 out_up = in_up >> amount;

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */
      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
	{
	  /* Emit code for ARM mode.  */
	  emit_insn (SET (out_down,
			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
					     out_down)));
	      emit_label (done_label);
	    }
	  else
	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
					   out_down)));
	}
      else
	{
	  /* Emit code for Thumb2 mode.
	     Thumb2 can't do shift and or in one insn.  */
	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

	  if (code == ASHIFTRT)
	    {
	      rtx_code_label *done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
	      emit_label (done_label);
	    }
	  else
	    {
	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
	    }
	}

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

#undef SUB_32
#undef RSB_32
#undef SUB_S_32
#undef SET
#undef SHIFT
#undef LSHIFT
#undef REV_LSHIFT
#undef ORR
#undef BRANCH
}
/* Return true if COMPARISON is a valid comparison operation, forcing the
   operands into a form that is valid for it.  */
bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;

  code_int = (int)code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code)code_int);

  switch (mode)
    {
    case SImode:
      if (!arm_add_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case DImode:
      if (!cmpdi_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case SFmode:
    case DFmode:
      if (!arm_float_compare_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_float_compare_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    default:
      break;
    }

  return false;
}
/* Maximum number of instructions to set block of memory.  */
static int
arm_block_set_max_insns (void)
{
  if (optimize_function_for_size_p (cfun))
    return 4;
  else
    return current_tune->max_insns_inline_memset;
}
/* Return TRUE if it's profitable to set block of memory for
   non-vectorized case.  VAL is the value to set the memory
   with.  LENGTH is the number of bytes to set.  ALIGN is the
   alignment of the destination memory in bytes.  UNALIGNED_P
   is TRUE if we can only set the memory with instructions
   meeting alignment requirements.  USE_STRD_P is TRUE if we
   can use strd to set the memory.  */
static bool
arm_block_set_non_vect_profit_p (rtx val,
				 unsigned HOST_WIDE_INT length,
				 unsigned HOST_WIDE_INT align,
				 bool unaligned_p, bool use_strd_p)
{
  int num = 0;
  /* For leftovers in bytes of 0-7, we can set the memory block using
     strb/strh/str with minimum instruction number.  */
  const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};

  if (unaligned_p)
    {
      num = arm_const_inline_cost (SET, val);
      num += length / align + length % align;
    }
  else if (use_strd_p)
    {
      num = arm_const_double_inline_cost (val);
      num += (length >> 3) + leftover[length & 7];
    }
  else
    {
      num = arm_const_inline_cost (SET, val);
      num += (length >> 2) + leftover[length & 3];
    }

  /* We may be able to combine last pair STRH/STRB into a single STR
     by shifting one byte back.  */
  if (unaligned_access && length > 3 && (length & 3) == 3)
    num--;

  return (num <= arm_block_set_max_insns ());
}
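/* Worked example (illustrative): length = 15, align = 4, word stores
   (unaligned_p and use_strd_p both false), and a value whose
   arm_const_inline_cost is 1.  Then num = 1 + (15 >> 2) + leftover[3]
   = 1 + 3 + 2 = 6; since 15 & 3 == 3 and unaligned access is enabled,
   the trailing STRH/STRB pair merges into one STR, giving num = 5,
   profitable whenever arm_block_set_max_insns () returns >= 5.  */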
/* Return TRUE if it's profitable to set block of memory for
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
			     unsigned HOST_WIDE_INT align,
			     machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num--;

  return (num <= arm_block_set_max_insns ());
}
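/* Worked example (illustrative): length = 20, align = 4 gives
   unaligned_p = false; with mode = V16QImode, nelt = 16, so
   num = 1 + (20 + 15) / 16 = 3.  Since 20 & 3 == 0 no address
   adjustment is charged, and the aligned V16QImode case saves one
   instruction, leaving num = 2 -- comfortably within the limit.  */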
/* Set a block of memory using vectorization instructions for the
   unaligned case.  We fill the first LENGTH bytes of the memory
   area starting from DSTBASE with byte constant VALUE.  ALIGN is
   the alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_unaligned_vect (rtx dstbase,
			      unsigned HOST_WIDE_INT length,
			      unsigned HOST_WIDE_INT value,
			      unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
  rtx dst, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  rtx (*gen_func) (rtx, rtx);
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;

  gcc_assert ((align & 0x3) != 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16)
    {
      mode = V16QImode;
      gen_func = gen_movmisalignv16qi;
    }
  else
    {
      mode = V8QImode;
      gen_func = gen_movmisalignv8qi;
    }
  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mem = adjust_automodify_address (dstbase, mode, dst, 0);

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  /* Handle nelt_mode bytes in a vector.  */
  for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
    {
      emit_insn ((*gen_func) (mem, reg));
      if (i + 2 * nelt_mode <= length)
	emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
    }

  /* If there are not less than nelt_v8 bytes leftover, we must be in
     V16QI mode.  */
  gcc_assert ((i + nelt_v8) > length || mode == V16QImode);

  /* Handle (8, 16) bytes leftover.  */
  if (i + nelt_v8 < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv16qi (mem, reg));
    }
  /* Handle (0, 8] bytes leftover.  */
  else if (i < length && i + nelt_v8 >= length)
    {
      if (mode == V16QImode)
	{
	  reg = gen_lowpart (V8QImode, reg);
	  mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
	}
      emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
					      + (nelt_mode - nelt_v8))));
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) != 0 && align >= 2)
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
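/* Example of the shifting-back trick above (illustrative): for
   length = 23, align = 1, the loop emits one misaligned v16qi store
   for bytes [0, 16), leaving 7 bytes.  The (0, 8] leftover branch then
   advances DST by (23 - 16) + (16 - 8) = 15 and emits one misaligned
   8-byte store covering bytes [15, 23), deliberately rewriting the
   already-stored byte 15 instead of issuing byte-by-byte stores.  */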
/* Set a block of memory using vectorization instructions for the
   aligned case.  We fill the first LENGTH bytes of the memory area
   starting from DSTBASE with byte constant VALUE.  ALIGN is the
   alignment requirement of memory.  Return TRUE if succeeded.  */
static bool
arm_block_set_aligned_vect (rtx dstbase,
			    unsigned HOST_WIDE_INT length,
			    unsigned HOST_WIDE_INT value,
			    unsigned HOST_WIDE_INT align)
{
  unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
  rtx dst, addr, mem;
  rtx val_elt, val_vec, reg;
  rtx rval[MAX_VECT_LEN];
  machine_mode mode;
  unsigned HOST_WIDE_INT v = value;

  gcc_assert ((align & 0x3) == 0);
  nelt_v8 = GET_MODE_NUNITS (V8QImode);
  nelt_v16 = GET_MODE_NUNITS (V16QImode);
  if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
    mode = V16QImode;
  else
    mode = V8QImode;

  nelt_mode = GET_MODE_NUNITS (mode);
  gcc_assert (length >= nelt_mode);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_vect_profit_p (length, align, mode))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));

  v = sext_hwi (v, BITS_PER_WORD);
  val_elt = GEN_INT (v);
  for (j = 0; j < nelt_mode; j++)
    rval[j] = val_elt;

  reg = gen_reg_rtx (mode);
  val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
  /* Emit instruction loading the constant value.  */
  emit_move_insn (reg, val_vec);

  i = 0;
  /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
  if (mode == V16QImode)
    {
      mem = adjust_automodify_address (dstbase, mode, dst, 0);
      emit_insn (gen_movmisalignv16qi (mem, reg));
      i += nelt_mode;
      /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
      if (i + nelt_v8 < length && i + nelt_v16 > length)
	{
	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
	  mem = adjust_automodify_address (dstbase, mode, dst, 0);
	  /* We are shifting bytes back, set the alignment accordingly.  */
	  if ((length & 0x3) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 4);
	  else if ((length & 0x1) == 0)
	    set_mem_align (mem, BITS_PER_UNIT * 2);
	  else
	    set_mem_align (mem, BITS_PER_UNIT);

	  emit_insn (gen_movmisalignv16qi (mem, reg));
	  return true;
	}
      /* Fall through for bytes leftover.  */
      mode = V8QImode;
      nelt_mode = GET_MODE_NUNITS (mode);
      reg = gen_lowpart (V8QImode, reg);
    }

  /* Handle 8 bytes in a vector.  */
  for (; (i + nelt_mode <= length); i += nelt_mode)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single word leftover by shifting 4 bytes back.  We can
     use aligned access for this case.  */
  if (i + UNITS_PER_WORD == length)
    {
      addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
      mem = adjust_automodify_address (dstbase, mode,
				       addr, i - UNITS_PER_WORD);
      /* We are shifting 4 bytes back, set the alignment accordingly.  */
      if (align > UNITS_PER_WORD)
	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);

      emit_move_insn (mem, reg);
    }
  /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
     We have to use unaligned access for this case.  */
  else if (i < length)
    {
      emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
      mem = adjust_automodify_address (dstbase, mode, dst, 0);
      /* We are shifting bytes back, set the alignment accordingly.  */
      if ((length & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT * 2);
      else
	set_mem_align (mem, BITS_PER_UNIT);

      emit_insn (gen_movmisalignv8qi (mem, reg));
    }

  return true;
}
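/* Example (illustrative): length = 20, align = 4 on a little-endian
   target with unaligned access enabled: the first 16 bytes go out as
   one vst1:v16qi, no full 8-byte iteration fits, and then
   i + UNITS_PER_WORD == length triggers the single-word case, which
   stores 8 bytes at offset 12 -- an aligned store that re-covers the
   already-written bytes [12, 16) to finish bytes [16, 20).  */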
/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
				  unsigned HOST_WIDE_INT length,
				  unsigned HOST_WIDE_INT value,
				  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
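/* Example (illustrative): length = 5, align = 2, value = 0x34 gives
   v = 0x3434 and mode = HImode, so the expansion is two STRHs at
   offsets 0 and 2 followed by the single-byte STRB at offset 4.
   Assuming the constant loads in one instruction, the profit check
   sees num = 1 + 5/2 + 5%2 = 4, within the default budget.  */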
/* Set a block of memory using plain strd/str/strh/strb instructions,
   to permit unaligned copies on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes
   of the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
				unsigned HOST_WIDE_INT length,
				unsigned HOST_WIDE_INT value,
				unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
					align, false, use_strd_p))
    {
      if (!use_strd_p)
	return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
					    align, false, use_strd_p))
	return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
	{
	  addr = plus_constant (Pmode, dst, i);
	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
	  emit_move_insn (mem, reg);
	}
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
	 compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}
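/* Example (illustrative): length = 16, align = 4, value = 0xAB on a
   core with prefer_ldrd_strd: V becomes the 64-bit replication
   0xABABABABABABABAB and the whole block is covered by two STRDs at
   offsets 0 and 8, with no word, halfword, or byte leftovers.  */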
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
		    unsigned HOST_WIDE_INT length,
		    unsigned HOST_WIDE_INT value,
		    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}
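/* For instance, a 13-byte set with 4-byte alignment takes the aligned
   path but still needs a misaligned store for the trailing bytes, so
   on a big-endian target or one without unaligned access the check
   above rejects it and expansion falls back to the non-vectorized
   routines.  */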
/* Expand string store operation.  Firstly we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
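/* Illustrative usage: the setmem expander in arm.md hands its operands
   straight to this routine and FAILs when it returns false, so a
   memset (p, 0xAB, 15) with P known 4-byte aligned arrives roughly as
   operands[1] = (const_int 15), operands[2] = (const_int 0xAB) and
   operands[3] = (const_int 4).  */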
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << 29;
}
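/* The shadow mapping this offset implies is
   shadow = (address >> 3) + 0x20000000; e.g. an access at 0x40000000
   has its shadow byte at 0x08000000 + 0x20000000 = 0x28000000.  */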
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
	{
	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
	  if (decl_op1
	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
	    {
	      if ((TREE_CODE (decl_op1) == VAR_DECL
		   || TREE_CODE (decl_op1) == CONST_DECL)
		  && (TREE_CODE (decl_op0) == VAR_DECL
		      || TREE_CODE (decl_op0) == CONST_DECL))
		return (get_variable_section (decl_op1, false)
			!= get_variable_section (decl_op0, false));

	      if (TREE_CODE (decl_op1) == LABEL_DECL
		  && TREE_CODE (decl_op0) == LABEL_DECL)
		return (DECL_CONTEXT (decl_op1)
			!= DECL_CONTEXT (decl_op0));
	    }

	  return true;
	}
    }

  return false;
}
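/* Example (illustrative): (minus (symbol_ref a) (symbol_ref b)) where
   A and B are variables placed in the same section yields false -- the
   difference is a link-time constant usable in debug info -- whereas
   symbols from different sections make the expression unsuitable.  */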
/* Return TRUE if rtx X is a reference to a value in a constant pool.  */
bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}
/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
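/* Examples (illustrative): (mem (reg r1)) yields BASE = r1 and
   OFFSET = const0_rtx; (mem (plus (reg r1) (const_int 8))) yields
   BASE = r1 and OFFSET = 8; an auto-modified address such as
   (mem (post_inc (reg r1))) matches neither form, so both outputs
   are cleared and the function returns false.  */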
/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}
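/* Example (illustrative): (set (mem (plus (reg r2) (const_int 4)))
   (reg r0)) is recognized as a store with BASE = r2, OFFSET = 4 and
   *IS_LOAD = false; a PARALLEL pattern, or a SET between two
   registers, is rejected immediately.  */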
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so
   FUSION_PRI and PRI are only calculated for these instructions.  For
   other instructions, FUSION_PRI and PRI are simply set to MAX_PRI.
   In the future, other kinds of instruction fusion can be supported by
   returning different priorities.

   It's important that irrelevant instructions get the largest
   FUSION_PRI.  */
static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
			   int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
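/* Worked example (illustrative): two stores to [r2, #4] and [r2, #8]
   with max_pri = N both get *fusion_pri = N - 3, so the scheduler
   groups them; within the pair, *pri = (N - 1) / 2 - (2 << 20) minus
   the offset, so the #4 store receives the larger PRI and issues
   first, letting the adjacent pair later combine (e.g. into STRD).  */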
#include "gt-arm.h"