1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2013 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
26 #include "hash-table.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
40 #include "insn-attr.h"
46 #include "diagnostic-core.h"
53 #include "target-def.h"
55 #include "langhooks.h"
63 /* Forward definitions of types. */
64 typedef struct minipool_node Mnode
;
65 typedef struct minipool_fixup Mfix
;
67 void (*arm_lang_output_object_attributes_hook
)(void);
74 /* Forward function declarations. */
75 static bool arm_lra_p (void);
76 static bool arm_needs_doubleword_align (enum machine_mode
, const_tree
);
77 static int arm_compute_static_chain_stack_bytes (void);
78 static arm_stack_offsets
*arm_get_frame_offsets (void);
79 static void arm_add_gc_roots (void);
80 static int arm_gen_constant (enum rtx_code
, enum machine_mode
, rtx
,
81 HOST_WIDE_INT
, rtx
, rtx
, int, int);
82 static unsigned bit_count (unsigned long);
83 static int arm_address_register_rtx_p (rtx
, int);
84 static int arm_legitimate_index_p (enum machine_mode
, rtx
, RTX_CODE
, int);
85 static int thumb2_legitimate_index_p (enum machine_mode
, rtx
, int);
86 static int thumb1_base_register_rtx_p (rtx
, enum machine_mode
, int);
87 static rtx
arm_legitimize_address (rtx
, rtx
, enum machine_mode
);
88 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
89 static rtx
thumb_legitimize_address (rtx
, rtx
, enum machine_mode
);
90 inline static int thumb1_index_register_rtx_p (rtx
, int);
91 static bool arm_legitimate_address_p (enum machine_mode
, rtx
, bool);
92 static int thumb_far_jump_used_p (void);
93 static bool thumb_force_lr_save (void);
94 static unsigned arm_size_return_regs (void);
95 static bool arm_assemble_integer (rtx
, unsigned int, int);
96 static void arm_print_operand (FILE *, rtx
, int);
97 static void arm_print_operand_address (FILE *, rtx
);
98 static bool arm_print_operand_punct_valid_p (unsigned char code
);
99 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
100 static arm_cc
get_arm_condition_code (rtx
);
101 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
102 static const char *output_multi_immediate (rtx
*, const char *, const char *,
104 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
105 static struct machine_function
*arm_init_machine_status (void);
106 static void thumb_exit (FILE *, int);
107 static HOST_WIDE_INT
get_jump_table_size (rtx
);
108 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
109 static Mnode
*add_minipool_forward_ref (Mfix
*);
110 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
111 static Mnode
*add_minipool_backward_ref (Mfix
*);
112 static void assign_minipool_offsets (Mfix
*);
113 static void arm_print_value (FILE *, rtx
);
114 static void dump_minipool (rtx
);
115 static int arm_barrier_cost (rtx
);
116 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
117 static void push_minipool_barrier (rtx
, HOST_WIDE_INT
);
118 static void push_minipool_fix (rtx
, HOST_WIDE_INT
, rtx
*, enum machine_mode
,
120 static void arm_reorg (void);
121 static void note_invalid_constants (rtx
, HOST_WIDE_INT
, int);
122 static unsigned long arm_compute_save_reg0_reg12_mask (void);
123 static unsigned long arm_compute_save_reg_mask (void);
124 static unsigned long arm_isr_value (tree
);
125 static unsigned long arm_compute_func_type (void);
126 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
127 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
128 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
129 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
130 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
132 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
133 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
134 static int arm_comp_type_attributes (const_tree
, const_tree
);
135 static void arm_set_default_type_attributes (tree
);
136 static int arm_adjust_cost (rtx
, rtx
, rtx
, int);
137 static int arm_sched_reorder (FILE *, int, rtx
*, int *, int);
138 static int optimal_immediate_sequence (enum rtx_code code
,
139 unsigned HOST_WIDE_INT val
,
140 struct four_ints
*return_sequence
);
141 static int optimal_immediate_sequence_1 (enum rtx_code code
,
142 unsigned HOST_WIDE_INT val
,
143 struct four_ints
*return_sequence
,
145 static int arm_get_strip_length (int);
146 static bool arm_function_ok_for_sibcall (tree
, tree
);
147 static enum machine_mode
arm_promote_function_mode (const_tree
,
148 enum machine_mode
, int *,
150 static bool arm_return_in_memory (const_tree
, const_tree
);
151 static rtx
arm_function_value (const_tree
, const_tree
, bool);
152 static rtx
arm_libcall_value_1 (enum machine_mode
);
153 static rtx
arm_libcall_value (enum machine_mode
, const_rtx
);
154 static bool arm_function_value_regno_p (const unsigned int);
155 static void arm_internal_label (FILE *, const char *, unsigned long);
156 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
158 static bool arm_have_conditional_execution (void);
159 static bool arm_cannot_force_const_mem (enum machine_mode
, rtx
);
160 static bool arm_legitimate_constant_p (enum machine_mode
, rtx
);
161 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
162 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
163 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
164 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
165 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
166 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
167 static bool arm_rtx_costs (rtx
, int, int, int, int *, bool);
168 static int arm_address_cost (rtx
, enum machine_mode
, addr_space_t
, bool);
169 static int arm_register_move_cost (enum machine_mode
, reg_class_t
, reg_class_t
);
170 static int arm_memory_move_cost (enum machine_mode
, reg_class_t
, bool);
171 static void arm_init_builtins (void);
172 static void arm_init_iwmmxt_builtins (void);
173 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
174 static rtx
arm_expand_binop_builtin (enum insn_code
, tree
, rtx
);
175 static rtx
arm_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
176 static rtx
arm_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
177 static tree
arm_builtin_decl (unsigned, bool);
178 static void emit_constant_insn (rtx cond
, rtx pattern
);
179 static rtx
emit_set_insn (rtx
, rtx
);
180 static rtx
emit_multi_reg_push (unsigned long);
181 static int arm_arg_partial_bytes (cumulative_args_t
, enum machine_mode
,
183 static rtx
arm_function_arg (cumulative_args_t
, enum machine_mode
,
185 static void arm_function_arg_advance (cumulative_args_t
, enum machine_mode
,
187 static unsigned int arm_function_arg_boundary (enum machine_mode
, const_tree
);
188 static rtx
aapcs_allocate_return_reg (enum machine_mode
, const_tree
,
190 static rtx
aapcs_libcall_value (enum machine_mode
);
191 static int aapcs_select_return_coproc (const_tree
, const_tree
);
193 #ifdef OBJECT_FORMAT_ELF
194 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
195 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
198 static void arm_encode_section_info (tree
, rtx
, int);
201 static void arm_file_end (void);
202 static void arm_file_start (void);
204 static void arm_setup_incoming_varargs (cumulative_args_t
, enum machine_mode
,
206 static bool arm_pass_by_reference (cumulative_args_t
,
207 enum machine_mode
, const_tree
, bool);
208 static bool arm_promote_prototypes (const_tree
);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree
);
212 static bool arm_must_pass_in_stack (enum machine_mode
, const_tree
);
213 static bool arm_return_in_memory (const_tree
, const_tree
);
215 static void arm_unwind_emit (FILE *, rtx
);
216 static bool arm_output_ttype (rtx
);
217 static void arm_asm_emit_except_personality (rtx
);
218 static void arm_asm_init_sections (void);
220 static rtx
arm_dwarf_register_span (rtx
);
222 static tree
arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree
arm_get_cookie_size (tree
);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree
);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree
arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree
, rtx
);
234 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
235 static void arm_option_override (void);
236 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode
);
237 static bool arm_cannot_copy_insn_p (rtx
);
238 static bool arm_tls_symbol_p (rtx x
);
239 static int arm_issue_rate (void);
240 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
241 static bool arm_output_addr_const_extra (FILE *, rtx
);
242 static bool arm_allocate_stack_slots_for_args (void);
243 static bool arm_warn_func_return (tree
);
244 static const char *arm_invalid_parameter_type (const_tree t
);
245 static const char *arm_invalid_return_type (const_tree t
);
246 static tree
arm_promoted_type (const_tree t
);
247 static tree
arm_convert_to_type (tree type
, tree expr
);
248 static bool arm_scalar_mode_supported_p (enum machine_mode
);
249 static bool arm_frame_pointer_required (void);
250 static bool arm_can_eliminate (const int, const int);
251 static void arm_asm_trampoline_template (FILE *);
252 static void arm_trampoline_init (rtx
, tree
, rtx
);
253 static rtx
arm_trampoline_adjust_address (rtx
);
254 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
255 static bool cortex_a9_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
256 static bool xscale_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
257 static bool fa726te_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
258 static bool arm_array_mode_supported_p (enum machine_mode
,
259 unsigned HOST_WIDE_INT
);
260 static enum machine_mode
arm_preferred_simd_mode (enum machine_mode
);
261 static bool arm_class_likely_spilled_p (reg_class_t
);
262 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
263 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
264 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode
,
268 static void arm_conditional_register_usage (void);
269 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
270 static unsigned int arm_autovectorize_vector_sizes (void);
271 static int arm_default_branch_cost (bool, bool);
272 static int arm_cortex_a5_branch_cost (bool, bool);
273 static int arm_cortex_m_branch_cost (bool, bool);
275 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
276 const unsigned char *sel
);
278 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
280 int misalign ATTRIBUTE_UNUSED
);
281 static unsigned arm_add_stmt_cost (void *data
, int count
,
282 enum vect_cost_for_stmt kind
,
283 struct _stmt_vec_info
*stmt_info
,
285 enum vect_cost_model_location where
);
287 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
288 bool op0_preserve_value
);
289 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
291 /* Table of machine attributes. */
292 static const struct attribute_spec arm_attribute_table
[] =
294 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
295 affects_type_identity } */
296 /* Function calls made to this symbol must be done indirectly, because
297 it may lie outside of the 26 bit addressing range of a normal function
299 { "long_call", 0, 0, false, true, true, NULL
, false },
300 /* Whereas these functions are always known to reside within the 26 bit
302 { "short_call", 0, 0, false, true, true, NULL
, false },
303 /* Specify the procedure call conventions for a function. */
304 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
306 /* Interrupt Service Routines have special prologue and epilogue requirements. */
307 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
309 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
311 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
314 /* ARM/PE has three new attributes:
316 dllexport - for exporting a function/variable that will live in a dll
317 dllimport - for importing a function/variable from a dll
319 Microsoft allows multiple declspecs in one __declspec, separating
320 them with spaces. We do NOT support this. Instead, use __declspec
323 { "dllimport", 0, 0, true, false, false, NULL
, false },
324 { "dllexport", 0, 0, true, false, false, NULL
, false },
325 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
327 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
328 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
329 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
330 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
333 { NULL
, 0, 0, false, false, false, NULL
, false }
336 /* Initialize the GCC target structure. */
337 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
338 #undef TARGET_MERGE_DECL_ATTRIBUTES
339 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
342 #undef TARGET_LEGITIMIZE_ADDRESS
343 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
346 #define TARGET_LRA_P arm_lra_p
348 #undef TARGET_ATTRIBUTE_TABLE
349 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
351 #undef TARGET_ASM_FILE_START
352 #define TARGET_ASM_FILE_START arm_file_start
353 #undef TARGET_ASM_FILE_END
354 #define TARGET_ASM_FILE_END arm_file_end
356 #undef TARGET_ASM_ALIGNED_SI_OP
357 #define TARGET_ASM_ALIGNED_SI_OP NULL
358 #undef TARGET_ASM_INTEGER
359 #define TARGET_ASM_INTEGER arm_assemble_integer
361 #undef TARGET_PRINT_OPERAND
362 #define TARGET_PRINT_OPERAND arm_print_operand
363 #undef TARGET_PRINT_OPERAND_ADDRESS
364 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
365 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
366 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
368 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
369 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
371 #undef TARGET_ASM_FUNCTION_PROLOGUE
372 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
374 #undef TARGET_ASM_FUNCTION_EPILOGUE
375 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
377 #undef TARGET_OPTION_OVERRIDE
378 #define TARGET_OPTION_OVERRIDE arm_option_override
380 #undef TARGET_COMP_TYPE_ATTRIBUTES
381 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
383 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
384 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
386 #undef TARGET_SCHED_ADJUST_COST
387 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
389 #undef TARGET_SCHED_REORDER
390 #define TARGET_SCHED_REORDER arm_sched_reorder
392 #undef TARGET_REGISTER_MOVE_COST
393 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
395 #undef TARGET_MEMORY_MOVE_COST
396 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
398 #undef TARGET_ENCODE_SECTION_INFO
400 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
402 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
405 #undef TARGET_STRIP_NAME_ENCODING
406 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
408 #undef TARGET_ASM_INTERNAL_LABEL
409 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
411 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
412 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
414 #undef TARGET_FUNCTION_VALUE
415 #define TARGET_FUNCTION_VALUE arm_function_value
417 #undef TARGET_LIBCALL_VALUE
418 #define TARGET_LIBCALL_VALUE arm_libcall_value
420 #undef TARGET_FUNCTION_VALUE_REGNO_P
421 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
423 #undef TARGET_ASM_OUTPUT_MI_THUNK
424 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
425 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
426 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
428 #undef TARGET_RTX_COSTS
429 #define TARGET_RTX_COSTS arm_rtx_costs
430 #undef TARGET_ADDRESS_COST
431 #define TARGET_ADDRESS_COST arm_address_cost
433 #undef TARGET_SHIFT_TRUNCATION_MASK
434 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
435 #undef TARGET_VECTOR_MODE_SUPPORTED_P
436 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
437 #undef TARGET_ARRAY_MODE_SUPPORTED_P
438 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
439 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
440 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
441 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
442 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
443 arm_autovectorize_vector_sizes
445 #undef TARGET_MACHINE_DEPENDENT_REORG
446 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
448 #undef TARGET_INIT_BUILTINS
449 #define TARGET_INIT_BUILTINS arm_init_builtins
450 #undef TARGET_EXPAND_BUILTIN
451 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
452 #undef TARGET_BUILTIN_DECL
453 #define TARGET_BUILTIN_DECL arm_builtin_decl
455 #undef TARGET_INIT_LIBFUNCS
456 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
458 #undef TARGET_PROMOTE_FUNCTION_MODE
459 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
460 #undef TARGET_PROMOTE_PROTOTYPES
461 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
462 #undef TARGET_PASS_BY_REFERENCE
463 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
466 #undef TARGET_FUNCTION_ARG
467 #define TARGET_FUNCTION_ARG arm_function_arg
468 #undef TARGET_FUNCTION_ARG_ADVANCE
469 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
470 #undef TARGET_FUNCTION_ARG_BOUNDARY
471 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
473 #undef TARGET_SETUP_INCOMING_VARARGS
474 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
476 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
477 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
479 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
480 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
481 #undef TARGET_TRAMPOLINE_INIT
482 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
483 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
484 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
486 #undef TARGET_WARN_FUNC_RETURN
487 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
489 #undef TARGET_DEFAULT_SHORT_ENUMS
490 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
492 #undef TARGET_ALIGN_ANON_BITFIELD
493 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
495 #undef TARGET_NARROW_VOLATILE_BITFIELD
496 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
498 #undef TARGET_CXX_GUARD_TYPE
499 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
501 #undef TARGET_CXX_GUARD_MASK_BIT
502 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
504 #undef TARGET_CXX_GET_COOKIE_SIZE
505 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
507 #undef TARGET_CXX_COOKIE_HAS_SIZE
508 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
510 #undef TARGET_CXX_CDTOR_RETURNS_THIS
511 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
513 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
514 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
516 #undef TARGET_CXX_USE_AEABI_ATEXIT
517 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
519 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
520 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
521 arm_cxx_determine_class_data_visibility
523 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
524 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
526 #undef TARGET_RETURN_IN_MSB
527 #define TARGET_RETURN_IN_MSB arm_return_in_msb
529 #undef TARGET_RETURN_IN_MEMORY
530 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
532 #undef TARGET_MUST_PASS_IN_STACK
533 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
536 #undef TARGET_ASM_UNWIND_EMIT
537 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
539 /* EABI unwinding tables use a different format for the typeinfo tables. */
540 #undef TARGET_ASM_TTYPE
541 #define TARGET_ASM_TTYPE arm_output_ttype
543 #undef TARGET_ARM_EABI_UNWINDER
544 #define TARGET_ARM_EABI_UNWINDER true
546 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
547 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
549 #undef TARGET_ASM_INIT_SECTIONS
550 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
551 #endif /* ARM_UNWIND_INFO */
553 #undef TARGET_DWARF_REGISTER_SPAN
554 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
556 #undef TARGET_CANNOT_COPY_INSN_P
557 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
560 #undef TARGET_HAVE_TLS
561 #define TARGET_HAVE_TLS true
564 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
565 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
567 #undef TARGET_LEGITIMATE_CONSTANT_P
568 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
570 #undef TARGET_CANNOT_FORCE_CONST_MEM
571 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
573 #undef TARGET_MAX_ANCHOR_OFFSET
574 #define TARGET_MAX_ANCHOR_OFFSET 4095
576 /* The minimum is set such that the total size of the block
577 for a particular anchor is -4088 + 1 + 4095 bytes, which is
578 divisible by eight, ensuring natural spacing of anchors. */
579 #undef TARGET_MIN_ANCHOR_OFFSET
580 #define TARGET_MIN_ANCHOR_OFFSET -4088
582 #undef TARGET_SCHED_ISSUE_RATE
583 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
585 #undef TARGET_MANGLE_TYPE
586 #define TARGET_MANGLE_TYPE arm_mangle_type
588 #undef TARGET_BUILD_BUILTIN_VA_LIST
589 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
590 #undef TARGET_EXPAND_BUILTIN_VA_START
591 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
592 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
593 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
596 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
597 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
600 #undef TARGET_LEGITIMATE_ADDRESS_P
601 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
603 #undef TARGET_PREFERRED_RELOAD_CLASS
604 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
606 #undef TARGET_INVALID_PARAMETER_TYPE
607 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
609 #undef TARGET_INVALID_RETURN_TYPE
610 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
612 #undef TARGET_PROMOTED_TYPE
613 #define TARGET_PROMOTED_TYPE arm_promoted_type
615 #undef TARGET_CONVERT_TO_TYPE
616 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
618 #undef TARGET_SCALAR_MODE_SUPPORTED_P
619 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
621 #undef TARGET_FRAME_POINTER_REQUIRED
622 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
624 #undef TARGET_CAN_ELIMINATE
625 #define TARGET_CAN_ELIMINATE arm_can_eliminate
627 #undef TARGET_CONDITIONAL_REGISTER_USAGE
628 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
630 #undef TARGET_CLASS_LIKELY_SPILLED_P
631 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
633 #undef TARGET_VECTORIZE_BUILTINS
634 #define TARGET_VECTORIZE_BUILTINS
636 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
637 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
638 arm_builtin_vectorized_function
640 #undef TARGET_VECTOR_ALIGNMENT
641 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
643 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
644 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
645 arm_vector_alignment_reachable
647 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
648 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
649 arm_builtin_support_vector_misalignment
651 #undef TARGET_PREFERRED_RENAME_CLASS
652 #define TARGET_PREFERRED_RENAME_CLASS \
653 arm_preferred_rename_class
655 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
656 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
657 arm_vectorize_vec_perm_const_ok
659 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
660 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
661 arm_builtin_vectorization_cost
662 #undef TARGET_VECTORIZE_ADD_STMT_COST
663 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
665 #undef TARGET_CANONICALIZE_COMPARISON
666 #define TARGET_CANONICALIZE_COMPARISON \
667 arm_canonicalize_comparison
669 #undef TARGET_ASAN_SHADOW_OFFSET
670 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
672 #undef MAX_INSN_PER_IT_BLOCK
673 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
675 #undef TARGET_CAN_USE_DOLOOP_P
676 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
678 struct gcc_target targetm
= TARGET_INITIALIZER
;
680 /* Obstack for minipool constant handling. */
681 static struct obstack minipool_obstack
;
682 static char * minipool_startobj
;
684 /* The maximum number of insns skipped which
685 will be conditionalised if possible. */
686 static int max_insns_skipped
= 5;
688 extern FILE * asm_out_file
;
690 /* True if we are currently building a constant table. */
691 int making_const_table
;
693 /* The processor for which instructions should be scheduled. */
694 enum processor_type arm_tune
= arm_none
;
696 /* The current tuning set. */
697 const struct tune_params
*current_tune
;
699 /* Which floating point hardware to schedule for. */
702 /* Which floating point hardware to use. */
703 const struct arm_fpu_desc
*arm_fpu_desc
;
705 /* Used for Thumb call_via trampolines. */
706 rtx thumb_call_via_label
[14];
707 static int thumb_call_reg_needed
;
709 /* Bit values used to identify processor capabilities. */
710 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
711 #define FL_ARCH3M (1 << 1) /* Extended multiply */
712 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
713 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
714 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
715 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
716 #define FL_THUMB (1 << 6) /* Thumb aware */
717 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
718 #define FL_STRONG (1 << 8) /* StrongARM */
719 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
720 #define FL_XSCALE (1 << 10) /* XScale */
721 /* spare (1 << 11) */
722 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
723 media instructions. */
724 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
725 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
726 Note: ARM6 & 7 derivatives only. */
727 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
728 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
729 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
731 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
732 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
733 #define FL_NEON (1 << 20) /* Neon instructions. */
734 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
736 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
737 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
738 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
739 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
741 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
742 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
744 /* Flags that only affect tuning, not available instructions. */
745 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
748 #define FL_FOR_ARCH2 FL_NOTM
749 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
750 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
751 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
752 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
753 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
754 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
755 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
756 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
757 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
758 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
759 #define FL_FOR_ARCH6J FL_FOR_ARCH6
760 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
761 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
762 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
763 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
764 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
765 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
766 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
767 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
768 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
769 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
770 #define FL_FOR_ARCH8A (FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
771 | FL_ARM_DIV | FL_NOTM)
773 /* The bits in this mask specify which
774 instructions we are allowed to generate. */
775 static unsigned long insn_flags
= 0;
777 /* The bits in this mask specify which instruction scheduling options should
779 static unsigned long tune_flags
= 0;
781 /* The highest ARM architecture version supported by the
783 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
785 /* The following are used in the arm.md file as equivalents to bits
786 in the above two flag variables. */
788 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
791 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
794 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
797 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
800 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
803 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
806 /* Nonzero if this chip supports the ARM 6K extensions. */
809 /* Nonzero if instructions present in ARMv6-M can be used. */
812 /* Nonzero if this chip supports the ARM 7 extensions. */
/* Per-target capability and tuning flags.  All default to 0 here;
   NOTE(review): they appear to be derived from insn_flags/tune_flags when
   options are processed, but that code is not visible in this fragment.  */
815 /* Nonzero if instructions not present in the 'M' profile can be used. */
816 int arm_arch_notm
= 0;
818 /* Nonzero if instructions present in ARMv7E-M can be used. */
821 /* Nonzero if instructions present in ARMv8 can be used. */
824 /* Nonzero if this chip can benefit from load scheduling. */
825 int arm_ld_sched
= 0;
827 /* Nonzero if this chip is a StrongARM. */
828 int arm_tune_strongarm
= 0;
830 /* Nonzero if this chip supports Intel Wireless MMX technology. */
831 int arm_arch_iwmmxt
= 0;
833 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
834 int arm_arch_iwmmxt2
= 0;
836 /* Nonzero if this chip is an XScale. */
837 int arm_arch_xscale
= 0;
839 /* Nonzero if tuning for XScale */
840 int arm_tune_xscale
= 0;
842 /* Nonzero if we want to tune for stores that access the write-buffer.
843 This typically means an ARM6 or ARM7 with MMU or MPU. */
844 int arm_tune_wbuf
= 0;
846 /* Nonzero if tuning for Cortex-A9. */
847 int arm_tune_cortex_a9
= 0;
849 /* Nonzero if generating Thumb instructions. */
852 /* Nonzero if generating Thumb-1 instructions. */
855 /* Nonzero if we should define __THUMB_INTERWORK__ in the
857 XXX This is a bit of a hack, it's intended to help work around
858 problems in GLD which doesn't understand that armv5t code is
859 interworking clean. */
860 int arm_cpp_interwork
= 0;
862 /* Nonzero if chip supports Thumb 2. */
865 /* Nonzero if chip supports integer division instruction. */
/* Two separate flags: divide available in ARM state vs. Thumb state
   (cf. the FL_ARM_DIV / FL_THUMB_DIV bits in the FL_FOR_ARCH* masks
   earlier in this file).  */
866 int arm_arch_arm_hwdiv
;
867 int arm_arch_thumb_hwdiv
;
869 /* Nonzero if we should use Neon to handle 64-bits operations rather
870 than core registers. */
871 int prefer_neon_for_64bits
= 0;
873 /* Nonzero if we shouldn't use literal pools. */
874 bool arm_disable_literal_pool
= false;
876 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
877 we must report the mode of the memory reference from
878 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
879 enum machine_mode output_memory_reference_mode
;
881 /* The register number to be used for the PIC offset register. */
/* INVALID_REGNUM until a concrete register is chosen elsewhere.  */
882 unsigned arm_pic_register
= INVALID_REGNUM
;
884 /* Set to 1 after arm_reorg has started. Reset to start at the start of
885 the next function. */
886 static int after_arm_reorg
= 0;
/* NOTE(review): presumably the procedure-call-standard variant in effect
   for the current compilation; where it is set is not visible here.  */
888 enum arm_pcs arm_pcs_default
;
890 /* For an explanation of these variables, see final_prescan_insn below. */
892 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
893 enum arm_cond_code arm_current_cc
;
/* NOTE(review): label-related state used by the prescan machinery
   referred to above; final_prescan_insn is not visible in this fragment.  */
896 int arm_target_label
;
897 /* The number of conditionally executed insns, including the current insn. */
898 int arm_condexec_count
= 0;
899 /* A bitmask specifying the patterns for the IT block.
900 Zero means do not output an IT block before this insn. */
901 int arm_condexec_mask
= 0;
902 /* The number of bits used in arm_condexec_mask. */
903 int arm_condexec_masklen
= 0;
905 /* Nonzero if chip supports the ARMv8 CRC instructions. */
906 int arm_arch_crc
= 0;
908 /* The condition codes of the ARM, and the inverse function. */
909 static const char * const arm_condition_codes
[] =
911 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
912 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
915 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
916 int arm_regs_in_sequence
[] =
918 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
/* Assembler mnemonic for a logical-shift-left: unified syntax spells it
   "lsl", the old divided syntax spells it "asl".  */
921 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
/* Convenience string-equality test.  */
922 #define streq(string1, string2) (strcmp (string1, string2) == 0)
/* Bitmask of the low eight registers (r0-r7) usable as Thumb-2 work
   registers, with the frame pointer, SP, PC and PIC register bits
   explicitly masked out.  */
924 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
925 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
926 | (1 << PIC_OFFSET_TABLE_REGNUM)))
928 /* Initialization code. */
932 const char *const name
;
933 enum processor_type core
;
935 enum base_architecture base_arch
;
936 const unsigned long flags
;
937 const struct tune_params
*const tune
;
/* Expands to the three prefetch tuning fields (cf. the prefetch_slots,
   l1_size, l1_line_size parameters of ARM_PREFETCH_BENEFICIAL below):
   zero slots and -1 (unknown) cache geometry, i.e. do not prefetch.  */
941 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
942 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
947 /* arm generic vectorizer costs. */
949 struct cpu_vec_costs arm_default_vec_cost
= {
950 1, /* scalar_stmt_cost. */
951 1, /* scalar load_cost. */
952 1, /* scalar_store_cost. */
953 1, /* vec_stmt_cost. */
954 1, /* vec_to_scalar_cost. */
955 1, /* scalar_to_vec_cost. */
956 1, /* vec_align_load_cost. */
957 1, /* vec_unalign_load_cost. */
958 1, /* vec_unalign_store_cost. */
959 1, /* vec_store_cost. */
960 3, /* cond_taken_branch_cost. */
961 1, /* cond_not_taken_branch_cost. */
964 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
965 #include "aarch-cost-tables.h"
969 const struct cpu_cost_table cortexa9_extra_costs
=
976 COSTS_N_INSNS (1), /* Shift_reg. */
977 COSTS_N_INSNS (1), /* Arith_shift. */
978 COSTS_N_INSNS (2), /* Arith_shift_reg. */
980 COSTS_N_INSNS (1), /* Log_shift_reg. */
981 COSTS_N_INSNS (1), /* Extend. */
982 COSTS_N_INSNS (2), /* Extend_arith. */
983 COSTS_N_INSNS (1), /* Bfi. */
984 COSTS_N_INSNS (1), /* Bfx. */
987 true /* non_exec_costs_exec. */
992 COSTS_N_INSNS (3), /* Simple. */
993 COSTS_N_INSNS (3), /* Flag_setting. */
994 COSTS_N_INSNS (2), /* Extend. */
995 COSTS_N_INSNS (3), /* Add. */
996 COSTS_N_INSNS (2), /* Extend_add. */
997 COSTS_N_INSNS (30) /* Idiv. No HW div on Cortex A9. */
1001 0, /* Simple (N/A). */
1002 0, /* Flag_setting (N/A). */
1003 COSTS_N_INSNS (4), /* Extend. */
1005 COSTS_N_INSNS (4), /* Extend_add. */
1011 COSTS_N_INSNS (2), /* Load. */
1012 COSTS_N_INSNS (2), /* Load_sign_extend. */
1013 COSTS_N_INSNS (2), /* Ldrd. */
1014 COSTS_N_INSNS (2), /* Ldm_1st. */
1015 1, /* Ldm_regs_per_insn_1st. */
1016 2, /* Ldm_regs_per_insn_subsequent. */
1017 COSTS_N_INSNS (5), /* Loadf. */
1018 COSTS_N_INSNS (5), /* Loadd. */
1019 COSTS_N_INSNS (1), /* Load_unaligned. */
1020 COSTS_N_INSNS (2), /* Store. */
1021 COSTS_N_INSNS (2), /* Strd. */
1022 COSTS_N_INSNS (2), /* Stm_1st. */
1023 1, /* Stm_regs_per_insn_1st. */
1024 2, /* Stm_regs_per_insn_subsequent. */
1025 COSTS_N_INSNS (1), /* Storef. */
1026 COSTS_N_INSNS (1), /* Stored. */
1027 COSTS_N_INSNS (1) /* Store_unaligned. */
1032 COSTS_N_INSNS (14), /* Div. */
1033 COSTS_N_INSNS (4), /* Mult. */
1034 COSTS_N_INSNS (7), /* Mult_addsub. */
1035 COSTS_N_INSNS (30), /* Fma. */
1036 COSTS_N_INSNS (3), /* Addsub. */
1037 COSTS_N_INSNS (1), /* Fpconst. */
1038 COSTS_N_INSNS (1), /* Neg. */
1039 COSTS_N_INSNS (3), /* Compare. */
1040 COSTS_N_INSNS (3), /* Widen. */
1041 COSTS_N_INSNS (3), /* Narrow. */
1042 COSTS_N_INSNS (3), /* Toint. */
1043 COSTS_N_INSNS (3), /* Fromint. */
1044 COSTS_N_INSNS (3) /* Roundint. */
1048 COSTS_N_INSNS (24), /* Div. */
1049 COSTS_N_INSNS (5), /* Mult. */
1050 COSTS_N_INSNS (8), /* Mult_addsub. */
1051 COSTS_N_INSNS (30), /* Fma. */
1052 COSTS_N_INSNS (3), /* Addsub. */
1053 COSTS_N_INSNS (1), /* Fpconst. */
1054 COSTS_N_INSNS (1), /* Neg. */
1055 COSTS_N_INSNS (3), /* Compare. */
1056 COSTS_N_INSNS (3), /* Widen. */
1057 COSTS_N_INSNS (3), /* Narrow. */
1058 COSTS_N_INSNS (3), /* Toint. */
1059 COSTS_N_INSNS (3), /* Fromint. */
1060 COSTS_N_INSNS (3) /* Roundint. */
1065 COSTS_N_INSNS (1) /* Alu. */
1070 const struct cpu_cost_table cortexa7_extra_costs
=
1076 COSTS_N_INSNS (1), /* Shift. */
1077 COSTS_N_INSNS (1), /* Shift_reg. */
1078 COSTS_N_INSNS (1), /* Arith_shift. */
1079 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1080 COSTS_N_INSNS (1), /* Log_shift. */
1081 COSTS_N_INSNS (1), /* Log_shift_reg. */
1082 COSTS_N_INSNS (1), /* Extend. */
1083 COSTS_N_INSNS (1), /* Extend_arith. */
1084 COSTS_N_INSNS (1), /* Bfi. */
1085 COSTS_N_INSNS (1), /* Bfx. */
1086 COSTS_N_INSNS (1), /* Clz. */
1088 true /* non_exec_costs_exec. */
1095 COSTS_N_INSNS (1), /* Flag_setting. */
1096 COSTS_N_INSNS (1), /* Extend. */
1097 COSTS_N_INSNS (1), /* Add. */
1098 COSTS_N_INSNS (1), /* Extend_add. */
1099 COSTS_N_INSNS (7) /* Idiv. */
1103 0, /* Simple (N/A). */
1104 0, /* Flag_setting (N/A). */
1105 COSTS_N_INSNS (1), /* Extend. */
1107 COSTS_N_INSNS (2), /* Extend_add. */
1113 COSTS_N_INSNS (1), /* Load. */
1114 COSTS_N_INSNS (1), /* Load_sign_extend. */
1115 COSTS_N_INSNS (3), /* Ldrd. */
1116 COSTS_N_INSNS (1), /* Ldm_1st. */
1117 1, /* Ldm_regs_per_insn_1st. */
1118 2, /* Ldm_regs_per_insn_subsequent. */
1119 COSTS_N_INSNS (2), /* Loadf. */
1120 COSTS_N_INSNS (2), /* Loadd. */
1121 COSTS_N_INSNS (1), /* Load_unaligned. */
1122 COSTS_N_INSNS (1), /* Store. */
1123 COSTS_N_INSNS (3), /* Strd. */
1124 COSTS_N_INSNS (1), /* Stm_1st. */
1125 1, /* Stm_regs_per_insn_1st. */
1126 2, /* Stm_regs_per_insn_subsequent. */
1127 COSTS_N_INSNS (2), /* Storef. */
1128 COSTS_N_INSNS (2), /* Stored. */
1129 COSTS_N_INSNS (1) /* Store_unaligned. */
1134 COSTS_N_INSNS (15), /* Div. */
1135 COSTS_N_INSNS (3), /* Mult. */
1136 COSTS_N_INSNS (7), /* Mult_addsub. */
1137 COSTS_N_INSNS (7), /* Fma. */
1138 COSTS_N_INSNS (3), /* Addsub. */
1139 COSTS_N_INSNS (3), /* Fpconst. */
1140 COSTS_N_INSNS (3), /* Neg. */
1141 COSTS_N_INSNS (3), /* Compare. */
1142 COSTS_N_INSNS (3), /* Widen. */
1143 COSTS_N_INSNS (3), /* Narrow. */
1144 COSTS_N_INSNS (3), /* Toint. */
1145 COSTS_N_INSNS (3), /* Fromint. */
1146 COSTS_N_INSNS (3) /* Roundint. */
1150 COSTS_N_INSNS (30), /* Div. */
1151 COSTS_N_INSNS (6), /* Mult. */
1152 COSTS_N_INSNS (10), /* Mult_addsub. */
1153 COSTS_N_INSNS (7), /* Fma. */
1154 COSTS_N_INSNS (3), /* Addsub. */
1155 COSTS_N_INSNS (3), /* Fpconst. */
1156 COSTS_N_INSNS (3), /* Neg. */
1157 COSTS_N_INSNS (3), /* Compare. */
1158 COSTS_N_INSNS (3), /* Widen. */
1159 COSTS_N_INSNS (3), /* Narrow. */
1160 COSTS_N_INSNS (3), /* Toint. */
1161 COSTS_N_INSNS (3), /* Fromint. */
1162 COSTS_N_INSNS (3) /* Roundint. */
1167 COSTS_N_INSNS (1) /* Alu. */
1171 const struct cpu_cost_table cortexa12_extra_costs
=
1178 COSTS_N_INSNS (1), /* Shift_reg. */
1179 COSTS_N_INSNS (1), /* Arith_shift. */
1180 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1181 COSTS_N_INSNS (1), /* Log_shift. */
1182 COSTS_N_INSNS (1), /* Log_shift_reg. */
1184 COSTS_N_INSNS (1), /* Extend_arith. */
1186 COSTS_N_INSNS (1), /* Bfx. */
1187 COSTS_N_INSNS (1), /* Clz. */
1189 true /* non_exec_costs_exec. */
1194 COSTS_N_INSNS (2), /* Simple. */
1195 COSTS_N_INSNS (3), /* Flag_setting. */
1196 COSTS_N_INSNS (2), /* Extend. */
1197 COSTS_N_INSNS (3), /* Add. */
1198 COSTS_N_INSNS (2), /* Extend_add. */
1199 COSTS_N_INSNS (18) /* Idiv. */
1203 0, /* Simple (N/A). */
1204 0, /* Flag_setting (N/A). */
1205 COSTS_N_INSNS (3), /* Extend. */
1207 COSTS_N_INSNS (3), /* Extend_add. */
1213 COSTS_N_INSNS (3), /* Load. */
1214 COSTS_N_INSNS (3), /* Load_sign_extend. */
1215 COSTS_N_INSNS (3), /* Ldrd. */
1216 COSTS_N_INSNS (3), /* Ldm_1st. */
1217 1, /* Ldm_regs_per_insn_1st. */
1218 2, /* Ldm_regs_per_insn_subsequent. */
1219 COSTS_N_INSNS (3), /* Loadf. */
1220 COSTS_N_INSNS (3), /* Loadd. */
1221 0, /* Load_unaligned. */
1225 1, /* Stm_regs_per_insn_1st. */
1226 2, /* Stm_regs_per_insn_subsequent. */
1227 COSTS_N_INSNS (2), /* Storef. */
1228 COSTS_N_INSNS (2), /* Stored. */
1229 0 /* Store_unaligned. */
1234 COSTS_N_INSNS (17), /* Div. */
1235 COSTS_N_INSNS (4), /* Mult. */
1236 COSTS_N_INSNS (8), /* Mult_addsub. */
1237 COSTS_N_INSNS (8), /* Fma. */
1238 COSTS_N_INSNS (4), /* Addsub. */
1239 COSTS_N_INSNS (2), /* Fpconst. */
1240 COSTS_N_INSNS (2), /* Neg. */
1241 COSTS_N_INSNS (2), /* Compare. */
1242 COSTS_N_INSNS (4), /* Widen. */
1243 COSTS_N_INSNS (4), /* Narrow. */
1244 COSTS_N_INSNS (4), /* Toint. */
1245 COSTS_N_INSNS (4), /* Fromint. */
1246 COSTS_N_INSNS (4) /* Roundint. */
1250 COSTS_N_INSNS (31), /* Div. */
1251 COSTS_N_INSNS (4), /* Mult. */
1252 COSTS_N_INSNS (8), /* Mult_addsub. */
1253 COSTS_N_INSNS (8), /* Fma. */
1254 COSTS_N_INSNS (4), /* Addsub. */
1255 COSTS_N_INSNS (2), /* Fpconst. */
1256 COSTS_N_INSNS (2), /* Neg. */
1257 COSTS_N_INSNS (2), /* Compare. */
1258 COSTS_N_INSNS (4), /* Widen. */
1259 COSTS_N_INSNS (4), /* Narrow. */
1260 COSTS_N_INSNS (4), /* Toint. */
1261 COSTS_N_INSNS (4), /* Fromint. */
1262 COSTS_N_INSNS (4) /* Roundint. */
1267 COSTS_N_INSNS (1) /* Alu. */
1271 const struct cpu_cost_table cortexa15_extra_costs
=
1279 COSTS_N_INSNS (1), /* Arith_shift. */
1280 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1281 COSTS_N_INSNS (1), /* Log_shift. */
1282 COSTS_N_INSNS (1), /* Log_shift_reg. */
1284 COSTS_N_INSNS (1), /* Extend_arith. */
1285 COSTS_N_INSNS (1), /* Bfi. */
1289 true /* non_exec_costs_exec. */
1294 COSTS_N_INSNS (2), /* Simple. */
1295 COSTS_N_INSNS (3), /* Flag_setting. */
1296 COSTS_N_INSNS (2), /* Extend. */
1297 COSTS_N_INSNS (2), /* Add. */
1298 COSTS_N_INSNS (2), /* Extend_add. */
1299 COSTS_N_INSNS (18) /* Idiv. */
1303 0, /* Simple (N/A). */
1304 0, /* Flag_setting (N/A). */
1305 COSTS_N_INSNS (3), /* Extend. */
1307 COSTS_N_INSNS (3), /* Extend_add. */
1313 COSTS_N_INSNS (3), /* Load. */
1314 COSTS_N_INSNS (3), /* Load_sign_extend. */
1315 COSTS_N_INSNS (3), /* Ldrd. */
1316 COSTS_N_INSNS (4), /* Ldm_1st. */
1317 1, /* Ldm_regs_per_insn_1st. */
1318 2, /* Ldm_regs_per_insn_subsequent. */
1319 COSTS_N_INSNS (4), /* Loadf. */
1320 COSTS_N_INSNS (4), /* Loadd. */
1321 0, /* Load_unaligned. */
1324 COSTS_N_INSNS (1), /* Stm_1st. */
1325 1, /* Stm_regs_per_insn_1st. */
1326 2, /* Stm_regs_per_insn_subsequent. */
1329 0 /* Store_unaligned. */
1334 COSTS_N_INSNS (17), /* Div. */
1335 COSTS_N_INSNS (4), /* Mult. */
1336 COSTS_N_INSNS (8), /* Mult_addsub. */
1337 COSTS_N_INSNS (8), /* Fma. */
1338 COSTS_N_INSNS (4), /* Addsub. */
1339 COSTS_N_INSNS (2), /* Fpconst. */
1340 COSTS_N_INSNS (2), /* Neg. */
1341 COSTS_N_INSNS (5), /* Compare. */
1342 COSTS_N_INSNS (4), /* Widen. */
1343 COSTS_N_INSNS (4), /* Narrow. */
1344 COSTS_N_INSNS (4), /* Toint. */
1345 COSTS_N_INSNS (4), /* Fromint. */
1346 COSTS_N_INSNS (4) /* Roundint. */
1350 COSTS_N_INSNS (31), /* Div. */
1351 COSTS_N_INSNS (4), /* Mult. */
1352 COSTS_N_INSNS (8), /* Mult_addsub. */
1353 COSTS_N_INSNS (8), /* Fma. */
1354 COSTS_N_INSNS (4), /* Addsub. */
1355 COSTS_N_INSNS (2), /* Fpconst. */
1356 COSTS_N_INSNS (2), /* Neg. */
1357 COSTS_N_INSNS (2), /* Compare. */
1358 COSTS_N_INSNS (4), /* Widen. */
1359 COSTS_N_INSNS (4), /* Narrow. */
1360 COSTS_N_INSNS (4), /* Toint. */
1361 COSTS_N_INSNS (4), /* Fromint. */
1362 COSTS_N_INSNS (4) /* Roundint. */
1367 COSTS_N_INSNS (1) /* Alu. */
1371 const struct cpu_cost_table v7m_extra_costs
=
1379 0, /* Arith_shift. */
1380 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1382 COSTS_N_INSNS (1), /* Log_shift_reg. */
1384 COSTS_N_INSNS (1), /* Extend_arith. */
1388 COSTS_N_INSNS (1), /* non_exec. */
1389 false /* non_exec_costs_exec. */
1394 COSTS_N_INSNS (1), /* Simple. */
1395 COSTS_N_INSNS (1), /* Flag_setting. */
1396 COSTS_N_INSNS (2), /* Extend. */
1397 COSTS_N_INSNS (1), /* Add. */
1398 COSTS_N_INSNS (3), /* Extend_add. */
1399 COSTS_N_INSNS (8) /* Idiv. */
1403 0, /* Simple (N/A). */
1404 0, /* Flag_setting (N/A). */
1405 COSTS_N_INSNS (2), /* Extend. */
1407 COSTS_N_INSNS (3), /* Extend_add. */
1413 COSTS_N_INSNS (2), /* Load. */
1414 0, /* Load_sign_extend. */
1415 COSTS_N_INSNS (3), /* Ldrd. */
1416 COSTS_N_INSNS (2), /* Ldm_1st. */
1417 1, /* Ldm_regs_per_insn_1st. */
1418 1, /* Ldm_regs_per_insn_subsequent. */
1419 COSTS_N_INSNS (2), /* Loadf. */
1420 COSTS_N_INSNS (3), /* Loadd. */
1421 COSTS_N_INSNS (1), /* Load_unaligned. */
1422 COSTS_N_INSNS (2), /* Store. */
1423 COSTS_N_INSNS (3), /* Strd. */
1424 COSTS_N_INSNS (2), /* Stm_1st. */
1425 1, /* Stm_regs_per_insn_1st. */
1426 1, /* Stm_regs_per_insn_subsequent. */
1427 COSTS_N_INSNS (2), /* Storef. */
1428 COSTS_N_INSNS (3), /* Stored. */
1429 COSTS_N_INSNS (1) /* Store_unaligned. */
1434 COSTS_N_INSNS (7), /* Div. */
1435 COSTS_N_INSNS (2), /* Mult. */
1436 COSTS_N_INSNS (5), /* Mult_addsub. */
1437 COSTS_N_INSNS (3), /* Fma. */
1438 COSTS_N_INSNS (1), /* Addsub. */
1450 COSTS_N_INSNS (15), /* Div. */
1451 COSTS_N_INSNS (5), /* Mult. */
1452 COSTS_N_INSNS (7), /* Mult_addsub. */
1453 COSTS_N_INSNS (7), /* Fma. */
1454 COSTS_N_INSNS (3), /* Addsub. */
1467 COSTS_N_INSNS (1) /* Alu. */
1471 const struct tune_params arm_slowmul_tune
=
1473 arm_slowmul_rtx_costs
,
1475 NULL
, /* Sched adj cost. */
1476 3, /* Constant limit. */
1477 5, /* Max cond insns. */
1478 ARM_PREFETCH_NOT_BENEFICIAL
,
1479 true, /* Prefer constant pool. */
1480 arm_default_branch_cost
,
1481 false, /* Prefer LDRD/STRD. */
1482 {true, true}, /* Prefer non short circuit. */
1483 &arm_default_vec_cost
, /* Vectorizer costs. */
1484 false /* Prefer Neon for 64-bits bitops. */
1487 const struct tune_params arm_fastmul_tune
=
1489 arm_fastmul_rtx_costs
,
1491 NULL
, /* Sched adj cost. */
1492 1, /* Constant limit. */
1493 5, /* Max cond insns. */
1494 ARM_PREFETCH_NOT_BENEFICIAL
,
1495 true, /* Prefer constant pool. */
1496 arm_default_branch_cost
,
1497 false, /* Prefer LDRD/STRD. */
1498 {true, true}, /* Prefer non short circuit. */
1499 &arm_default_vec_cost
, /* Vectorizer costs. */
1500 false /* Prefer Neon for 64-bits bitops. */
1503 /* StrongARM has early execution of branches, so a sequence that is worth
1504 skipping is shorter. Set max_insns_skipped to a lower value. */
1506 const struct tune_params arm_strongarm_tune
=
1508 arm_fastmul_rtx_costs
,
1510 NULL
, /* Sched adj cost. */
1511 1, /* Constant limit. */
1512 3, /* Max cond insns. */
1513 ARM_PREFETCH_NOT_BENEFICIAL
,
1514 true, /* Prefer constant pool. */
1515 arm_default_branch_cost
,
1516 false, /* Prefer LDRD/STRD. */
1517 {true, true}, /* Prefer non short circuit. */
1518 &arm_default_vec_cost
, /* Vectorizer costs. */
1519 false /* Prefer Neon for 64-bits bitops. */
1522 const struct tune_params arm_xscale_tune
=
1524 arm_xscale_rtx_costs
,
1526 xscale_sched_adjust_cost
,
1527 2, /* Constant limit. */
1528 3, /* Max cond insns. */
1529 ARM_PREFETCH_NOT_BENEFICIAL
,
1530 true, /* Prefer constant pool. */
1531 arm_default_branch_cost
,
1532 false, /* Prefer LDRD/STRD. */
1533 {true, true}, /* Prefer non short circuit. */
1534 &arm_default_vec_cost
, /* Vectorizer costs. */
1535 false /* Prefer Neon for 64-bits bitops. */
1538 const struct tune_params arm_9e_tune
=
1542 NULL
, /* Sched adj cost. */
1543 1, /* Constant limit. */
1544 5, /* Max cond insns. */
1545 ARM_PREFETCH_NOT_BENEFICIAL
,
1546 true, /* Prefer constant pool. */
1547 arm_default_branch_cost
,
1548 false, /* Prefer LDRD/STRD. */
1549 {true, true}, /* Prefer non short circuit. */
1550 &arm_default_vec_cost
, /* Vectorizer costs. */
1551 false /* Prefer Neon for 64-bits bitops. */
1554 const struct tune_params arm_v6t2_tune
=
1558 NULL
, /* Sched adj cost. */
1559 1, /* Constant limit. */
1560 5, /* Max cond insns. */
1561 ARM_PREFETCH_NOT_BENEFICIAL
,
1562 false, /* Prefer constant pool. */
1563 arm_default_branch_cost
,
1564 false, /* Prefer LDRD/STRD. */
1565 {true, true}, /* Prefer non short circuit. */
1566 &arm_default_vec_cost
, /* Vectorizer costs. */
1567 false /* Prefer Neon for 64-bits bitops. */
1570 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1571 const struct tune_params arm_cortex_tune
=
1574 &generic_extra_costs
,
1575 NULL
, /* Sched adj cost. */
1576 1, /* Constant limit. */
1577 5, /* Max cond insns. */
1578 ARM_PREFETCH_NOT_BENEFICIAL
,
1579 false, /* Prefer constant pool. */
1580 arm_default_branch_cost
,
1581 false, /* Prefer LDRD/STRD. */
1582 {true, true}, /* Prefer non short circuit. */
1583 &arm_default_vec_cost
, /* Vectorizer costs. */
1584 false /* Prefer Neon for 64-bits bitops. */
1587 const struct tune_params arm_cortex_a7_tune
=
1590 &cortexa7_extra_costs
,
1592 1, /* Constant limit. */
1593 5, /* Max cond insns. */
1594 ARM_PREFETCH_NOT_BENEFICIAL
,
1595 false, /* Prefer constant pool. */
1596 arm_default_branch_cost
,
1597 false, /* Prefer LDRD/STRD. */
1598 {true, true}, /* Prefer non short circuit. */
1599 &arm_default_vec_cost
, /* Vectorizer costs. */
1600 false /* Prefer Neon for 64-bits bitops. */
1603 const struct tune_params arm_cortex_a15_tune
=
1606 &cortexa15_extra_costs
,
1607 NULL
, /* Sched adj cost. */
1608 1, /* Constant limit. */
1609 2, /* Max cond insns. */
1610 ARM_PREFETCH_NOT_BENEFICIAL
,
1611 false, /* Prefer constant pool. */
1612 arm_default_branch_cost
,
1613 true, /* Prefer LDRD/STRD. */
1614 {true, true}, /* Prefer non short circuit. */
1615 &arm_default_vec_cost
, /* Vectorizer costs. */
1616 false /* Prefer Neon for 64-bits bitops. */
1619 const struct tune_params arm_cortex_a53_tune
=
1622 &cortexa53_extra_costs
,
1623 NULL
, /* Scheduler cost adjustment. */
1624 1, /* Constant limit. */
1625 5, /* Max cond insns. */
1626 ARM_PREFETCH_NOT_BENEFICIAL
,
1627 false, /* Prefer constant pool. */
1628 arm_default_branch_cost
,
1629 false, /* Prefer LDRD/STRD. */
1630 {true, true}, /* Prefer non short circuit. */
1631 &arm_default_vec_cost
, /* Vectorizer costs. */
1632 false /* Prefer Neon for 64-bits bitops. */
1635 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1636 less appealing. Set max_insns_skipped to a low value. */
1638 const struct tune_params arm_cortex_a5_tune
=
1642 NULL
, /* Sched adj cost. */
1643 1, /* Constant limit. */
1644 1, /* Max cond insns. */
1645 ARM_PREFETCH_NOT_BENEFICIAL
,
1646 false, /* Prefer constant pool. */
1647 arm_cortex_a5_branch_cost
,
1648 false, /* Prefer LDRD/STRD. */
1649 {false, false}, /* Prefer non short circuit. */
1650 &arm_default_vec_cost
, /* Vectorizer costs. */
1651 false /* Prefer Neon for 64-bits bitops. */
1654 const struct tune_params arm_cortex_a9_tune
=
1657 &cortexa9_extra_costs
,
1658 cortex_a9_sched_adjust_cost
,
1659 1, /* Constant limit. */
1660 5, /* Max cond insns. */
1661 ARM_PREFETCH_BENEFICIAL(4,32,32),
1662 false, /* Prefer constant pool. */
1663 arm_default_branch_cost
,
1664 false, /* Prefer LDRD/STRD. */
1665 {true, true}, /* Prefer non short circuit. */
1666 &arm_default_vec_cost
, /* Vectorizer costs. */
1667 false /* Prefer Neon for 64-bits bitops. */
1670 const struct tune_params arm_cortex_a12_tune
=
1673 &cortexa12_extra_costs
,
1675 1, /* Constant limit. */
1676 5, /* Max cond insns. */
1677 ARM_PREFETCH_BENEFICIAL(4,32,32),
1678 false, /* Prefer constant pool. */
1679 arm_default_branch_cost
,
1680 true, /* Prefer LDRD/STRD. */
1681 {true, true}, /* Prefer non short circuit. */
1682 &arm_default_vec_cost
, /* Vectorizer costs. */
1683 false /* Prefer Neon for 64-bits bitops. */
1686 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
1687 cycle to execute each. An LDR from the constant pool also takes two cycles
1688 to execute, but mildly increases pipelining opportunity (consecutive
1689 loads/stores can be pipelined together, saving one cycle), and may also
1690 improve icache utilisation. Hence we prefer the constant pool for such
1693 const struct tune_params arm_v7m_tune
=
1697 NULL
, /* Sched adj cost. */
1698 1, /* Constant limit. */
1699 5, /* Max cond insns. */
1700 ARM_PREFETCH_NOT_BENEFICIAL
,
1701 true, /* Prefer constant pool. */
1702 arm_cortex_m_branch_cost
,
1703 false, /* Prefer LDRD/STRD. */
1704 {false, false}, /* Prefer non short circuit. */
1705 &arm_default_vec_cost
, /* Vectorizer costs. */
1706 false /* Prefer Neon for 64-bits bitops. */
1709 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1710 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1711 const struct tune_params arm_v6m_tune
=
1715 NULL
, /* Sched adj cost. */
1716 1, /* Constant limit. */
1717 5, /* Max cond insns. */
1718 ARM_PREFETCH_NOT_BENEFICIAL
,
1719 false, /* Prefer constant pool. */
1720 arm_default_branch_cost
,
1721 false, /* Prefer LDRD/STRD. */
1722 {false, false}, /* Prefer non short circuit. */
1723 &arm_default_vec_cost
, /* Vectorizer costs. */
1724 false /* Prefer Neon for 64-bits bitops. */
1727 const struct tune_params arm_fa726te_tune
=
1731 fa726te_sched_adjust_cost
,
1732 1, /* Constant limit. */
1733 5, /* Max cond insns. */
1734 ARM_PREFETCH_NOT_BENEFICIAL
,
1735 true, /* Prefer constant pool. */
1736 arm_default_branch_cost
,
1737 false, /* Prefer LDRD/STRD. */
1738 {true, true}, /* Prefer non short circuit. */
1739 &arm_default_vec_cost
, /* Vectorizer costs. */
1740 false /* Prefer Neon for 64-bits bitops. */
1744 /* Not all of these give usefully different compilation alternatives,
1745 but there is no simple way of generalizing them. */
1746 static const struct processors all_cores
[] =
1749 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
1750 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1751 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1752 #include "arm-cores.def"
1754 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
1757 static const struct processors all_architectures
[] =
1759 /* ARM Architectures */
1760 /* We don't specify tuning costs here as it will be figured out
1763 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1764 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1765 #include "arm-arches.def"
1767 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, 0, NULL
}
1771 /* These are populated as commandline arguments are processed, or NULL
1772 if not specified. */
1773 static const struct processors
*arm_selected_arch
;
1774 static const struct processors
*arm_selected_cpu
;
1775 static const struct processors
*arm_selected_tune
;
1777 /* The name of the preprocessor macro to define for this architecture. */
1779 char arm_arch_name
[] = "__ARM_ARCH_0UNK__";
1781 /* Available values for -mfpu=. */
1783 static const struct arm_fpu_desc all_fpus
[] =
1785 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1786 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1787 #include "arm-fpus.def"
1792 /* Supported TLS relocations. */
1800 TLS_DESCSEQ
/* GNU scheme */
1803 /* The maximum number of insns to be used when loading a constant. */
1805 arm_constant_limit (bool size_p
)
1807 return size_p
? 1 : current_tune
->constant_limit
;
1810 /* Emit an insn that's a simple single-set. Both the operands must be known
1813 emit_set_insn (rtx x
, rtx y
)
1815 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
1818 /* Return the number of bits set in VALUE. */
1820 bit_count (unsigned long value
)
1822 unsigned long count
= 0;
1827 value
&= value
- 1; /* Clear the least-significant set bit. */
1835 enum machine_mode mode
;
1837 } arm_fixed_mode_set
;
1839 /* A small helper for setting fixed-point library libfuncs. */
1842 arm_set_fixed_optab_libfunc (optab optable
, enum machine_mode mode
,
1843 const char *funcname
, const char *modename
,
1848 if (num_suffix
== 0)
1849 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
1851 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
1853 set_optab_libfunc (optable
, mode
, buffer
);
1857 arm_set_fixed_conv_libfunc (convert_optab optable
, enum machine_mode to
,
1858 enum machine_mode from
, const char *funcname
,
1859 const char *toname
, const char *fromname
)
1862 const char *maybe_suffix_2
= "";
1864 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1865 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
1866 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
1867 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
1868 maybe_suffix_2
= "2";
1870 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
1873 set_conv_libfunc (optable
, to
, from
, buffer
);
1876 /* Set up library functions unique to ARM. */
1879 arm_init_libfuncs (void)
1881 /* For Linux, we have access to kernel support for atomic operations. */
1882 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
1883 init_sync_libfuncs (2 * UNITS_PER_WORD
);
1885 /* There are no special library functions unless we are using the
1890 /* The functions below are described in Section 4 of the "Run-Time
1891 ABI for the ARM architecture", Version 1.0. */
1893 /* Double-precision floating-point arithmetic. Table 2. */
1894 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
1895 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
1896 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
1897 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
1898 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
1900 /* Double-precision comparisons. Table 3. */
1901 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
1902 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
1903 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
1904 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
1905 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
1906 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
1907 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
1909 /* Single-precision floating-point arithmetic. Table 4. */
1910 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
1911 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
1912 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
1913 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
1914 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
1916 /* Single-precision comparisons. Table 5. */
1917 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
1918 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
1919 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
1920 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
1921 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
1922 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
1923 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
1925 /* Floating-point to integer conversions. Table 6. */
1926 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
1927 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
1928 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
1929 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
1930 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
1931 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
1932 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
1933 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
1935 /* Conversions between floating types. Table 7. */
1936 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
1937 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
1939 /* Integer to floating-point conversions. Table 8. */
1940 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
1941 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
1942 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
1943 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
1944 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
1945 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
1946 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
1947 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
1949 /* Long long. Table 9. */
1950 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
1951 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
1952 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
1953 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
1954 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
1955 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
1956 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
1957 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
1959 /* Integer (32/32->32) division. \S 4.3.1. */
1960 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
1961 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
1963 /* The divmod functions are designed so that they can be used for
1964 plain division, even though they return both the quotient and the
1965 remainder. The quotient is returned in the usual location (i.e.,
1966 r0 for SImode, {r0, r1} for DImode), just as would be expected
1967 for an ordinary division routine. Because the AAPCS calling
1968 conventions specify that all of { r0, r1, r2, r3 } are
1969 callee-saved registers, there is no need to tell the compiler
1970 explicitly that those registers are clobbered by these
1972 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
1973 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
1975 /* For SImode division the ABI provides div-without-mod routines,
1976 which are faster. */
1977 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
1978 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
1980 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1981 divmod libcalls instead. */
1982 set_optab_libfunc (smod_optab
, DImode
, NULL
);
1983 set_optab_libfunc (umod_optab
, DImode
, NULL
);
1984 set_optab_libfunc (smod_optab
, SImode
, NULL
);
1985 set_optab_libfunc (umod_optab
, SImode
, NULL
);
1987 /* Half-precision float operations. The compiler handles all operations
1988 with NULL libfuncs by converting the SFmode. */
1989 switch (arm_fp16_format
)
1991 case ARM_FP16_FORMAT_IEEE
:
1992 case ARM_FP16_FORMAT_ALTERNATIVE
:
1995 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
1996 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
1998 : "__gnu_f2h_alternative"));
1999 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2000 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2002 : "__gnu_h2f_alternative"));
2005 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2006 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2007 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2008 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2009 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2012 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2013 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2014 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2015 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2016 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2017 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2018 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2025 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2027 const arm_fixed_mode_set fixed_arith_modes
[] =
2048 const arm_fixed_mode_set fixed_conv_modes
[] =
2078 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2080 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2081 "add", fixed_arith_modes
[i
].name
, 3);
2082 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2083 "ssadd", fixed_arith_modes
[i
].name
, 3);
2084 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2085 "usadd", fixed_arith_modes
[i
].name
, 3);
2086 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2087 "sub", fixed_arith_modes
[i
].name
, 3);
2088 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2089 "sssub", fixed_arith_modes
[i
].name
, 3);
2090 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2091 "ussub", fixed_arith_modes
[i
].name
, 3);
2092 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2093 "mul", fixed_arith_modes
[i
].name
, 3);
2094 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2095 "ssmul", fixed_arith_modes
[i
].name
, 3);
2096 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2097 "usmul", fixed_arith_modes
[i
].name
, 3);
2098 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2099 "div", fixed_arith_modes
[i
].name
, 3);
2100 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2101 "udiv", fixed_arith_modes
[i
].name
, 3);
2102 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2103 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2104 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2105 "usdiv", fixed_arith_modes
[i
].name
, 3);
2106 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2107 "neg", fixed_arith_modes
[i
].name
, 2);
2108 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2109 "ssneg", fixed_arith_modes
[i
].name
, 2);
2110 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2111 "usneg", fixed_arith_modes
[i
].name
, 2);
2112 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2113 "ashl", fixed_arith_modes
[i
].name
, 3);
2114 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2115 "ashr", fixed_arith_modes
[i
].name
, 3);
2116 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2117 "lshr", fixed_arith_modes
[i
].name
, 3);
2118 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2119 "ssashl", fixed_arith_modes
[i
].name
, 3);
2120 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2121 "usashl", fixed_arith_modes
[i
].name
, 3);
2122 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2123 "cmp", fixed_arith_modes
[i
].name
, 2);
2126 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2127 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2130 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2131 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2134 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2135 fixed_conv_modes
[j
].mode
, "fract",
2136 fixed_conv_modes
[i
].name
,
2137 fixed_conv_modes
[j
].name
);
2138 arm_set_fixed_conv_libfunc (satfract_optab
,
2139 fixed_conv_modes
[i
].mode
,
2140 fixed_conv_modes
[j
].mode
, "satfract",
2141 fixed_conv_modes
[i
].name
,
2142 fixed_conv_modes
[j
].name
);
2143 arm_set_fixed_conv_libfunc (fractuns_optab
,
2144 fixed_conv_modes
[i
].mode
,
2145 fixed_conv_modes
[j
].mode
, "fractuns",
2146 fixed_conv_modes
[i
].name
,
2147 fixed_conv_modes
[j
].name
);
2148 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2149 fixed_conv_modes
[i
].mode
,
2150 fixed_conv_modes
[j
].mode
, "satfractuns",
2151 fixed_conv_modes
[i
].name
,
2152 fixed_conv_modes
[j
].name
);
2156 if (TARGET_AAPCS_BASED
)
2157 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2160 /* On AAPCS systems, this is the "struct __va_list". */
2161 static GTY(()) tree va_list_type
;
2163 /* Return the type to use as __builtin_va_list. */
2165 arm_build_builtin_va_list (void)
2170 if (!TARGET_AAPCS_BASED
)
2171 return std_build_builtin_va_list ();
2173 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2181 The C Library ABI further reinforces this definition in \S
2184 We must follow this definition exactly. The structure tag
2185 name is visible in C++ mangled names, and thus forms a part
2186 of the ABI. The field name may be used by people who
2187 #include <stdarg.h>. */
2188 /* Create the type. */
2189 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2190 /* Give it the required name. */
2191 va_list_name
= build_decl (BUILTINS_LOCATION
,
2193 get_identifier ("__va_list"),
2195 DECL_ARTIFICIAL (va_list_name
) = 1;
2196 TYPE_NAME (va_list_type
) = va_list_name
;
2197 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2198 /* Create the __ap field. */
2199 ap_field
= build_decl (BUILTINS_LOCATION
,
2201 get_identifier ("__ap"),
2203 DECL_ARTIFICIAL (ap_field
) = 1;
2204 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2205 TYPE_FIELDS (va_list_type
) = ap_field
;
2206 /* Compute its layout. */
2207 layout_type (va_list_type
);
2209 return va_list_type
;
2212 /* Return an expression of type "void *" pointing to the next
2213 available argument in a variable-argument list. VALIST is the
2214 user-level va_list object, of type __builtin_va_list. */
2216 arm_extract_valist_ptr (tree valist
)
2218 if (TREE_TYPE (valist
) == error_mark_node
)
2219 return error_mark_node
;
2221 /* On an AAPCS target, the pointer is stored within "struct
2223 if (TARGET_AAPCS_BASED
)
2225 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2226 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2227 valist
, ap_field
, NULL_TREE
);
2233 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2235 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2237 valist
= arm_extract_valist_ptr (valist
);
2238 std_expand_builtin_va_start (valist
, nextarg
);
2241 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2243 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2246 valist
= arm_extract_valist_ptr (valist
);
2247 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2250 /* Fix up any incompatible options that the user has specified. */
2252 arm_option_override (void)
2254 if (global_options_set
.x_arm_arch_option
)
2255 arm_selected_arch
= &all_architectures
[arm_arch_option
];
2257 if (global_options_set
.x_arm_cpu_option
)
2259 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
2260 arm_selected_tune
= &all_cores
[(int) arm_cpu_option
];
2263 if (global_options_set
.x_arm_tune_option
)
2264 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
2266 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2267 SUBTARGET_OVERRIDE_OPTIONS
;
2270 if (arm_selected_arch
)
2272 if (arm_selected_cpu
)
2274 /* Check for conflict between mcpu and march. */
2275 if ((arm_selected_cpu
->flags
^ arm_selected_arch
->flags
) & ~FL_TUNE
)
2277 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2278 arm_selected_cpu
->name
, arm_selected_arch
->name
);
2279 /* -march wins for code generation.
2280 -mcpu wins for default tuning. */
2281 if (!arm_selected_tune
)
2282 arm_selected_tune
= arm_selected_cpu
;
2284 arm_selected_cpu
= arm_selected_arch
;
2288 arm_selected_arch
= NULL
;
2291 /* Pick a CPU based on the architecture. */
2292 arm_selected_cpu
= arm_selected_arch
;
2295 /* If the user did not specify a processor, choose one for them. */
2296 if (!arm_selected_cpu
)
2298 const struct processors
* sel
;
2299 unsigned int sought
;
2301 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
2302 if (!arm_selected_cpu
->name
)
2304 #ifdef SUBTARGET_CPU_DEFAULT
2305 /* Use the subtarget default CPU if none was specified by
2307 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
2309 /* Default to ARM6. */
2310 if (!arm_selected_cpu
->name
)
2311 arm_selected_cpu
= &all_cores
[arm6
];
2314 sel
= arm_selected_cpu
;
2315 insn_flags
= sel
->flags
;
2317 /* Now check to see if the user has specified some command line
2318 switch that require certain abilities from the cpu. */
2321 if (TARGET_INTERWORK
|| TARGET_THUMB
)
2323 sought
|= (FL_THUMB
| FL_MODE32
);
2325 /* There are no ARM processors that support both APCS-26 and
2326 interworking. Therefore we force FL_MODE26 to be removed
2327 from insn_flags here (if it was set), so that the search
2328 below will always be able to find a compatible processor. */
2329 insn_flags
&= ~FL_MODE26
;
2332 if (sought
!= 0 && ((sought
& insn_flags
) != sought
))
2334 /* Try to locate a CPU type that supports all of the abilities
2335 of the default CPU, plus the extra abilities requested by
2337 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2338 if ((sel
->flags
& sought
) == (sought
| insn_flags
))
2341 if (sel
->name
== NULL
)
2343 unsigned current_bit_count
= 0;
2344 const struct processors
* best_fit
= NULL
;
2346 /* Ideally we would like to issue an error message here
2347 saying that it was not possible to find a CPU compatible
2348 with the default CPU, but which also supports the command
2349 line options specified by the programmer, and so they
2350 ought to use the -mcpu=<name> command line option to
2351 override the default CPU type.
2353 If we cannot find a cpu that has both the
2354 characteristics of the default cpu and the given
2355 command line options we scan the array again looking
2356 for a best match. */
2357 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2358 if ((sel
->flags
& sought
) == sought
)
2362 count
= bit_count (sel
->flags
& insn_flags
);
2364 if (count
>= current_bit_count
)
2367 current_bit_count
= count
;
2371 gcc_assert (best_fit
);
2375 arm_selected_cpu
= sel
;
2379 gcc_assert (arm_selected_cpu
);
2380 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2381 if (!arm_selected_tune
)
2382 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
2384 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
2385 insn_flags
= arm_selected_cpu
->flags
;
2386 arm_base_arch
= arm_selected_cpu
->base_arch
;
2388 arm_tune
= arm_selected_tune
->core
;
2389 tune_flags
= arm_selected_tune
->flags
;
2390 current_tune
= arm_selected_tune
->tune
;
2392 /* Make sure that the processor choice does not conflict with any of the
2393 other command line choices. */
2394 if (TARGET_ARM
&& !(insn_flags
& FL_NOTM
))
2395 error ("target CPU does not support ARM mode");
2397 /* BPABI targets use linker tricks to allow interworking on cores
2398 without thumb support. */
2399 if (TARGET_INTERWORK
&& !((insn_flags
& FL_THUMB
) || TARGET_BPABI
))
2401 warning (0, "target CPU does not support interworking" );
2402 target_flags
&= ~MASK_INTERWORK
;
2405 if (TARGET_THUMB
&& !(insn_flags
& FL_THUMB
))
2407 warning (0, "target CPU does not support THUMB instructions");
2408 target_flags
&= ~MASK_THUMB
;
2411 if (TARGET_APCS_FRAME
&& TARGET_THUMB
)
2413 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2414 target_flags
&= ~MASK_APCS_FRAME
;
2417 /* Callee super interworking implies thumb interworking. Adding
2418 this to the flags here simplifies the logic elsewhere. */
2419 if (TARGET_THUMB
&& TARGET_CALLEE_INTERWORKING
)
2420 target_flags
|= MASK_INTERWORK
;
2422 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2423 from here where no function is being compiled currently. */
2424 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM
)
2425 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2427 if (TARGET_ARM
&& TARGET_CALLEE_INTERWORKING
)
2428 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2430 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
2432 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2433 target_flags
|= MASK_APCS_FRAME
;
2436 if (TARGET_POKE_FUNCTION_NAME
)
2437 target_flags
|= MASK_APCS_FRAME
;
2439 if (TARGET_APCS_REENT
&& flag_pic
)
2440 error ("-fpic and -mapcs-reent are incompatible");
2442 if (TARGET_APCS_REENT
)
2443 warning (0, "APCS reentrant code not supported. Ignored");
2445 /* If this target is normally configured to use APCS frames, warn if they
2446 are turned off and debugging is turned on. */
2448 && write_symbols
!= NO_DEBUG
2449 && !TARGET_APCS_FRAME
2450 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2451 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2453 if (TARGET_APCS_FLOAT
)
2454 warning (0, "passing floating point arguments in fp regs not yet supported");
2456 if (TARGET_LITTLE_WORDS
)
2457 warning (OPT_Wdeprecated
, "%<mwords-little-endian%> is deprecated and "
2458 "will be removed in a future release");
2460 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2461 arm_arch3m
= (insn_flags
& FL_ARCH3M
) != 0;
2462 arm_arch4
= (insn_flags
& FL_ARCH4
) != 0;
2463 arm_arch4t
= arm_arch4
& ((insn_flags
& FL_THUMB
) != 0);
2464 arm_arch5
= (insn_flags
& FL_ARCH5
) != 0;
2465 arm_arch5e
= (insn_flags
& FL_ARCH5E
) != 0;
2466 arm_arch6
= (insn_flags
& FL_ARCH6
) != 0;
2467 arm_arch6k
= (insn_flags
& FL_ARCH6K
) != 0;
2468 arm_arch_notm
= (insn_flags
& FL_NOTM
) != 0;
2469 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
2470 arm_arch7
= (insn_flags
& FL_ARCH7
) != 0;
2471 arm_arch7em
= (insn_flags
& FL_ARCH7EM
) != 0;
2472 arm_arch8
= (insn_flags
& FL_ARCH8
) != 0;
2473 arm_arch_thumb2
= (insn_flags
& FL_THUMB2
) != 0;
2474 arm_arch_xscale
= (insn_flags
& FL_XSCALE
) != 0;
2476 arm_ld_sched
= (tune_flags
& FL_LDSCHED
) != 0;
2477 arm_tune_strongarm
= (tune_flags
& FL_STRONG
) != 0;
2478 thumb_code
= TARGET_ARM
== 0;
2479 thumb1_code
= TARGET_THUMB1
!= 0;
2480 arm_tune_wbuf
= (tune_flags
& FL_WBUF
) != 0;
2481 arm_tune_xscale
= (tune_flags
& FL_XSCALE
) != 0;
2482 arm_arch_iwmmxt
= (insn_flags
& FL_IWMMXT
) != 0;
2483 arm_arch_iwmmxt2
= (insn_flags
& FL_IWMMXT2
) != 0;
2484 arm_arch_thumb_hwdiv
= (insn_flags
& FL_THUMB_DIV
) != 0;
2485 arm_arch_arm_hwdiv
= (insn_flags
& FL_ARM_DIV
) != 0;
2486 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
2487 arm_arch_crc
= (insn_flags
& FL_CRC32
) != 0;
2488 if (arm_restrict_it
== 2)
2489 arm_restrict_it
= arm_arch8
&& TARGET_THUMB2
;
2492 arm_restrict_it
= 0;
2494 /* If we are not using the default (ARM mode) section anchor offset
2495 ranges, then set the correct ranges now. */
2498 /* Thumb-1 LDR instructions cannot have negative offsets.
2499 Permissible positive offset ranges are 5-bit (for byte loads),
2500 6-bit (for halfword loads), or 7-bit (for word loads).
2501 Empirical results suggest a 7-bit anchor range gives the best
2502 overall code size. */
2503 targetm
.min_anchor_offset
= 0;
2504 targetm
.max_anchor_offset
= 127;
2506 else if (TARGET_THUMB2
)
2508 /* The minimum is set such that the total size of the block
2509 for a particular anchor is 248 + 1 + 4095 bytes, which is
2510 divisible by eight, ensuring natural spacing of anchors. */
2511 targetm
.min_anchor_offset
= -248;
2512 targetm
.max_anchor_offset
= 4095;
2515 /* V5 code we generate is completely interworking capable, so we turn off
2516 TARGET_INTERWORK here to avoid many tests later on. */
2518 /* XXX However, we must pass the right pre-processor defines to CPP
2519 or GLD can get confused. This is a hack. */
2520 if (TARGET_INTERWORK
)
2521 arm_cpp_interwork
= 1;
2524 target_flags
&= ~MASK_INTERWORK
;
2526 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
2527 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2529 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
2530 error ("iwmmxt abi requires an iwmmxt capable cpu");
2532 if (!global_options_set
.x_arm_fpu_index
)
2534 const char *target_fpu_name
;
2537 #ifdef FPUTYPE_DEFAULT
2538 target_fpu_name
= FPUTYPE_DEFAULT
;
2540 target_fpu_name
= "vfp";
2543 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
2548 arm_fpu_desc
= &all_fpus
[arm_fpu_index
];
2550 switch (arm_fpu_desc
->model
)
2552 case ARM_FP_MODEL_VFP
:
2553 arm_fpu_attr
= FPU_VFP
;
2560 if (TARGET_AAPCS_BASED
)
2562 if (TARGET_CALLER_INTERWORKING
)
2563 error ("AAPCS does not support -mcaller-super-interworking");
2565 if (TARGET_CALLEE_INTERWORKING
)
2566 error ("AAPCS does not support -mcallee-super-interworking");
2569 /* iWMMXt and NEON are incompatible. */
2570 if (TARGET_IWMMXT
&& TARGET_NEON
)
2571 error ("iWMMXt and NEON are incompatible");
2573 /* iWMMXt unsupported under Thumb mode. */
2574 if (TARGET_THUMB
&& TARGET_IWMMXT
)
2575 error ("iWMMXt unsupported under Thumb mode");
2577 /* __fp16 support currently assumes the core has ldrh. */
2578 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
2579 sorry ("__fp16 and no ldrh");
2581 /* If soft-float is specified then don't use FPU. */
2582 if (TARGET_SOFT_FLOAT
)
2583 arm_fpu_attr
= FPU_NONE
;
2585 if (TARGET_AAPCS_BASED
)
2587 if (arm_abi
== ARM_ABI_IWMMXT
)
2588 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
2589 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
2590 && TARGET_HARD_FLOAT
2592 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
2594 arm_pcs_default
= ARM_PCS_AAPCS
;
2598 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
2599 sorry ("-mfloat-abi=hard and VFP");
2601 if (arm_abi
== ARM_ABI_APCS
)
2602 arm_pcs_default
= ARM_PCS_APCS
;
2604 arm_pcs_default
= ARM_PCS_ATPCS
;
2607 /* For arm2/3 there is no need to do any scheduling if we are doing
2608 software floating-point. */
2609 if (TARGET_SOFT_FLOAT
&& (tune_flags
& FL_MODE32
) == 0)
2610 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
2612 /* Use the cp15 method if it is available. */
2613 if (target_thread_pointer
== TP_AUTO
)
2615 if (arm_arch6k
&& !TARGET_THUMB1
)
2616 target_thread_pointer
= TP_CP15
;
2618 target_thread_pointer
= TP_SOFT
;
2621 if (TARGET_HARD_TP
&& TARGET_THUMB1
)
2622 error ("can not use -mtp=cp15 with 16-bit Thumb");
2624 /* Override the default structure alignment for AAPCS ABI. */
2625 if (!global_options_set
.x_arm_structure_size_boundary
)
2627 if (TARGET_AAPCS_BASED
)
2628 arm_structure_size_boundary
= 8;
2632 if (arm_structure_size_boundary
!= 8
2633 && arm_structure_size_boundary
!= 32
2634 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
2636 if (ARM_DOUBLEWORD_ALIGN
)
2638 "structure size boundary can only be set to 8, 32 or 64");
2640 warning (0, "structure size boundary can only be set to 8 or 32");
2641 arm_structure_size_boundary
2642 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
2646 if (!TARGET_ARM
&& TARGET_VXWORKS_RTP
&& flag_pic
)
2648 error ("RTP PIC is incompatible with Thumb");
2652 /* If stack checking is disabled, we can use r10 as the PIC register,
2653 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2654 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
2656 if (TARGET_VXWORKS_RTP
)
2657 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2658 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
2661 if (flag_pic
&& TARGET_VXWORKS_RTP
)
2662 arm_pic_register
= 9;
2664 if (arm_pic_register_string
!= NULL
)
2666 int pic_register
= decode_reg_name (arm_pic_register_string
);
2669 warning (0, "-mpic-register= is useless without -fpic");
2671 /* Prevent the user from choosing an obviously stupid PIC register. */
2672 else if (pic_register
< 0 || call_used_regs
[pic_register
]
2673 || pic_register
== HARD_FRAME_POINTER_REGNUM
2674 || pic_register
== STACK_POINTER_REGNUM
2675 || pic_register
>= PC_REGNUM
2676 || (TARGET_VXWORKS_RTP
2677 && (unsigned int) pic_register
!= arm_pic_register
))
2678 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
2680 arm_pic_register
= pic_register
;
2683 if (TARGET_VXWORKS_RTP
2684 && !global_options_set
.x_arm_pic_data_is_text_relative
)
2685 arm_pic_data_is_text_relative
= 0;
2687 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2688 if (fix_cm3_ldrd
== 2)
2690 if (arm_selected_cpu
->core
== cortexm3
)
2696 /* Enable -munaligned-access by default for
2697 - all ARMv6 architecture-based processors
2698 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2699 - ARMv8 architecture-base processors.
2701 Disable -munaligned-access by default for
2702 - all pre-ARMv6 architecture-based processors
2703 - ARMv6-M architecture-based processors. */
2705 if (unaligned_access
== 2)
2707 if (arm_arch6
&& (arm_arch_notm
|| arm_arch7
))
2708 unaligned_access
= 1;
2710 unaligned_access
= 0;
2712 else if (unaligned_access
== 1
2713 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
2715 warning (0, "target CPU does not support unaligned accesses");
2716 unaligned_access
= 0;
2719 if (TARGET_THUMB1
&& flag_schedule_insns
)
2721 /* Don't warn since it's on by default in -O2. */
2722 flag_schedule_insns
= 0;
2727 /* If optimizing for size, bump the number of instructions that we
2728 are prepared to conditionally execute (even on a StrongARM). */
2729 max_insns_skipped
= 6;
2732 max_insns_skipped
= current_tune
->max_insns_skipped
;
2734 /* Hot/Cold partitioning is not currently supported, since we can't
2735 handle literal pool placement in that case. */
2736 if (flag_reorder_blocks_and_partition
)
2738 inform (input_location
,
2739 "-freorder-blocks-and-partition not supported on this architecture");
2740 flag_reorder_blocks_and_partition
= 0;
2741 flag_reorder_blocks
= 1;
2745 /* Hoisting PIC address calculations more aggressively provides a small,
2746 but measurable, size reduction for PIC code. Therefore, we decrease
2747 the bar for unrestricted expression hoisting to the cost of PIC address
2748 calculation, which is 2 instructions. */
2749 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
2750 global_options
.x_param_values
,
2751 global_options_set
.x_param_values
);
2753 /* ARM EABI defaults to strict volatile bitfields. */
2754 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
2755 && abi_version_at_least(2))
2756 flag_strict_volatile_bitfields
= 1;
2758 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
2759 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
2760 if (flag_prefetch_loop_arrays
< 0
2763 && current_tune
->num_prefetch_slots
> 0)
2764 flag_prefetch_loop_arrays
= 1;
2766 /* Set up parameters to be used in prefetching algorithm. Do not override the
2767 defaults unless we are tuning for a core we have researched values for. */
2768 if (current_tune
->num_prefetch_slots
> 0)
2769 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
2770 current_tune
->num_prefetch_slots
,
2771 global_options
.x_param_values
,
2772 global_options_set
.x_param_values
);
2773 if (current_tune
->l1_cache_line_size
>= 0)
2774 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
2775 current_tune
->l1_cache_line_size
,
2776 global_options
.x_param_values
,
2777 global_options_set
.x_param_values
);
2778 if (current_tune
->l1_cache_size
>= 0)
2779 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
2780 current_tune
->l1_cache_size
,
2781 global_options
.x_param_values
,
2782 global_options_set
.x_param_values
);
2784 /* Use Neon to perform 64-bits operations rather than core
2786 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
2787 if (use_neon_for_64bits
== 1)
2788 prefer_neon_for_64bits
= true;
2790 /* Use the alternative scheduling-pressure algorithm by default. */
2791 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, 2,
2792 global_options
.x_param_values
,
2793 global_options_set
.x_param_values
);
2795 /* Disable shrink-wrap when optimizing function for size, since it tends to
2796 generate additional returns. */
2797 if (optimize_function_for_size_p (cfun
) && TARGET_THUMB2
)
2798 flag_shrink_wrap
= false;
2799 /* TBD: Dwarf info for apcs frame is not handled yet. */
2800 if (TARGET_APCS_FRAME
)
2801 flag_shrink_wrap
= false;
2803 /* We only support -mslow-flash-data on armv7-m targets. */
2804 if (target_slow_flash_data
2805 && ((!(arm_arch7
&& !arm_arch_notm
) && !arm_arch7em
)
2806 || (TARGET_THUMB1
|| flag_pic
|| TARGET_NEON
)))
2807 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2809 /* Currently, for slow flash data, we just disable literal pools. */
2810 if (target_slow_flash_data
)
2811 arm_disable_literal_pool
= true;
2813 /* Register global variables with the garbage collector. */
2814 arm_add_gc_roots ();
2818 arm_add_gc_roots (void)
2820 gcc_obstack_init(&minipool_obstack
);
2821 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
2824 /* A table of known ARM exception types.
2825 For use with the interrupt function attribute. */
2829 const char *const arg
;
2830 const unsigned long return_value
;
2834 static const isr_attribute_arg isr_attribute_args
[] =
2836 { "IRQ", ARM_FT_ISR
},
2837 { "irq", ARM_FT_ISR
},
2838 { "FIQ", ARM_FT_FIQ
},
2839 { "fiq", ARM_FT_FIQ
},
2840 { "ABORT", ARM_FT_ISR
},
2841 { "abort", ARM_FT_ISR
},
2842 { "ABORT", ARM_FT_ISR
},
2843 { "abort", ARM_FT_ISR
},
2844 { "UNDEF", ARM_FT_EXCEPTION
},
2845 { "undef", ARM_FT_EXCEPTION
},
2846 { "SWI", ARM_FT_EXCEPTION
},
2847 { "swi", ARM_FT_EXCEPTION
},
2848 { NULL
, ARM_FT_NORMAL
}
2851 /* Returns the (interrupt) function type of the current
2852 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2854 static unsigned long
2855 arm_isr_value (tree argument
)
2857 const isr_attribute_arg
* ptr
;
2861 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
2863 /* No argument - default to IRQ. */
2864 if (argument
== NULL_TREE
)
2867 /* Get the value of the argument. */
2868 if (TREE_VALUE (argument
) == NULL_TREE
2869 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
2870 return ARM_FT_UNKNOWN
;
2872 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
2874 /* Check it against the list of known arguments. */
2875 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
2876 if (streq (arg
, ptr
->arg
))
2877 return ptr
->return_value
;
2879 /* An unrecognized interrupt type. */
2880 return ARM_FT_UNKNOWN
;
2883 /* Computes the type of the current function. */
2885 static unsigned long
2886 arm_compute_func_type (void)
2888 unsigned long type
= ARM_FT_UNKNOWN
;
2892 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
2894 /* Decide if the current function is volatile. Such functions
2895 never return, and many memory cycles can be saved by not storing
2896 register values that will never be needed again. This optimization
2897 was added to speed up context switching in a kernel application. */
2899 && (TREE_NOTHROW (current_function_decl
)
2900 || !(flag_unwind_tables
2902 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
2903 && TREE_THIS_VOLATILE (current_function_decl
))
2904 type
|= ARM_FT_VOLATILE
;
2906 if (cfun
->static_chain_decl
!= NULL
)
2907 type
|= ARM_FT_NESTED
;
2909 attr
= DECL_ATTRIBUTES (current_function_decl
);
2911 a
= lookup_attribute ("naked", attr
);
2913 type
|= ARM_FT_NAKED
;
2915 a
= lookup_attribute ("isr", attr
);
2917 a
= lookup_attribute ("interrupt", attr
);
2920 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
2922 type
|= arm_isr_value (TREE_VALUE (a
));
2927 /* Returns the type of the current function. */
2930 arm_current_func_type (void)
2932 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
2933 cfun
->machine
->func_type
= arm_compute_func_type ();
2935 return cfun
->machine
->func_type
;
2939 arm_allocate_stack_slots_for_args (void)
2941 /* Naked functions should not allocate stack slots for arguments. */
2942 return !IS_NAKED (arm_current_func_type ());
2946 arm_warn_func_return (tree decl
)
2948 /* Naked functions are implemented entirely in assembly, including the
2949 return sequence, so suppress warnings about this. */
2950 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
2954 /* Output assembler code for a block containing the constant parts
2955 of a trampoline, leaving space for the variable parts.
2957 On the ARM, (if r8 is the static chain regnum, and remembering that
2958 referencing pc adds an offset of 8) the trampoline looks like:
2961 .word static chain value
2962 .word function's address
2963 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2966 arm_asm_trampoline_template (FILE *f
)
2970 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
2971 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
2973 else if (TARGET_THUMB2
)
2975 /* The Thumb-2 trampoline is similar to the arm implementation.
2976 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2977 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
2978 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
2979 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
2983 ASM_OUTPUT_ALIGN (f
, 2);
2984 fprintf (f
, "\t.code\t16\n");
2985 fprintf (f
, ".Ltrampoline_start:\n");
2986 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
2987 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
2988 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
2989 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
2990 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
2991 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
2993 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
2994 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
2997 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3000 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3002 rtx fnaddr
, mem
, a_tramp
;
3004 emit_block_move (m_tramp
, assemble_trampoline_template (),
3005 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
3007 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3008 emit_move_insn (mem
, chain_value
);
3010 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3011 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3012 emit_move_insn (mem
, fnaddr
);
3014 a_tramp
= XEXP (m_tramp
, 0);
3015 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3016 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
3017 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3020 /* Thumb trampolines should be entered in thumb mode, so set
3021 the bottom bit of the address. */
3024 arm_trampoline_adjust_address (rtx addr
)
3027 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3028 NULL
, 0, OPTAB_LIB_WIDEN
);
3032 /* Return 1 if it is possible to return using a single instruction.
3033 If SIBLING is non-null, this is a test for a return before a sibling
3034 call. SIBLING is the call insn, so we can examine its register usage. */
3037 use_return_insn (int iscond
, rtx sibling
)
3040 unsigned int func_type
;
3041 unsigned long saved_int_regs
;
3042 unsigned HOST_WIDE_INT stack_adjust
;
3043 arm_stack_offsets
*offsets
;
3045 /* Never use a return instruction before reload has run. */
3046 if (!reload_completed
)
3049 func_type
= arm_current_func_type ();
3051 /* Naked, volatile and stack alignment functions need special
3053 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3056 /* So do interrupt functions that use the frame pointer and Thumb
3057 interrupt functions. */
3058 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
3061 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3062 && !optimize_function_for_size_p (cfun
))
3065 offsets
= arm_get_frame_offsets ();
3066 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3068 /* As do variadic functions. */
3069 if (crtl
->args
.pretend_args_size
3070 || cfun
->machine
->uses_anonymous_args
3071 /* Or if the function calls __builtin_eh_return () */
3072 || crtl
->calls_eh_return
3073 /* Or if the function calls alloca */
3074 || cfun
->calls_alloca
3075 /* Or if there is a stack adjustment. However, if the stack pointer
3076 is saved on the stack, we can use a pre-incrementing stack load. */
3077 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3078 && stack_adjust
== 4)))
3081 saved_int_regs
= offsets
->saved_regs_mask
;
3083 /* Unfortunately, the insn
3085 ldmib sp, {..., sp, ...}
3087 triggers a bug on most SA-110 based devices, such that the stack
3088 pointer won't be correctly restored if the instruction takes a
3089 page fault. We work around this problem by popping r3 along with
3090 the other registers, since that is never slower than executing
3091 another instruction.
3093 We test for !arm_arch5 here, because code for any architecture
3094 less than this could potentially be run on one of the buggy
3096 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3098 /* Validate that r3 is a call-clobbered register (always true in
3099 the default abi) ... */
3100 if (!call_used_regs
[3])
3103 /* ... that it isn't being used for a return value ... */
3104 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
3107 /* ... or for a tail-call argument ... */
3110 gcc_assert (CALL_P (sibling
));
3112 if (find_regno_fusage (sibling
, USE
, 3))
3116 /* ... and that there are no call-saved registers in r0-r2
3117 (always true in the default ABI). */
3118 if (saved_int_regs
& 0x7)
3122 /* Can't be done if interworking with Thumb, and any registers have been
3124 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
3127 /* On StrongARM, conditional returns are expensive if they aren't
3128 taken and multiple registers have been stacked. */
3129 if (iscond
&& arm_tune_strongarm
)
3131 /* Conditional return when just the LR is stored is a simple
3132 conditional-load instruction, that's not expensive. */
3133 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
3137 && arm_pic_register
!= INVALID_REGNUM
3138 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
3142 /* If there are saved registers but the LR isn't saved, then we need
3143 two instructions for the return. */
3144 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
3147 /* Can't be done if any of the VFP regs are pushed,
3148 since this also requires an insn. */
3149 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
3150 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
3151 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
3154 if (TARGET_REALLY_IWMMXT
)
3155 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
3156 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
3162 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3163 shrink-wrapping if possible. This is the case if we need to emit a
3164 prologue, which we can test by looking at the offsets. */
3166 use_simple_return_p (void)
3168 arm_stack_offsets
*offsets
;
3170 offsets
= arm_get_frame_offsets ();
3171 return offsets
->outgoing_args
!= 0;
3174 /* Return TRUE if int I is a valid immediate ARM constant. */
3177 const_ok_for_arm (HOST_WIDE_INT i
)
3181 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3182 be all zero, or all one. */
3183 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
3184 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
3185 != ((~(unsigned HOST_WIDE_INT
) 0)
3186 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
3189 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
3191 /* Fast return for 0 and small values. We must do this for zero, since
3192 the code below can't handle that one case. */
3193 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
3196 /* Get the number of trailing zeros. */
3197 lowbit
= ffs((int) i
) - 1;
3199 /* Only even shifts are allowed in ARM mode so round down to the
3200 nearest even number. */
3204 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
3209 /* Allow rotated constants in ARM mode. */
3211 && ((i
& ~0xc000003f) == 0
3212 || (i
& ~0xf000000f) == 0
3213 || (i
& ~0xfc000003) == 0))
3220 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3223 if (i
== v
|| i
== (v
| (v
<< 8)))
3226 /* Allow repeated pattern 0xXY00XY00. */
3236 /* Return true if I is a valid constant for the operation CODE. */
3238 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
3240 if (const_ok_for_arm (i
))
3246 /* See if we can use movw. */
3247 if (arm_arch_thumb2
&& (i
& 0xffff0000) == 0)
3250 /* Otherwise, try mvn. */
3251 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3254 /* See if we can use addw or subw. */
3256 && ((i
& 0xfffff000) == 0
3257 || ((-i
) & 0xfffff000) == 0))
3259 /* else fall through. */
3279 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
3281 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
3287 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3291 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3298 /* Return true if I is a valid di mode constant for the operation CODE. */
3300 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
3302 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
3303 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
3304 rtx hi
= GEN_INT (hi_val
);
3305 rtx lo
= GEN_INT (lo_val
);
3315 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
3316 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
3318 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
3325 /* Emit a sequence of insns to handle a large constant.
3326 CODE is the code of the operation required, it can be any of SET, PLUS,
3327 IOR, AND, XOR, MINUS;
3328 MODE is the mode in which the operation is being performed;
3329 VAL is the integer to operate on;
3330 SOURCE is the other operand (a register, or a null-pointer for SET);
3331 SUBTARGETS means it is safe to create scratch registers if that will
3332 either produce a simpler sequence, or we will want to cse the values.
3333 Return value is the number of insns emitted. */
3335 /* ??? Tweak this for thumb2. */
3337 arm_split_constant (enum rtx_code code
, enum machine_mode mode
, rtx insn
,
3338 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
3342 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
3343 cond
= COND_EXEC_TEST (PATTERN (insn
));
3347 if (subtargets
|| code
== SET
3348 || (REG_P (target
) && REG_P (source
)
3349 && REGNO (target
) != REGNO (source
)))
3351 /* After arm_reorg has been called, we can't fix up expensive
3352 constants by pushing them into memory so we must synthesize
3353 them in-line, regardless of the cost. This is only likely to
3354 be more costly on chips that have load delay slots and we are
3355 compiling without running the scheduler (so no splitting
3356 occurred before the final instruction emission).
3358 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3360 if (!after_arm_reorg
3362 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
3364 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
3369 /* Currently SET is the only monadic value for CODE, all
3370 the rest are diadic. */
3371 if (TARGET_USE_MOVT
)
3372 arm_emit_movpair (target
, GEN_INT (val
));
3374 emit_set_insn (target
, GEN_INT (val
));
3380 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
3382 if (TARGET_USE_MOVT
)
3383 arm_emit_movpair (temp
, GEN_INT (val
));
3385 emit_set_insn (temp
, GEN_INT (val
));
3387 /* For MINUS, the value is subtracted from, since we never
3388 have subtraction of a constant. */
3390 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
3392 emit_set_insn (target
,
3393 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
3399 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
3403 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3404 ARM/THUMB2 immediates, and add up to VAL.
3405 Thr function return value gives the number of insns required. */
3407 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3408 struct four_ints
*return_sequence
)
3410 int best_consecutive_zeros
= 0;
3414 struct four_ints tmp_sequence
;
3416 /* If we aren't targeting ARM, the best place to start is always at
3417 the bottom, otherwise look more closely. */
3420 for (i
= 0; i
< 32; i
+= 2)
3422 int consecutive_zeros
= 0;
3424 if (!(val
& (3 << i
)))
3426 while ((i
< 32) && !(val
& (3 << i
)))
3428 consecutive_zeros
+= 2;
3431 if (consecutive_zeros
> best_consecutive_zeros
)
3433 best_consecutive_zeros
= consecutive_zeros
;
3434 best_start
= i
- consecutive_zeros
;
3441 /* So long as it won't require any more insns to do so, it's
3442 desirable to emit a small constant (in bits 0...9) in the last
3443 insn. This way there is more chance that it can be combined with
3444 a later addressing insn to form a pre-indexed load or store
3445 operation. Consider:
3447 *((volatile int *)0xe0000100) = 1;
3448 *((volatile int *)0xe0000110) = 2;
3450 We want this to wind up as:
3454 str rB, [rA, #0x100]
3456 str rB, [rA, #0x110]
3458 rather than having to synthesize both large constants from scratch.
3460 Therefore, we calculate how many insns would be required to emit
3461 the constant starting from `best_start', and also starting from
3462 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3463 yield a shorter sequence, we may as well use zero. */
3464 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
3466 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < val
))
3468 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
3469 if (insns2
<= insns1
)
3471 *return_sequence
= tmp_sequence
;
3479 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3481 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
3482 struct four_ints
*return_sequence
, int i
)
3484 int remainder
= val
& 0xffffffff;
3487 /* Try and find a way of doing the job in either two or three
3490 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3491 location. We start at position I. This may be the MSB, or
3492 optimial_immediate_sequence may have positioned it at the largest block
3493 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3494 wrapping around to the top of the word when we drop off the bottom.
3495 In the worst case this code should produce no more than four insns.
3497 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3498 constants, shifted to any arbitrary location. We should always start
3503 unsigned int b1
, b2
, b3
, b4
;
3504 unsigned HOST_WIDE_INT result
;
3507 gcc_assert (insns
< 4);
3512 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3513 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
3516 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
3517 /* We can use addw/subw for the last 12 bits. */
3521 /* Use an 8-bit shifted/rotated immediate. */
3525 result
= remainder
& ((0x0ff << end
)
3526 | ((i
< end
) ? (0xff >> (32 - end
))
3533 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3534 arbitrary shifts. */
3535 i
-= TARGET_ARM
? 2 : 1;
3539 /* Next, see if we can do a better job with a thumb2 replicated
3542 We do it this way around to catch the cases like 0x01F001E0 where
3543 two 8-bit immediates would work, but a replicated constant would
3546 TODO: 16-bit constants that don't clear all the bits, but still win.
3547 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3550 b1
= (remainder
& 0xff000000) >> 24;
3551 b2
= (remainder
& 0x00ff0000) >> 16;
3552 b3
= (remainder
& 0x0000ff00) >> 8;
3553 b4
= remainder
& 0xff;
3557 /* The 8-bit immediate already found clears b1 (and maybe b2),
3558 but must leave b3 and b4 alone. */
3560 /* First try to find a 32-bit replicated constant that clears
3561 almost everything. We can assume that we can't do it in one,
3562 or else we wouldn't be here. */
3563 unsigned int tmp
= b1
& b2
& b3
& b4
;
3564 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
3566 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
3567 + (tmp
== b3
) + (tmp
== b4
);
3569 && (matching_bytes
>= 3
3570 || (matching_bytes
== 2
3571 && const_ok_for_op (remainder
& ~tmp2
, code
))))
3573 /* At least 3 of the bytes match, and the fourth has at
3574 least as many bits set, or two of the bytes match
3575 and it will only require one more insn to finish. */
3583 /* Second, try to find a 16-bit replicated constant that can
3584 leave three of the bytes clear. If b2 or b4 is already
3585 zero, then we can. If the 8-bit from above would not
3586 clear b2 anyway, then we still win. */
3587 else if (b1
== b3
&& (!b2
|| !b4
3588 || (remainder
& 0x00ff0000 & ~result
)))
3590 result
= remainder
& 0xff00ff00;
3596 /* The 8-bit immediate already found clears b2 (and maybe b3)
3597 and we don't get here unless b1 is alredy clear, but it will
3598 leave b4 unchanged. */
3600 /* If we can clear b2 and b4 at once, then we win, since the
3601 8-bits couldn't possibly reach that far. */
3604 result
= remainder
& 0x00ff00ff;
3610 return_sequence
->i
[insns
++] = result
;
3611 remainder
&= ~result
;
3613 if (code
== SET
|| code
== MINUS
)
3621 /* Emit an instruction with the indicated PATTERN. If COND is
3622 non-NULL, conditionalize the execution of the instruction on COND
3626 emit_constant_insn (rtx cond
, rtx pattern
)
3629 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
3630 emit_insn (pattern
);
3633 /* As above, but extra parameter GENERATE which, if clear, suppresses
3637 arm_gen_constant (enum rtx_code code
, enum machine_mode mode
, rtx cond
,
3638 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
3643 int final_invert
= 0;
3645 int set_sign_bit_copies
= 0;
3646 int clear_sign_bit_copies
= 0;
3647 int clear_zero_bit_copies
= 0;
3648 int set_zero_bit_copies
= 0;
3649 int insns
= 0, neg_insns
, inv_insns
;
3650 unsigned HOST_WIDE_INT temp1
, temp2
;
3651 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
3652 struct four_ints
*immediates
;
3653 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
3655 /* Find out which operations are safe for a given CODE. Also do a quick
3656 check for degenerate cases; these can occur when DImode operations
3669 if (remainder
== 0xffffffff)
3672 emit_constant_insn (cond
,
3673 gen_rtx_SET (VOIDmode
, target
,
3674 GEN_INT (ARM_SIGN_EXTEND (val
))));
3680 if (reload_completed
&& rtx_equal_p (target
, source
))
3684 emit_constant_insn (cond
,
3685 gen_rtx_SET (VOIDmode
, target
, source
));
3694 emit_constant_insn (cond
,
3695 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
3698 if (remainder
== 0xffffffff)
3700 if (reload_completed
&& rtx_equal_p (target
, source
))
3703 emit_constant_insn (cond
,
3704 gen_rtx_SET (VOIDmode
, target
, source
));
3713 if (reload_completed
&& rtx_equal_p (target
, source
))
3716 emit_constant_insn (cond
,
3717 gen_rtx_SET (VOIDmode
, target
, source
));
3721 if (remainder
== 0xffffffff)
3724 emit_constant_insn (cond
,
3725 gen_rtx_SET (VOIDmode
, target
,
3726 gen_rtx_NOT (mode
, source
)));
3733 /* We treat MINUS as (val - source), since (source - val) is always
3734 passed as (source + (-val)). */
3738 emit_constant_insn (cond
,
3739 gen_rtx_SET (VOIDmode
, target
,
3740 gen_rtx_NEG (mode
, source
)));
3743 if (const_ok_for_arm (val
))
3746 emit_constant_insn (cond
,
3747 gen_rtx_SET (VOIDmode
, target
,
3748 gen_rtx_MINUS (mode
, GEN_INT (val
),
3759 /* If we can do it in one insn get out quickly. */
3760 if (const_ok_for_op (val
, code
))
3763 emit_constant_insn (cond
,
3764 gen_rtx_SET (VOIDmode
, target
,
3766 ? gen_rtx_fmt_ee (code
, mode
, source
,
3772 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3774 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
3775 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
3779 if (mode
== SImode
&& i
== 16)
3780 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3782 emit_constant_insn (cond
,
3783 gen_zero_extendhisi2
3784 (target
, gen_lowpart (HImode
, source
)));
3786 /* Extz only supports SImode, but we can coerce the operands
3788 emit_constant_insn (cond
,
3789 gen_extzv_t2 (gen_lowpart (SImode
, target
),
3790 gen_lowpart (SImode
, source
),
3791 GEN_INT (i
), const0_rtx
));
3797 /* Calculate a few attributes that may be useful for specific
3799 /* Count number of leading zeros. */
3800 for (i
= 31; i
>= 0; i
--)
3802 if ((remainder
& (1 << i
)) == 0)
3803 clear_sign_bit_copies
++;
3808 /* Count number of leading 1's. */
3809 for (i
= 31; i
>= 0; i
--)
3811 if ((remainder
& (1 << i
)) != 0)
3812 set_sign_bit_copies
++;
3817 /* Count number of trailing zero's. */
3818 for (i
= 0; i
<= 31; i
++)
3820 if ((remainder
& (1 << i
)) == 0)
3821 clear_zero_bit_copies
++;
3826 /* Count number of trailing 1's. */
3827 for (i
= 0; i
<= 31; i
++)
3829 if ((remainder
& (1 << i
)) != 0)
3830 set_zero_bit_copies
++;
3838 /* See if we can do this by sign_extending a constant that is known
3839 to be negative. This is a good, way of doing it, since the shift
3840 may well merge into a subsequent insn. */
3841 if (set_sign_bit_copies
> 1)
3843 if (const_ok_for_arm
3844 (temp1
= ARM_SIGN_EXTEND (remainder
3845 << (set_sign_bit_copies
- 1))))
3849 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3850 emit_constant_insn (cond
,
3851 gen_rtx_SET (VOIDmode
, new_src
,
3853 emit_constant_insn (cond
,
3854 gen_ashrsi3 (target
, new_src
,
3855 GEN_INT (set_sign_bit_copies
- 1)));
3859 /* For an inverted constant, we will need to set the low bits,
3860 these will be shifted out of harm's way. */
3861 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
3862 if (const_ok_for_arm (~temp1
))
3866 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3867 emit_constant_insn (cond
,
3868 gen_rtx_SET (VOIDmode
, new_src
,
3870 emit_constant_insn (cond
,
3871 gen_ashrsi3 (target
, new_src
,
3872 GEN_INT (set_sign_bit_copies
- 1)));
3878 /* See if we can calculate the value as the difference between two
3879 valid immediates. */
3880 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
3882 int topshift
= clear_sign_bit_copies
& ~1;
3884 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
3885 & (0xff000000 >> topshift
));
3887 /* If temp1 is zero, then that means the 9 most significant
3888 bits of remainder were 1 and we've caused it to overflow.
3889 When topshift is 0 we don't need to do anything since we
3890 can borrow from 'bit 32'. */
3891 if (temp1
== 0 && topshift
!= 0)
3892 temp1
= 0x80000000 >> (topshift
- 1);
3894 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
3896 if (const_ok_for_arm (temp2
))
3900 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3901 emit_constant_insn (cond
,
3902 gen_rtx_SET (VOIDmode
, new_src
,
3904 emit_constant_insn (cond
,
3905 gen_addsi3 (target
, new_src
,
3913 /* See if we can generate this by setting the bottom (or the top)
3914 16 bits, and then shifting these into the other half of the
3915 word. We only look for the simplest cases, to do more would cost
3916 too much. Be careful, however, not to generate this when the
3917 alternative would take fewer insns. */
3918 if (val
& 0xffff0000)
3920 temp1
= remainder
& 0xffff0000;
3921 temp2
= remainder
& 0x0000ffff;
3923 /* Overlaps outside this range are best done using other methods. */
3924 for (i
= 9; i
< 24; i
++)
3926 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
3927 && !const_ok_for_arm (temp2
))
3929 rtx new_src
= (subtargets
3930 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
3932 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
3933 source
, subtargets
, generate
);
3941 gen_rtx_ASHIFT (mode
, source
,
3948 /* Don't duplicate cases already considered. */
3949 for (i
= 17; i
< 24; i
++)
3951 if (((temp1
| (temp1
>> i
)) == remainder
)
3952 && !const_ok_for_arm (temp1
))
3954 rtx new_src
= (subtargets
3955 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
3957 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
3958 source
, subtargets
, generate
);
3963 gen_rtx_SET (VOIDmode
, target
,
3966 gen_rtx_LSHIFTRT (mode
, source
,
3977 /* If we have IOR or XOR, and the constant can be loaded in a
3978 single instruction, and we can find a temporary to put it in,
3979 then this can be done in two instructions instead of 3-4. */
3981 /* TARGET can't be NULL if SUBTARGETS is 0 */
3982 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
3984 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
3988 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
3990 emit_constant_insn (cond
,
3991 gen_rtx_SET (VOIDmode
, sub
,
3993 emit_constant_insn (cond
,
3994 gen_rtx_SET (VOIDmode
, target
,
3995 gen_rtx_fmt_ee (code
, mode
,
4006 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4007 and the remainder 0s for e.g. 0xfff00000)
4008 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4010 This can be done in 2 instructions by using shifts with mov or mvn.
4015 mvn r0, r0, lsr #12 */
4016 if (set_sign_bit_copies
> 8
4017 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
4021 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4022 rtx shift
= GEN_INT (set_sign_bit_copies
);
4026 gen_rtx_SET (VOIDmode
, sub
,
4028 gen_rtx_ASHIFT (mode
,
4033 gen_rtx_SET (VOIDmode
, target
,
4035 gen_rtx_LSHIFTRT (mode
, sub
,
4042 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4044 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4046 For eg. r0 = r0 | 0xfff
4051 if (set_zero_bit_copies
> 8
4052 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4056 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4057 rtx shift
= GEN_INT (set_zero_bit_copies
);
4061 gen_rtx_SET (VOIDmode
, sub
,
4063 gen_rtx_LSHIFTRT (mode
,
4068 gen_rtx_SET (VOIDmode
, target
,
4070 gen_rtx_ASHIFT (mode
, sub
,
4076 /* This will never be reached for Thumb2 because orn is a valid
4077 instruction. This is for Thumb1 and the ARM 32 bit cases.
4079 x = y | constant (such that ~constant is a valid constant)
4081 x = ~(~y & ~constant).
4083 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4087 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4088 emit_constant_insn (cond
,
4089 gen_rtx_SET (VOIDmode
, sub
,
4090 gen_rtx_NOT (mode
, source
)));
4093 sub
= gen_reg_rtx (mode
);
4094 emit_constant_insn (cond
,
4095 gen_rtx_SET (VOIDmode
, sub
,
4096 gen_rtx_AND (mode
, source
,
4098 emit_constant_insn (cond
,
4099 gen_rtx_SET (VOIDmode
, target
,
4100 gen_rtx_NOT (mode
, sub
)));
4107 /* See if two shifts will do 2 or more insn's worth of work. */
4108 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4110 HOST_WIDE_INT shift_mask
= ((0xffffffff
4111 << (32 - clear_sign_bit_copies
))
4114 if ((remainder
| shift_mask
) != 0xffffffff)
4118 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4119 insns
= arm_gen_constant (AND
, mode
, cond
,
4120 remainder
| shift_mask
,
4121 new_src
, source
, subtargets
, 1);
4126 rtx targ
= subtargets
? NULL_RTX
: target
;
4127 insns
= arm_gen_constant (AND
, mode
, cond
,
4128 remainder
| shift_mask
,
4129 targ
, source
, subtargets
, 0);
4135 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4136 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4138 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4139 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
4145 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
4147 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
4149 if ((remainder
| shift_mask
) != 0xffffffff)
4153 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4155 insns
= arm_gen_constant (AND
, mode
, cond
,
4156 remainder
| shift_mask
,
4157 new_src
, source
, subtargets
, 1);
4162 rtx targ
= subtargets
? NULL_RTX
: target
;
4164 insns
= arm_gen_constant (AND
, mode
, cond
,
4165 remainder
| shift_mask
,
4166 targ
, source
, subtargets
, 0);
4172 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4173 rtx shift
= GEN_INT (clear_zero_bit_copies
);
4175 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
4176 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
4188 /* Calculate what the instruction sequences would be if we generated it
4189 normally, negated, or inverted. */
4191 /* AND cannot be split into multiple insns, so invert and use BIC. */
4194 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
4197 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
4202 if (can_invert
|| final_invert
)
4203 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
4208 immediates
= &pos_immediates
;
4210 /* Is the negated immediate sequence more efficient? */
4211 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
4214 immediates
= &neg_immediates
;
4219 /* Is the inverted immediate sequence more efficient?
4220 We must allow for an extra NOT instruction for XOR operations, although
4221 there is some chance that the final 'mvn' will get optimized later. */
4222 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
4225 immediates
= &inv_immediates
;
4233 /* Now output the chosen sequence as instructions. */
4236 for (i
= 0; i
< insns
; i
++)
4238 rtx new_src
, temp1_rtx
;
4240 temp1
= immediates
->i
[i
];
4242 if (code
== SET
|| code
== MINUS
)
4243 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
4244 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
4245 new_src
= gen_reg_rtx (mode
);
4251 else if (can_negate
)
4254 temp1
= trunc_int_for_mode (temp1
, mode
);
4255 temp1_rtx
= GEN_INT (temp1
);
4259 else if (code
== MINUS
)
4260 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
4262 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
4264 emit_constant_insn (cond
,
4265 gen_rtx_SET (VOIDmode
, new_src
,
4271 can_negate
= can_invert
;
4275 else if (code
== MINUS
)
4283 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
4284 gen_rtx_NOT (mode
, source
)));
4291 /* Canonicalize a comparison so that we are more likely to recognize it.
4292 This can be done for a few constant compares, where we can make the
4293 immediate value easier to load. */
4296 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
4297 bool op0_preserve_value
)
4299 enum machine_mode mode
;
4300 unsigned HOST_WIDE_INT i
, maxval
;
4302 mode
= GET_MODE (*op0
);
4303 if (mode
== VOIDmode
)
4304 mode
= GET_MODE (*op1
);
4306 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
4308 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4309 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4310 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4311 for GTU/LEU in Thumb mode. */
4316 if (*code
== GT
|| *code
== LE
4317 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
4319 /* Missing comparison. First try to use an available
4321 if (CONST_INT_P (*op1
))
4329 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4331 *op1
= GEN_INT (i
+ 1);
4332 *code
= *code
== GT
? GE
: LT
;
4338 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4339 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4341 *op1
= GEN_INT (i
+ 1);
4342 *code
= *code
== GTU
? GEU
: LTU
;
4351 /* If that did not work, reverse the condition. */
4352 if (!op0_preserve_value
)
4357 *code
= (int)swap_condition ((enum rtx_code
)*code
);
4363 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4364 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4365 to facilitate possible combining with a cmp into 'ands'. */
4367 && GET_CODE (*op0
) == ZERO_EXTEND
4368 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
4369 && GET_MODE (XEXP (*op0
, 0)) == QImode
4370 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
4371 && subreg_lowpart_p (XEXP (*op0
, 0))
4372 && *op1
== const0_rtx
)
4373 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
4376 /* Comparisons smaller than DImode. Only adjust comparisons against
4377 an out-of-range constant. */
4378 if (!CONST_INT_P (*op1
)
4379 || const_ok_for_arm (INTVAL (*op1
))
4380 || const_ok_for_arm (- INTVAL (*op1
)))
4394 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4396 *op1
= GEN_INT (i
+ 1);
4397 *code
= *code
== GT
? GE
: LT
;
4405 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4407 *op1
= GEN_INT (i
- 1);
4408 *code
= *code
== GE
? GT
: LE
;
4415 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4416 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4418 *op1
= GEN_INT (i
+ 1);
4419 *code
= *code
== GTU
? GEU
: LTU
;
4427 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
4429 *op1
= GEN_INT (i
- 1);
4430 *code
= *code
== GEU
? GTU
: LEU
;
4441 /* Define how to find the value returned by a function. */
4444 arm_function_value(const_tree type
, const_tree func
,
4445 bool outgoing ATTRIBUTE_UNUSED
)
4447 enum machine_mode mode
;
4448 int unsignedp ATTRIBUTE_UNUSED
;
4449 rtx r ATTRIBUTE_UNUSED
;
4451 mode
= TYPE_MODE (type
);
4453 if (TARGET_AAPCS_BASED
)
4454 return aapcs_allocate_return_reg (mode
, type
, func
);
4456 /* Promote integer types. */
4457 if (INTEGRAL_TYPE_P (type
))
4458 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
4460 /* Promotes small structs returned in a register to full-word size
4461 for big-endian AAPCS. */
4462 if (arm_return_in_msb (type
))
4464 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4465 if (size
% UNITS_PER_WORD
!= 0)
4467 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
4468 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
4472 return arm_libcall_value_1 (mode
);
4475 /* libcall hashtable helpers. */
4477 struct libcall_hasher
: typed_noop_remove
<rtx_def
>
4479 typedef rtx_def value_type
;
4480 typedef rtx_def compare_type
;
4481 static inline hashval_t
hash (const value_type
*);
4482 static inline bool equal (const value_type
*, const compare_type
*);
4483 static inline void remove (value_type
*);
4487 libcall_hasher::equal (const value_type
*p1
, const compare_type
*p2
)
4489 return rtx_equal_p (p1
, p2
);
4493 libcall_hasher::hash (const value_type
*p1
)
4495 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
4498 typedef hash_table
<libcall_hasher
> libcall_table_type
;
4501 add_libcall (libcall_table_type htab
, rtx libcall
)
4503 *htab
.find_slot (libcall
, INSERT
) = libcall
;
4507 arm_libcall_uses_aapcs_base (const_rtx libcall
)
4509 static bool init_done
= false;
4510 static libcall_table_type libcall_htab
;
4516 libcall_htab
.create (31);
4517 add_libcall (libcall_htab
,
4518 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
4519 add_libcall (libcall_htab
,
4520 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
4521 add_libcall (libcall_htab
,
4522 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
4523 add_libcall (libcall_htab
,
4524 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
4526 add_libcall (libcall_htab
,
4527 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
4528 add_libcall (libcall_htab
,
4529 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
4530 add_libcall (libcall_htab
,
4531 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
4532 add_libcall (libcall_htab
,
4533 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
4535 add_libcall (libcall_htab
,
4536 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
4537 add_libcall (libcall_htab
,
4538 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
4539 add_libcall (libcall_htab
,
4540 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
4541 add_libcall (libcall_htab
,
4542 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
4543 add_libcall (libcall_htab
,
4544 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
4545 add_libcall (libcall_htab
,
4546 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
4547 add_libcall (libcall_htab
,
4548 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
4549 add_libcall (libcall_htab
,
4550 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
4552 /* Values from double-precision helper functions are returned in core
4553 registers if the selected core only supports single-precision
4554 arithmetic, even if we are using the hard-float ABI. The same is
4555 true for single-precision helpers, but we will never be using the
4556 hard-float ABI on a CPU which doesn't support single-precision
4557 operations in hardware. */
4558 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
4559 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
4560 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
4561 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
4562 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
4563 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
4564 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
4565 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
4566 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
4567 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
4568 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
4569 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
4571 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
4575 return libcall
&& libcall_htab
.find (libcall
) != NULL
;
4579 arm_libcall_value_1 (enum machine_mode mode
)
4581 if (TARGET_AAPCS_BASED
)
4582 return aapcs_libcall_value (mode
);
4583 else if (TARGET_IWMMXT_ABI
4584 && arm_vector_mode_supported_p (mode
))
4585 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
4587 return gen_rtx_REG (mode
, ARG_REGISTER (1));
4590 /* Define how to find the value returned by a library function
4591 assuming the value has mode MODE. */
4594 arm_libcall_value (enum machine_mode mode
, const_rtx libcall
)
4596 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
4597 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
4599 /* The following libcalls return their result in integer registers,
4600 even though they return a floating point value. */
4601 if (arm_libcall_uses_aapcs_base (libcall
))
4602 return gen_rtx_REG (mode
, ARG_REGISTER(1));
4606 return arm_libcall_value_1 (mode
);
4609 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4612 arm_function_value_regno_p (const unsigned int regno
)
4614 if (regno
== ARG_REGISTER (1)
4616 && TARGET_AAPCS_BASED
4618 && TARGET_HARD_FLOAT
4619 && regno
== FIRST_VFP_REGNUM
)
4620 || (TARGET_IWMMXT_ABI
4621 && regno
== FIRST_IWMMXT_REGNUM
))
4627 /* Determine the amount of memory needed to store the possible return
4628 registers of an untyped call. */
4630 arm_apply_result_size (void)
4636 if (TARGET_HARD_FLOAT_ABI
&& TARGET_VFP
)
4638 if (TARGET_IWMMXT_ABI
)
4645 /* Decide whether TYPE should be returned in memory (true)
4646 or in a register (false). FNTYPE is the type of the function making
4649 arm_return_in_memory (const_tree type
, const_tree fntype
)
4653 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
4655 if (TARGET_AAPCS_BASED
)
4657 /* Simple, non-aggregate types (ie not including vectors and
4658 complex) are always returned in a register (or registers).
4659 We don't care about which register here, so we can short-cut
4660 some of the detail. */
4661 if (!AGGREGATE_TYPE_P (type
)
4662 && TREE_CODE (type
) != VECTOR_TYPE
4663 && TREE_CODE (type
) != COMPLEX_TYPE
)
4666 /* Any return value that is no larger than one word can be
4668 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
4671 /* Check any available co-processors to see if they accept the
4672 type as a register candidate (VFP, for example, can return
4673 some aggregates in consecutive registers). These aren't
4674 available if the call is variadic. */
4675 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
4678 /* Vector values should be returned using ARM registers, not
4679 memory (unless they're over 16 bytes, which will break since
4680 we only have four call-clobbered registers to play with). */
4681 if (TREE_CODE (type
) == VECTOR_TYPE
)
4682 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
4684 /* The rest go in memory. */
4688 if (TREE_CODE (type
) == VECTOR_TYPE
)
4689 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
4691 if (!AGGREGATE_TYPE_P (type
) &&
4692 (TREE_CODE (type
) != VECTOR_TYPE
))
4693 /* All simple types are returned in registers. */
4696 if (arm_abi
!= ARM_ABI_APCS
)
4698 /* ATPCS and later return aggregate types in memory only if they are
4699 larger than a word (or are variable size). */
4700 return (size
< 0 || size
> UNITS_PER_WORD
);
4703 /* For the arm-wince targets we choose to be compatible with Microsoft's
4704 ARM and Thumb compilers, which always return aggregates in memory. */
4706 /* All structures/unions bigger than one word are returned in memory.
4707 Also catch the case where int_size_in_bytes returns -1. In this case
4708 the aggregate is either huge or of variable size, and in either case
4709 we will want to return it via memory and not in a register. */
4710 if (size
< 0 || size
> UNITS_PER_WORD
)
4713 if (TREE_CODE (type
) == RECORD_TYPE
)
4717 /* For a struct the APCS says that we only return in a register
4718 if the type is 'integer like' and every addressable element
4719 has an offset of zero. For practical purposes this means
4720 that the structure can have at most one non bit-field element
4721 and that this element must be the first one in the structure. */
4723 /* Find the first field, ignoring non FIELD_DECL things which will
4724 have been created by C++. */
4725 for (field
= TYPE_FIELDS (type
);
4726 field
&& TREE_CODE (field
) != FIELD_DECL
;
4727 field
= DECL_CHAIN (field
))
4731 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4733 /* Check that the first field is valid for returning in a register. */
4735 /* ... Floats are not allowed */
4736 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
4739 /* ... Aggregates that are not themselves valid for returning in
4740 a register are not allowed. */
4741 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
4744 /* Now check the remaining fields, if any. Only bitfields are allowed,
4745 since they are not addressable. */
4746 for (field
= DECL_CHAIN (field
);
4748 field
= DECL_CHAIN (field
))
4750 if (TREE_CODE (field
) != FIELD_DECL
)
4753 if (!DECL_BIT_FIELD_TYPE (field
))
4760 if (TREE_CODE (type
) == UNION_TYPE
)
4764 /* Unions can be returned in registers if every element is
4765 integral, or can be returned in an integer register. */
4766 for (field
= TYPE_FIELDS (type
);
4768 field
= DECL_CHAIN (field
))
4770 if (TREE_CODE (field
) != FIELD_DECL
)
4773 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
4776 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
4782 #endif /* not ARM_WINCE */
4784 /* Return all other types in memory. */
4788 const struct pcs_attribute_arg
4792 } pcs_attribute_args
[] =
4794 {"aapcs", ARM_PCS_AAPCS
},
4795 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
4797 /* We could recognize these, but changes would be needed elsewhere
4798 * to implement them. */
4799 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
4800 {"atpcs", ARM_PCS_ATPCS
},
4801 {"apcs", ARM_PCS_APCS
},
4803 {NULL
, ARM_PCS_UNKNOWN
}
4807 arm_pcs_from_attribute (tree attr
)
4809 const struct pcs_attribute_arg
*ptr
;
4812 /* Get the value of the argument. */
4813 if (TREE_VALUE (attr
) == NULL_TREE
4814 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
4815 return ARM_PCS_UNKNOWN
;
4817 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
4819 /* Check it against the list of known arguments. */
4820 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
4821 if (streq (arg
, ptr
->arg
))
4824 /* An unrecognized interrupt type. */
4825 return ARM_PCS_UNKNOWN
;
4828 /* Get the PCS variant to use for this call. TYPE is the function's type
4829 specification, DECL is the specific declartion. DECL may be null if
4830 the call could be indirect or if this is a library call. */
4832 arm_get_pcs_model (const_tree type
, const_tree decl
)
4834 bool user_convention
= false;
4835 enum arm_pcs user_pcs
= arm_pcs_default
;
4840 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
4843 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
4844 user_convention
= true;
4847 if (TARGET_AAPCS_BASED
)
4849 /* Detect varargs functions. These always use the base rules
4850 (no argument is ever a candidate for a co-processor
4852 bool base_rules
= stdarg_p (type
);
4854 if (user_convention
)
4856 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
4857 sorry ("non-AAPCS derived PCS variant");
4858 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
4859 error ("variadic functions must use the base AAPCS variant");
4863 return ARM_PCS_AAPCS
;
4864 else if (user_convention
)
4866 else if (decl
&& flag_unit_at_a_time
)
4868 /* Local functions never leak outside this compilation unit,
4869 so we are free to use whatever conventions are
4871 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4872 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
4874 return ARM_PCS_AAPCS_LOCAL
;
4877 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
4878 sorry ("PCS variant");
4880 /* For everything else we use the target's default. */
4881 return arm_pcs_default
;
4886 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
4887 const_tree fntype ATTRIBUTE_UNUSED
,
4888 rtx libcall ATTRIBUTE_UNUSED
,
4889 const_tree fndecl ATTRIBUTE_UNUSED
)
4891 /* Record the unallocated VFP registers. */
4892 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
4893 pcum
->aapcs_vfp_reg_alloc
= 0;
4896 /* Walk down the type tree of TYPE counting consecutive base elements.
4897 If *MODEP is VOIDmode, then set it to the first valid floating point
4898 type. If a non-floating point type is found, or if a floating point
4899 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4900 otherwise return the count in the sub-tree. */
4902 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
4904 enum machine_mode mode
;
4907 switch (TREE_CODE (type
))
4910 mode
= TYPE_MODE (type
);
4911 if (mode
!= DFmode
&& mode
!= SFmode
)
4914 if (*modep
== VOIDmode
)
4923 mode
= TYPE_MODE (TREE_TYPE (type
));
4924 if (mode
!= DFmode
&& mode
!= SFmode
)
4927 if (*modep
== VOIDmode
)
4936 /* Use V2SImode and V4SImode as representatives of all 64-bit
4937 and 128-bit vector types, whether or not those modes are
4938 supported with the present options. */
4939 size
= int_size_in_bytes (type
);
4952 if (*modep
== VOIDmode
)
4955 /* Vector modes are considered to be opaque: two vectors are
4956 equivalent for the purposes of being homogeneous aggregates
4957 if they are the same size. */
4966 tree index
= TYPE_DOMAIN (type
);
4968 /* Can't handle incomplete types. */
4969 if (!COMPLETE_TYPE_P (type
))
4972 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
4975 || !TYPE_MAX_VALUE (index
)
4976 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
4977 || !TYPE_MIN_VALUE (index
)
4978 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
4982 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
4983 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
4985 /* There must be no padding. */
4986 if (!tree_fits_uhwi_p (TYPE_SIZE (type
))
4987 || ((HOST_WIDE_INT
) tree_to_uhwi (TYPE_SIZE (type
))
4988 != count
* GET_MODE_BITSIZE (*modep
)))
5000 /* Can't handle incomplete types. */
5001 if (!COMPLETE_TYPE_P (type
))
5004 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5006 if (TREE_CODE (field
) != FIELD_DECL
)
5009 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5015 /* There must be no padding. */
5016 if (!tree_fits_uhwi_p (TYPE_SIZE (type
))
5017 || ((HOST_WIDE_INT
) tree_to_uhwi (TYPE_SIZE (type
))
5018 != count
* GET_MODE_BITSIZE (*modep
)))
5025 case QUAL_UNION_TYPE
:
5027 /* These aren't very interesting except in a degenerate case. */
5032 /* Can't handle incomplete types. */
5033 if (!COMPLETE_TYPE_P (type
))
5036 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5038 if (TREE_CODE (field
) != FIELD_DECL
)
5041 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5044 count
= count
> sub_count
? count
: sub_count
;
5047 /* There must be no padding. */
5048 if (!tree_fits_uhwi_p (TYPE_SIZE (type
))
5049 || ((HOST_WIDE_INT
) tree_to_uhwi (TYPE_SIZE (type
))
5050 != count
* GET_MODE_BITSIZE (*modep
)))
5063 /* Return true if PCS_VARIANT should use VFP registers. */
5065 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5067 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5069 static bool seen_thumb1_vfp
= false;
5071 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5073 sorry ("Thumb-1 hard-float VFP ABI");
5074 /* sorry() is not immediately fatal, so only display this once. */
5075 seen_thumb1_vfp
= true;
5081 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5084 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
5085 (TARGET_VFP_DOUBLE
|| !is_double
));
5088 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5089 suitable for passing or returning in VFP registers for the PCS
5090 variant selected. If it is, then *BASE_MODE is updated to contain
5091 a machine mode describing each element of the argument's type and
5092 *COUNT to hold the number of such elements. */
5094 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5095 enum machine_mode mode
, const_tree type
,
5096 enum machine_mode
*base_mode
, int *count
)
5098 enum machine_mode new_mode
= VOIDmode
;
5100 /* If we have the type information, prefer that to working things
5101 out from the mode. */
5104 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5106 if (ag_count
> 0 && ag_count
<= 4)
5111 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5112 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5113 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5118 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5121 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5127 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5130 *base_mode
= new_mode
;
5135 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5136 enum machine_mode mode
, const_tree type
)
5138 int count ATTRIBUTE_UNUSED
;
5139 enum machine_mode ag_mode ATTRIBUTE_UNUSED
;
5141 if (!use_vfp_abi (pcs_variant
, false))
5143 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5148 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5151 if (!use_vfp_abi (pcum
->pcs_variant
, false))
5154 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
5155 &pcum
->aapcs_vfp_rmode
,
5156 &pcum
->aapcs_vfp_rcount
);
5160 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5161 const_tree type ATTRIBUTE_UNUSED
)
5163 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
5164 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
5167 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
5168 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
5170 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
5172 || (mode
== TImode
&& ! TARGET_NEON
)
5173 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
5176 int rcount
= pcum
->aapcs_vfp_rcount
;
5178 enum machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
5182 /* Avoid using unsupported vector modes. */
5183 if (rmode
== V2SImode
)
5185 else if (rmode
== V4SImode
)
5192 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
5193 for (i
= 0; i
< rcount
; i
++)
5195 rtx tmp
= gen_rtx_REG (rmode
,
5196 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
5197 tmp
= gen_rtx_EXPR_LIST
5199 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
5200 XVECEXP (par
, 0, i
) = tmp
;
5203 pcum
->aapcs_reg
= par
;
5206 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
5213 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
5214 enum machine_mode mode
,
5215 const_tree type ATTRIBUTE_UNUSED
)
5217 if (!use_vfp_abi (pcs_variant
, false))
5220 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
5223 enum machine_mode ag_mode
;
5228 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5233 if (ag_mode
== V2SImode
)
5235 else if (ag_mode
== V4SImode
)
5241 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
5242 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
5243 for (i
= 0; i
< count
; i
++)
5245 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
5246 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
5247 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
5248 XVECEXP (par
, 0, i
) = tmp
;
5254 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
5258 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5259 enum machine_mode mode ATTRIBUTE_UNUSED
,
5260 const_tree type ATTRIBUTE_UNUSED
)
5262 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
5263 pcum
->aapcs_vfp_reg_alloc
= 0;
5267 #define AAPCS_CP(X) \
5269 aapcs_ ## X ## _cum_init, \
5270 aapcs_ ## X ## _is_call_candidate, \
5271 aapcs_ ## X ## _allocate, \
5272 aapcs_ ## X ## _is_return_candidate, \
5273 aapcs_ ## X ## _allocate_return_reg, \
5274 aapcs_ ## X ## _advance \
5277 /* Table of co-processors that can be used to pass arguments in
5278 registers. Idealy no arugment should be a candidate for more than
5279 one co-processor table entry, but the table is processed in order
5280 and stops after the first match. If that entry then fails to put
5281 the argument into a co-processor register, the argument will go on
5285 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5286 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
5288 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5289 BLKmode) is a candidate for this co-processor's registers; this
5290 function should ignore any position-dependent state in
5291 CUMULATIVE_ARGS and only use call-type dependent information. */
5292 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
5294 /* Return true if the argument does get a co-processor register; it
5295 should set aapcs_reg to an RTX of the register allocated as is
5296 required for a return from FUNCTION_ARG. */
5297 bool (*allocate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
5299 /* Return true if a result of mode MODE (or type TYPE if MODE is
5300 BLKmode) is can be returned in this co-processor's registers. */
5301 bool (*is_return_candidate
) (enum arm_pcs
, enum machine_mode
, const_tree
);
5303 /* Allocate and return an RTX element to hold the return type of a
5304 call, this routine must not fail and will only be called if
5305 is_return_candidate returned true with the same parameters. */
5306 rtx (*allocate_return_reg
) (enum arm_pcs
, enum machine_mode
, const_tree
);
5308 /* Finish processing this argument and prepare to start processing
5310 void (*advance
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
5311 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
5319 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5324 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5325 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
5332 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
5334 /* We aren't passed a decl, so we can't check that a call is local.
5335 However, it isn't clear that that would be a win anyway, since it
5336 might limit some tail-calling opportunities. */
5337 enum arm_pcs pcs_variant
;
5341 const_tree fndecl
= NULL_TREE
;
5343 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5346 fntype
= TREE_TYPE (fntype
);
5349 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5352 pcs_variant
= arm_pcs_default
;
5354 if (pcs_variant
!= ARM_PCS_AAPCS
)
5358 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5359 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
5368 aapcs_allocate_return_reg (enum machine_mode mode
, const_tree type
,
5371 /* We aren't passed a decl, so we can't check that a call is local.
5372 However, it isn't clear that that would be a win anyway, since it
5373 might limit some tail-calling opportunities. */
5374 enum arm_pcs pcs_variant
;
5375 int unsignedp ATTRIBUTE_UNUSED
;
5379 const_tree fndecl
= NULL_TREE
;
5381 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5384 fntype
= TREE_TYPE (fntype
);
5387 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5390 pcs_variant
= arm_pcs_default
;
5392 /* Promote integer types. */
5393 if (type
&& INTEGRAL_TYPE_P (type
))
5394 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
5396 if (pcs_variant
!= ARM_PCS_AAPCS
)
5400 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5401 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
5403 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
5407 /* Promotes small structs returned in a register to full-word size
5408 for big-endian AAPCS. */
5409 if (type
&& arm_return_in_msb (type
))
5411 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5412 if (size
% UNITS_PER_WORD
!= 0)
5414 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5415 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5419 return gen_rtx_REG (mode
, R0_REGNUM
);
5423 aapcs_libcall_value (enum machine_mode mode
)
5425 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
5426 && GET_MODE_SIZE (mode
) <= 4)
5429 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
5432 /* Lay out a function argument using the AAPCS rules. The rule
5433 numbers referred to here are those in the AAPCS. */
5435 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
5436 const_tree type
, bool named
)
5441 /* We only need to do this once per argument. */
5442 if (pcum
->aapcs_arg_processed
)
5445 pcum
->aapcs_arg_processed
= true;
5447 /* Special case: if named is false then we are handling an incoming
5448 anonymous argument which is on the stack. */
5452 /* Is this a potential co-processor register candidate? */
5453 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5455 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
5456 pcum
->aapcs_cprc_slot
= slot
;
5458 /* We don't have to apply any of the rules from part B of the
5459 preparation phase, these are handled elsewhere in the
5464 /* A Co-processor register candidate goes either in its own
5465 class of registers or on the stack. */
5466 if (!pcum
->aapcs_cprc_failed
[slot
])
5468 /* C1.cp - Try to allocate the argument to co-processor
5470 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
5473 /* C2.cp - Put the argument on the stack and note that we
5474 can't assign any more candidates in this slot. We also
5475 need to note that we have allocated stack space, so that
5476 we won't later try to split a non-cprc candidate between
5477 core registers and the stack. */
5478 pcum
->aapcs_cprc_failed
[slot
] = true;
5479 pcum
->can_split
= false;
5482 /* We didn't get a register, so this argument goes on the
5484 gcc_assert (pcum
->can_split
== false);
5489 /* C3 - For double-word aligned arguments, round the NCRN up to the
5490 next even number. */
5491 ncrn
= pcum
->aapcs_ncrn
;
5492 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
5495 nregs
= ARM_NUM_REGS2(mode
, type
);
5497 /* Sigh, this test should really assert that nregs > 0, but a GCC
5498 extension allows empty structs and then gives them empty size; it
5499 then allows such a structure to be passed by value. For some of
5500 the code below we have to pretend that such an argument has
5501 non-zero size so that we 'locate' it correctly either in
5502 registers or on the stack. */
5503 gcc_assert (nregs
>= 0);
5505 nregs2
= nregs
? nregs
: 1;
5507 /* C4 - Argument fits entirely in core registers. */
5508 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
5510 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5511 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
5515 /* C5 - Some core registers left and there are no arguments already
5516 on the stack: split this argument between the remaining core
5517 registers and the stack. */
5518 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
5520 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
5521 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5522 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
5526 /* C6 - NCRN is set to 4. */
5527 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
5529 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
5533 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5534 for a call to a function whose data type is FNTYPE.
5535 For a library call, FNTYPE is NULL. */
5537 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
5539 tree fndecl ATTRIBUTE_UNUSED
)
5541 /* Long call handling. */
5543 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5545 pcum
->pcs_variant
= arm_pcs_default
;
5547 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5549 if (arm_libcall_uses_aapcs_base (libname
))
5550 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
5552 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
5553 pcum
->aapcs_reg
= NULL_RTX
;
5554 pcum
->aapcs_partial
= 0;
5555 pcum
->aapcs_arg_processed
= false;
5556 pcum
->aapcs_cprc_slot
= -1;
5557 pcum
->can_split
= true;
5559 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
5563 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5565 pcum
->aapcs_cprc_failed
[i
] = false;
5566 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
5574 /* On the ARM, the offset starts at 0. */
5576 pcum
->iwmmxt_nregs
= 0;
5577 pcum
->can_split
= true;
5579 /* Varargs vectors are treated the same as long long.
5580 named_count avoids having to change the way arm handles 'named' */
5581 pcum
->named_count
= 0;
5584 if (TARGET_REALLY_IWMMXT
&& fntype
)
5588 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
5590 fn_arg
= TREE_CHAIN (fn_arg
))
5591 pcum
->named_count
+= 1;
5593 if (! pcum
->named_count
)
5594 pcum
->named_count
= INT_MAX
;
5598 /* Return true if we use LRA instead of reload pass. */
5602 return arm_lra_flag
;
5605 /* Return true if mode/type need doubleword alignment. */
5607 arm_needs_doubleword_align (enum machine_mode mode
, const_tree type
)
5609 return (GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
5610 || (type
&& TYPE_ALIGN (type
) > PARM_BOUNDARY
));
5614 /* Determine where to put an argument to a function.
5615 Value is zero to push the argument on the stack,
5616 or a hard register in which to store the argument.
5618 MODE is the argument's machine mode.
5619 TYPE is the data type of the argument (as a tree).
5620 This is null for libcalls where that information may
5622 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5623 the preceding args and about the function being called.
5624 NAMED is nonzero if this argument is a named parameter
5625 (otherwise it is an extra parameter matching an ellipsis).
5627 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5628 other arguments are passed on the stack. If (NAMED == 0) (which happens
5629 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5630 defined), say it is passed in the stack (function_prologue will
5631 indeed make it pass in the stack if necessary). */
5634 arm_function_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
5635 const_tree type
, bool named
)
5637 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5640 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5641 a call insn (op3 of a call_value insn). */
5642 if (mode
== VOIDmode
)
5645 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5647 aapcs_layout_arg (pcum
, mode
, type
, named
);
5648 return pcum
->aapcs_reg
;
5651 /* Varargs vectors are treated the same as long long.
5652 named_count avoids having to change the way arm handles 'named' */
5653 if (TARGET_IWMMXT_ABI
5654 && arm_vector_mode_supported_p (mode
)
5655 && pcum
->named_count
> pcum
->nargs
+ 1)
5657 if (pcum
->iwmmxt_nregs
<= 9)
5658 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
5661 pcum
->can_split
= false;
5666 /* Put doubleword aligned quantities in even register pairs. */
5668 && ARM_DOUBLEWORD_ALIGN
5669 && arm_needs_doubleword_align (mode
, type
))
5672 /* Only allow splitting an arg between regs and memory if all preceding
5673 args were allocated to regs. For args passed by reference we only count
5674 the reference pointer. */
5675 if (pcum
->can_split
)
5678 nregs
= ARM_NUM_REGS2 (mode
, type
);
5680 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
5683 return gen_rtx_REG (mode
, pcum
->nregs
);
5687 arm_function_arg_boundary (enum machine_mode mode
, const_tree type
)
5689 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
5690 ? DOUBLEWORD_ALIGNMENT
5695 arm_arg_partial_bytes (cumulative_args_t pcum_v
, enum machine_mode mode
,
5696 tree type
, bool named
)
5698 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5699 int nregs
= pcum
->nregs
;
5701 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5703 aapcs_layout_arg (pcum
, mode
, type
, named
);
5704 return pcum
->aapcs_partial
;
5707 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
5710 if (NUM_ARG_REGS
> nregs
5711 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
5713 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
5718 /* Update the data in PCUM to advance over an argument
5719 of mode MODE and data type TYPE.
5720 (TYPE is null for libcalls where that information may not be available.) */
5723 arm_function_arg_advance (cumulative_args_t pcum_v
, enum machine_mode mode
,
5724 const_tree type
, bool named
)
5726 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
5728 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
5730 aapcs_layout_arg (pcum
, mode
, type
, named
);
5732 if (pcum
->aapcs_cprc_slot
>= 0)
5734 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
5736 pcum
->aapcs_cprc_slot
= -1;
5739 /* Generic stuff. */
5740 pcum
->aapcs_arg_processed
= false;
5741 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
5742 pcum
->aapcs_reg
= NULL_RTX
;
5743 pcum
->aapcs_partial
= 0;
5748 if (arm_vector_mode_supported_p (mode
)
5749 && pcum
->named_count
> pcum
->nargs
5750 && TARGET_IWMMXT_ABI
)
5751 pcum
->iwmmxt_nregs
+= 1;
5753 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
5757 /* Variable sized types are passed by reference. This is a GCC
5758 extension to the ARM ABI. */
5761 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
5762 enum machine_mode mode ATTRIBUTE_UNUSED
,
5763 const_tree type
, bool named ATTRIBUTE_UNUSED
)
5765 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
5768 /* Encode the current state of the #pragma [no_]long_calls. */
5771 OFF
, /* No #pragma [no_]long_calls is in effect. */
5772 LONG
, /* #pragma long_calls is in effect. */
5773 SHORT
/* #pragma no_long_calls is in effect. */
5776 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
5779 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
5781 arm_pragma_long_calls
= LONG
;
5785 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
5787 arm_pragma_long_calls
= SHORT
;
5791 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
5793 arm_pragma_long_calls
= OFF
;
5796 /* Handle an attribute requiring a FUNCTION_DECL;
5797 arguments as in struct attribute_spec.handler. */
5799 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
5800 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
5802 if (TREE_CODE (*node
) != FUNCTION_DECL
)
5804 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5806 *no_add_attrs
= true;
5812 /* Handle an "interrupt" or "isr" attribute;
5813 arguments as in struct attribute_spec.handler. */
5815 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
5820 if (TREE_CODE (*node
) != FUNCTION_DECL
)
5822 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5824 *no_add_attrs
= true;
5826 /* FIXME: the argument if any is checked for type attributes;
5827 should it be checked for decl ones? */
5831 if (TREE_CODE (*node
) == FUNCTION_TYPE
5832 || TREE_CODE (*node
) == METHOD_TYPE
)
5834 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
5836 warning (OPT_Wattributes
, "%qE attribute ignored",
5838 *no_add_attrs
= true;
5841 else if (TREE_CODE (*node
) == POINTER_TYPE
5842 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
5843 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
5844 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
5846 *node
= build_variant_type_copy (*node
);
5847 TREE_TYPE (*node
) = build_type_attribute_variant
5849 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
5850 *no_add_attrs
= true;
5854 /* Possibly pass this attribute on from the type to a decl. */
5855 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
5856 | (int) ATTR_FLAG_FUNCTION_NEXT
5857 | (int) ATTR_FLAG_ARRAY_NEXT
))
5859 *no_add_attrs
= true;
5860 return tree_cons (name
, args
, NULL_TREE
);
5864 warning (OPT_Wattributes
, "%qE attribute ignored",
5873 /* Handle a "pcs" attribute; arguments as in struct
5874 attribute_spec.handler. */
5876 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
5877 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
5879 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
5881 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
5882 *no_add_attrs
= true;
5887 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5888 /* Handle the "notshared" attribute. This attribute is another way of
5889 requesting hidden visibility. ARM's compiler supports
5890 "__declspec(notshared)"; we support the same thing via an
5894 arm_handle_notshared_attribute (tree
*node
,
5895 tree name ATTRIBUTE_UNUSED
,
5896 tree args ATTRIBUTE_UNUSED
,
5897 int flags ATTRIBUTE_UNUSED
,
5900 tree decl
= TYPE_NAME (*node
);
5904 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
5905 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
5906 *no_add_attrs
= false;
5912 /* Return 0 if the attributes for two types are incompatible, 1 if they
5913 are compatible, and 2 if they are nearly compatible (which causes a
5914 warning to be generated). */
5916 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
5920 /* Check for mismatch of non-default calling convention. */
5921 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
5924 /* Check for mismatched call attributes. */
5925 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
5926 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
5927 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
5928 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
5930 /* Only bother to check if an attribute is defined. */
5931 if (l1
| l2
| s1
| s2
)
5933 /* If one type has an attribute, the other must have the same attribute. */
5934 if ((l1
!= l2
) || (s1
!= s2
))
5937 /* Disallow mixed attributes. */
5938 if ((l1
& s2
) || (l2
& s1
))
5942 /* Check for mismatched ISR attribute. */
5943 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
5945 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
5946 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
5948 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
5955 /* Assigns default attributes to newly defined type. This is used to
5956 set short_call/long_call attributes for function types of
5957 functions defined inside corresponding #pragma scopes. */
5959 arm_set_default_type_attributes (tree type
)
5961 /* Add __attribute__ ((long_call)) to all functions, when
5962 inside #pragma long_calls or __attribute__ ((short_call)),
5963 when inside #pragma no_long_calls. */
5964 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
5966 tree type_attr_list
, attr_name
;
5967 type_attr_list
= TYPE_ATTRIBUTES (type
);
5969 if (arm_pragma_long_calls
== LONG
)
5970 attr_name
= get_identifier ("long_call");
5971 else if (arm_pragma_long_calls
== SHORT
)
5972 attr_name
= get_identifier ("short_call");
5976 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
5977 TYPE_ATTRIBUTES (type
) = type_attr_list
;
5981 /* Return true if DECL is known to be linked into section SECTION. */
5984 arm_function_in_section_p (tree decl
, section
*section
)
5986 /* We can only be certain about functions defined in the same
5987 compilation unit. */
5988 if (!TREE_STATIC (decl
))
5991 /* Make sure that SYMBOL always binds to the definition in this
5992 compilation unit. */
5993 if (!targetm
.binds_local_p (decl
))
5996 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5997 if (!DECL_SECTION_NAME (decl
))
5999 /* Make sure that we will not create a unique section for DECL. */
6000 if (flag_function_sections
|| DECL_ONE_ONLY (decl
))
6004 return function_section (decl
) == section
;
6007 /* Return nonzero if a 32-bit "long_call" should be generated for
6008 a call from the current function to DECL. We generate a long_call
6011 a. has an __attribute__((long call))
6012 or b. is within the scope of a #pragma long_calls
6013 or c. the -mlong-calls command line switch has been specified
6015 However we do not generate a long call if the function:
6017 d. has an __attribute__ ((short_call))
6018 or e. is inside the scope of a #pragma no_long_calls
6019 or f. is defined in the same section as the current function. */
6022 arm_is_long_call_p (tree decl
)
6027 return TARGET_LONG_CALLS
;
6029 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
6030 if (lookup_attribute ("short_call", attrs
))
6033 /* For "f", be conservative, and only cater for cases in which the
6034 whole of the current function is placed in the same section. */
6035 if (!flag_reorder_blocks_and_partition
6036 && TREE_CODE (decl
) == FUNCTION_DECL
6037 && arm_function_in_section_p (decl
, current_function_section ()))
6040 if (lookup_attribute ("long_call", attrs
))
6043 return TARGET_LONG_CALLS
;
6046 /* Return nonzero if it is ok to make a tail-call to DECL. */
6048 arm_function_ok_for_sibcall (tree decl
, tree exp
)
6050 unsigned long func_type
;
6052 if (cfun
->machine
->sibcall_blocked
)
6055 /* Never tailcall something if we are generating code for Thumb-1. */
6059 /* The PIC register is live on entry to VxWorks PLT entries, so we
6060 must make the call before restoring the PIC register. */
6061 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
6064 /* Cannot tail-call to long calls, since these are out of range of
6065 a branch instruction. */
6066 if (decl
&& arm_is_long_call_p (decl
))
6069 /* If we are interworking and the function is not declared static
6070 then we can't tail-call it unless we know that it exists in this
6071 compilation unit (since it might be a Thumb routine). */
6072 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
6073 && !TREE_ASM_WRITTEN (decl
))
6076 func_type
= arm_current_func_type ();
6077 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6078 if (IS_INTERRUPT (func_type
))
6081 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
6083 /* Check that the return value locations are the same. For
6084 example that we aren't returning a value from the sibling in
6085 a VFP register but then need to transfer it to a core
6089 a
= arm_function_value (TREE_TYPE (exp
), decl
, false);
6090 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
6092 if (!rtx_equal_p (a
, b
))
6096 /* Never tailcall if function may be called with a misaligned SP. */
6097 if (IS_STACKALIGN (func_type
))
6100 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6101 references should become a NOP. Don't convert such calls into
6103 if (TARGET_AAPCS_BASED
6104 && arm_abi
== ARM_ABI_AAPCS
6106 && DECL_WEAK (decl
))
6109 /* Everything else is ok. */
6114 /* Addressing mode support functions. */
6116 /* Return nonzero if X is a legitimate immediate operand when compiling
6117 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6119 legitimate_pic_operand_p (rtx x
)
6121 if (GET_CODE (x
) == SYMBOL_REF
6122 || (GET_CODE (x
) == CONST
6123 && GET_CODE (XEXP (x
, 0)) == PLUS
6124 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
6130 /* Record that the current function needs a PIC register. Initialize
6131 cfun->machine->pic_reg if we have not already done so. */
6134 require_pic_register (void)
6136 /* A lot of the logic here is made obscure by the fact that this
6137 routine gets called as part of the rtx cost estimation process.
6138 We don't want those calls to affect any assumptions about the real
6139 function; and further, we can't call entry_of_function() until we
6140 start the real expansion process. */
6141 if (!crtl
->uses_pic_offset_table
)
6143 gcc_assert (can_create_pseudo_p ());
6144 if (arm_pic_register
!= INVALID_REGNUM
6145 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
6147 if (!cfun
->machine
->pic_reg
)
6148 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
6150 /* Play games to avoid marking the function as needing pic
6151 if we are being called as part of the cost-estimation
6153 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6154 crtl
->uses_pic_offset_table
= 1;
6160 if (!cfun
->machine
->pic_reg
)
6161 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
6163 /* Play games to avoid marking the function as needing pic
6164 if we are being called as part of the cost-estimation
6166 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6168 crtl
->uses_pic_offset_table
= 1;
6171 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
6172 && arm_pic_register
> LAST_LO_REGNUM
)
6173 emit_move_insn (cfun
->machine
->pic_reg
,
6174 gen_rtx_REG (Pmode
, arm_pic_register
));
6176 arm_load_pic_register (0UL);
6181 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
6183 INSN_LOCATION (insn
) = prologue_location
;
6185 /* We can be called during expansion of PHI nodes, where
6186 we can't yet emit instructions directly in the final
6187 insn stream. Queue the insns on the entry edge, they will
6188 be committed after everything else is expanded. */
6189 insert_insn_on_edge (seq
,
6190 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
6197 legitimize_pic_address (rtx orig
, enum machine_mode mode
, rtx reg
)
6199 if (GET_CODE (orig
) == SYMBOL_REF
6200 || GET_CODE (orig
) == LABEL_REF
)
6206 gcc_assert (can_create_pseudo_p ());
6207 reg
= gen_reg_rtx (Pmode
);
6210 /* VxWorks does not impose a fixed gap between segments; the run-time
6211 gap can be different from the object-file gap. We therefore can't
6212 use GOTOFF unless we are absolutely sure that the symbol is in the
6213 same segment as the GOT. Unfortunately, the flexibility of linker
6214 scripts means that we can't be sure of that in general, so assume
6215 that GOTOFF is never valid on VxWorks. */
6216 if ((GET_CODE (orig
) == LABEL_REF
6217 || (GET_CODE (orig
) == SYMBOL_REF
&&
6218 SYMBOL_REF_LOCAL_P (orig
)))
6220 && arm_pic_data_is_text_relative
)
6221 insn
= arm_pic_static_addr (orig
, reg
);
6227 /* If this function doesn't have a pic register, create one now. */
6228 require_pic_register ();
6230 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
6232 /* Make the MEM as close to a constant as possible. */
6233 mem
= SET_SRC (pat
);
6234 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
6235 MEM_READONLY_P (mem
) = 1;
6236 MEM_NOTRAP_P (mem
) = 1;
6238 insn
= emit_insn (pat
);
6241 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6243 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
6247 else if (GET_CODE (orig
) == CONST
)
6251 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6252 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
6255 /* Handle the case where we have: const (UNSPEC_TLS). */
6256 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
6257 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
6260 /* Handle the case where we have:
6261 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6263 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6264 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
6265 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
6267 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
6273 gcc_assert (can_create_pseudo_p ());
6274 reg
= gen_reg_rtx (Pmode
);
6277 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
6279 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
6280 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
6281 base
== reg
? 0 : reg
);
6283 if (CONST_INT_P (offset
))
6285 /* The base register doesn't really matter, we only want to
6286 test the index for the appropriate mode. */
6287 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
6289 gcc_assert (can_create_pseudo_p ());
6290 offset
= force_reg (Pmode
, offset
);
6293 if (CONST_INT_P (offset
))
6294 return plus_constant (Pmode
, base
, INTVAL (offset
));
6297 if (GET_MODE_SIZE (mode
) > 4
6298 && (GET_MODE_CLASS (mode
) == MODE_INT
6299 || TARGET_SOFT_FLOAT
))
6301 emit_insn (gen_addsi3 (reg
, base
, offset
));
6305 return gen_rtx_PLUS (Pmode
, base
, offset
);
6312 /* Find a spare register to use during the prolog of a function. */
6315 thumb_find_work_register (unsigned long pushed_regs_mask
)
6319 /* Check the argument registers first as these are call-used. The
6320 register allocation order means that sometimes r3 might be used
6321 but earlier argument registers might not, so check them all. */
6322 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
6323 if (!df_regs_ever_live_p (reg
))
6326 /* Before going on to check the call-saved registers we can try a couple
6327 more ways of deducing that r3 is available. The first is when we are
6328 pushing anonymous arguments onto the stack and we have less than 4
6329 registers worth of fixed arguments(*). In this case r3 will be part of
6330 the variable argument list and so we can be sure that it will be
6331 pushed right at the start of the function. Hence it will be available
6332 for the rest of the prologue.
6333 (*): ie crtl->args.pretend_args_size is greater than 0. */
6334 if (cfun
->machine
->uses_anonymous_args
6335 && crtl
->args
.pretend_args_size
> 0)
6336 return LAST_ARG_REGNUM
;
6338 /* The other case is when we have fixed arguments but less than 4 registers
6339 worth. In this case r3 might be used in the body of the function, but
6340 it is not being used to convey an argument into the function. In theory
6341 we could just check crtl->args.size to see how many bytes are
6342 being passed in argument registers, but it seems that it is unreliable.
6343 Sometimes it will have the value 0 when in fact arguments are being
6344 passed. (See testcase execute/20021111-1.c for an example). So we also
6345 check the args_info.nregs field as well. The problem with this field is
6346 that it makes no allowances for arguments that are passed to the
6347 function but which are not used. Hence we could miss an opportunity
6348 when a function has an unused argument in r3. But it is better to be
6349 safe than to be sorry. */
6350 if (! cfun
->machine
->uses_anonymous_args
6351 && crtl
->args
.size
>= 0
6352 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
6353 && (TARGET_AAPCS_BASED
6354 ? crtl
->args
.info
.aapcs_ncrn
< 4
6355 : crtl
->args
.info
.nregs
< 4))
6356 return LAST_ARG_REGNUM
;
6358 /* Otherwise look for a call-saved register that is going to be pushed. */
6359 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
6360 if (pushed_regs_mask
& (1 << reg
))
6365 /* Thumb-2 can use high regs. */
6366 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
6367 if (pushed_regs_mask
& (1 << reg
))
6370 /* Something went wrong - thumb_compute_save_reg_mask()
6371 should have arranged for a suitable register to be pushed. */
6375 static GTY(()) int pic_labelno
;
6377 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6381 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
6383 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
6385 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
6388 gcc_assert (flag_pic
);
6390 pic_reg
= cfun
->machine
->pic_reg
;
6391 if (TARGET_VXWORKS_RTP
)
6393 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
6394 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6395 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
6397 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
6399 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
6400 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
6404 /* We use an UNSPEC rather than a LABEL_REF because this label
6405 never appears in the code stream. */
6407 labelno
= GEN_INT (pic_labelno
++);
6408 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6409 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6411 /* On the ARM the PC register contains 'dot + 8' at the time of the
6412 addition, on the Thumb it is 'dot + 4'. */
6413 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6414 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
6416 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6420 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6422 else /* TARGET_THUMB1 */
6424 if (arm_pic_register
!= INVALID_REGNUM
6425 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
6427 /* We will have pushed the pic register, so we should always be
6428 able to find a work register. */
6429 pic_tmp
= gen_rtx_REG (SImode
,
6430 thumb_find_work_register (saved_regs
));
6431 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
6432 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
6433 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
6435 else if (arm_pic_register
!= INVALID_REGNUM
6436 && arm_pic_register
> LAST_LO_REGNUM
6437 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
6439 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6440 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
6441 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
6444 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
6448 /* Need to emit this whether or not we obey regdecls,
6449 since setjmp/longjmp can cause life info to screw up. */
6453 /* Generate code to load the address of a static var when flag_pic is set. */
6455 arm_pic_static_addr (rtx orig
, rtx reg
)
6457 rtx l1
, labelno
, offset_rtx
, insn
;
6459 gcc_assert (flag_pic
);
6461 /* We use an UNSPEC rather than a LABEL_REF because this label
6462 never appears in the code stream. */
6463 labelno
= GEN_INT (pic_labelno
++);
6464 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6465 l1
= gen_rtx_CONST (VOIDmode
, l1
);
6467 /* On the ARM the PC register contains 'dot + 8' at the time of the
6468 addition, on the Thumb it is 'dot + 4'. */
6469 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
6470 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
6471 UNSPEC_SYMBOL_OFFSET
);
6472 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
6474 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
6478 /* Return nonzero if X is valid as an ARM state addressing register. */
6480 arm_address_register_rtx_p (rtx x
, int strict_p
)
6490 return ARM_REGNO_OK_FOR_BASE_P (regno
);
6492 return (regno
<= LAST_ARM_REGNUM
6493 || regno
>= FIRST_PSEUDO_REGISTER
6494 || regno
== FRAME_POINTER_REGNUM
6495 || regno
== ARG_POINTER_REGNUM
);
6498 /* Return TRUE if this rtx is the difference of a symbol and a label,
6499 and will reduce to a PC-relative relocation in the object file.
6500 Expressions like this can be left alone when generating PIC, rather
6501 than forced through the GOT. */
6503 pcrel_constant_p (rtx x
)
6505 if (GET_CODE (x
) == MINUS
)
6506 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
6511 /* Return true if X will surely end up in an index register after next
6514 will_be_in_index_register (const_rtx x
)
6516 /* arm.md: calculate_pic_address will split this into a register. */
6517 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
6520 /* Return nonzero if X is a valid ARM state address operand. */
6522 arm_legitimate_address_outer_p (enum machine_mode mode
, rtx x
, RTX_CODE outer
,
6526 enum rtx_code code
= GET_CODE (x
);
6528 if (arm_address_register_rtx_p (x
, strict_p
))
6531 use_ldrd
= (TARGET_LDRD
6533 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6535 if (code
== POST_INC
|| code
== PRE_DEC
6536 || ((code
== PRE_INC
|| code
== POST_DEC
)
6537 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6538 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6540 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6541 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6542 && GET_CODE (XEXP (x
, 1)) == PLUS
6543 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6545 rtx addend
= XEXP (XEXP (x
, 1), 1);
6547 /* Don't allow ldrd post increment by register because it's hard
6548 to fixup invalid register choices. */
6550 && GET_CODE (x
) == POST_MODIFY
6554 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
6555 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
6558 /* After reload constants split into minipools will have addresses
6559 from a LABEL_REF. */
6560 else if (reload_completed
6561 && (code
== LABEL_REF
6563 && GET_CODE (XEXP (x
, 0)) == PLUS
6564 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6565 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6568 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6571 else if (code
== PLUS
)
6573 rtx xop0
= XEXP (x
, 0);
6574 rtx xop1
= XEXP (x
, 1);
6576 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6577 && ((CONST_INT_P (xop1
)
6578 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
6579 || (!strict_p
&& will_be_in_index_register (xop1
))))
6580 || (arm_address_register_rtx_p (xop1
, strict_p
)
6581 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
6585 /* Reload currently can't handle MINUS, so disable this for now */
6586 else if (GET_CODE (x
) == MINUS
)
6588 rtx xop0
= XEXP (x
, 0);
6589 rtx xop1
= XEXP (x
, 1);
6591 return (arm_address_register_rtx_p (xop0
, strict_p
)
6592 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
6596 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6597 && code
== SYMBOL_REF
6598 && CONSTANT_POOL_ADDRESS_P (x
)
6600 && symbol_mentioned_p (get_pool_constant (x
))
6601 && ! pcrel_constant_p (get_pool_constant (x
))))
6607 /* Return nonzero if X is a valid Thumb-2 address operand. */
6609 thumb2_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
6612 enum rtx_code code
= GET_CODE (x
);
6614 if (arm_address_register_rtx_p (x
, strict_p
))
6617 use_ldrd
= (TARGET_LDRD
6619 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
6621 if (code
== POST_INC
|| code
== PRE_DEC
6622 || ((code
== PRE_INC
|| code
== POST_DEC
)
6623 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
6624 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
6626 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
6627 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
6628 && GET_CODE (XEXP (x
, 1)) == PLUS
6629 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
6631 /* Thumb-2 only has autoincrement by constant. */
6632 rtx addend
= XEXP (XEXP (x
, 1), 1);
6633 HOST_WIDE_INT offset
;
6635 if (!CONST_INT_P (addend
))
6638 offset
= INTVAL(addend
);
6639 if (GET_MODE_SIZE (mode
) <= 4)
6640 return (offset
> -256 && offset
< 256);
6642 return (use_ldrd
&& offset
> -1024 && offset
< 1024
6643 && (offset
& 3) == 0);
6646 /* After reload constants split into minipools will have addresses
6647 from a LABEL_REF. */
6648 else if (reload_completed
6649 && (code
== LABEL_REF
6651 && GET_CODE (XEXP (x
, 0)) == PLUS
6652 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6653 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6656 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
6659 else if (code
== PLUS
)
6661 rtx xop0
= XEXP (x
, 0);
6662 rtx xop1
= XEXP (x
, 1);
6664 return ((arm_address_register_rtx_p (xop0
, strict_p
)
6665 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
6666 || (!strict_p
&& will_be_in_index_register (xop1
))))
6667 || (arm_address_register_rtx_p (xop1
, strict_p
)
6668 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
6671 /* Normally we can assign constant values to target registers without
6672 the help of constant pool. But there are cases we have to use constant
6674 1) assign a label to register.
6675 2) sign-extend a 8bit value to 32bit and then assign to register.
6677 Constant pool access in format:
6678 (set (reg r0) (mem (symbol_ref (".LC0"))))
6679 will cause the use of literal pool (later in function arm_reorg).
6680 So here we mark such format as an invalid format, then the compiler
6681 will adjust it into:
6682 (set (reg r0) (symbol_ref (".LC0")))
6683 (set (reg r0) (mem (reg r0))).
6684 No extra register is required, and (mem (reg r0)) won't cause the use
6685 of literal pools. */
6686 else if (arm_disable_literal_pool
&& code
== SYMBOL_REF
6687 && CONSTANT_POOL_ADDRESS_P (x
))
6690 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
6691 && code
== SYMBOL_REF
6692 && CONSTANT_POOL_ADDRESS_P (x
)
6694 && symbol_mentioned_p (get_pool_constant (x
))
6695 && ! pcrel_constant_p (get_pool_constant (x
))))
6701 /* Return nonzero if INDEX is valid for an address index operand in
6704 arm_legitimate_index_p (enum machine_mode mode
, rtx index
, RTX_CODE outer
,
6707 HOST_WIDE_INT range
;
6708 enum rtx_code code
= GET_CODE (index
);
6710 /* Standard coprocessor addressing modes. */
6711 if (TARGET_HARD_FLOAT
6713 && (mode
== SFmode
|| mode
== DFmode
))
6714 return (code
== CONST_INT
&& INTVAL (index
) < 1024
6715 && INTVAL (index
) > -1024
6716 && (INTVAL (index
) & 3) == 0);
6718 /* For quad modes, we restrict the constant offset to be slightly less
6719 than what the instruction format permits. We do this because for
6720 quad mode moves, we will actually decompose them into two separate
6721 double-mode reads or writes. INDEX must therefore be a valid
6722 (double-mode) offset and so should INDEX+8. */
6723 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
6724 return (code
== CONST_INT
6725 && INTVAL (index
) < 1016
6726 && INTVAL (index
) > -1024
6727 && (INTVAL (index
) & 3) == 0);
6729 /* We have no such constraint on double mode offsets, so we permit the
6730 full range of the instruction format. */
6731 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
6732 return (code
== CONST_INT
6733 && INTVAL (index
) < 1024
6734 && INTVAL (index
) > -1024
6735 && (INTVAL (index
) & 3) == 0);
6737 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
6738 return (code
== CONST_INT
6739 && INTVAL (index
) < 1024
6740 && INTVAL (index
) > -1024
6741 && (INTVAL (index
) & 3) == 0);
6743 if (arm_address_register_rtx_p (index
, strict_p
)
6744 && (GET_MODE_SIZE (mode
) <= 4))
6747 if (mode
== DImode
|| mode
== DFmode
)
6749 if (code
== CONST_INT
)
6751 HOST_WIDE_INT val
= INTVAL (index
);
6754 return val
> -256 && val
< 256;
6756 return val
> -4096 && val
< 4092;
6759 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
6762 if (GET_MODE_SIZE (mode
) <= 4
6766 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
6770 rtx xiop0
= XEXP (index
, 0);
6771 rtx xiop1
= XEXP (index
, 1);
6773 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
6774 && power_of_two_operand (xiop1
, SImode
))
6775 || (arm_address_register_rtx_p (xiop1
, strict_p
)
6776 && power_of_two_operand (xiop0
, SImode
)));
6778 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
6779 || code
== ASHIFT
|| code
== ROTATERT
)
6781 rtx op
= XEXP (index
, 1);
6783 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
6786 && INTVAL (op
) <= 31);
6790 /* For ARM v4 we may be doing a sign-extend operation during the
6796 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
6802 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
6804 return (code
== CONST_INT
6805 && INTVAL (index
) < range
6806 && INTVAL (index
) > -range
);
6809 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6810 index operand. i.e. 1, 2, 4 or 8. */
6812 thumb2_index_mul_operand (rtx op
)
6816 if (!CONST_INT_P (op
))
6820 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
6823 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6825 thumb2_legitimate_index_p (enum machine_mode mode
, rtx index
, int strict_p
)
6827 enum rtx_code code
= GET_CODE (index
);
6829 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6830 /* Standard coprocessor addressing modes. */
6831 if (TARGET_HARD_FLOAT
6833 && (mode
== SFmode
|| mode
== DFmode
))
6834 return (code
== CONST_INT
&& INTVAL (index
) < 1024
6835 /* Thumb-2 allows only > -256 index range for it's core register
6836 load/stores. Since we allow SF/DF in core registers, we have
6837 to use the intersection between -256~4096 (core) and -1024~1024
6839 && INTVAL (index
) > -256
6840 && (INTVAL (index
) & 3) == 0);
6842 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
6844 /* For DImode assume values will usually live in core regs
6845 and only allow LDRD addressing modes. */
6846 if (!TARGET_LDRD
|| mode
!= DImode
)
6847 return (code
== CONST_INT
6848 && INTVAL (index
) < 1024
6849 && INTVAL (index
) > -1024
6850 && (INTVAL (index
) & 3) == 0);
6853 /* For quad modes, we restrict the constant offset to be slightly less
6854 than what the instruction format permits. We do this because for
6855 quad mode moves, we will actually decompose them into two separate
6856 double-mode reads or writes. INDEX must therefore be a valid
6857 (double-mode) offset and so should INDEX+8. */
6858 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
6859 return (code
== CONST_INT
6860 && INTVAL (index
) < 1016
6861 && INTVAL (index
) > -1024
6862 && (INTVAL (index
) & 3) == 0);
6864 /* We have no such constraint on double mode offsets, so we permit the
6865 full range of the instruction format. */
6866 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
6867 return (code
== CONST_INT
6868 && INTVAL (index
) < 1024
6869 && INTVAL (index
) > -1024
6870 && (INTVAL (index
) & 3) == 0);
6872 if (arm_address_register_rtx_p (index
, strict_p
)
6873 && (GET_MODE_SIZE (mode
) <= 4))
6876 if (mode
== DImode
|| mode
== DFmode
)
6878 if (code
== CONST_INT
)
6880 HOST_WIDE_INT val
= INTVAL (index
);
6881 /* ??? Can we assume ldrd for thumb2? */
6882 /* Thumb-2 ldrd only has reg+const addressing modes. */
6883 /* ldrd supports offsets of +-1020.
6884 However the ldr fallback does not. */
6885 return val
> -256 && val
< 256 && (val
& 3) == 0;
6893 rtx xiop0
= XEXP (index
, 0);
6894 rtx xiop1
= XEXP (index
, 1);
6896 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
6897 && thumb2_index_mul_operand (xiop1
))
6898 || (arm_address_register_rtx_p (xiop1
, strict_p
)
6899 && thumb2_index_mul_operand (xiop0
)));
6901 else if (code
== ASHIFT
)
6903 rtx op
= XEXP (index
, 1);
6905 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
6908 && INTVAL (op
) <= 3);
6911 return (code
== CONST_INT
6912 && INTVAL (index
) < 4096
6913 && INTVAL (index
) > -256);
6916 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6918 thumb1_base_register_rtx_p (rtx x
, enum machine_mode mode
, int strict_p
)
6928 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
6930 return (regno
<= LAST_LO_REGNUM
6931 || regno
> LAST_VIRTUAL_REGISTER
6932 || regno
== FRAME_POINTER_REGNUM
6933 || (GET_MODE_SIZE (mode
) >= 4
6934 && (regno
== STACK_POINTER_REGNUM
6935 || regno
>= FIRST_PSEUDO_REGISTER
6936 || x
== hard_frame_pointer_rtx
6937 || x
== arg_pointer_rtx
)));
6940 /* Return nonzero if x is a legitimate index register. This is the case
6941 for any base register that can access a QImode object. */
6943 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
6945 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
6948 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6950 The AP may be eliminated to either the SP or the FP, so we use the
6951 least common denominator, e.g. SImode, and offsets from 0 to 64.
6953 ??? Verify whether the above is the right approach.
6955 ??? Also, the FP may be eliminated to the SP, so perhaps that
6956 needs special handling also.
6958 ??? Look at how the mips16 port solves this problem. It probably uses
6959 better ways to solve some of these problems.
6961 Although it is not incorrect, we don't accept QImode and HImode
6962 addresses based on the frame pointer or arg pointer until the
6963 reload pass starts. This is so that eliminating such addresses
6964 into stack based ones won't produce impossible code. */
6966 thumb1_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
6968 /* ??? Not clear if this is right. Experiment. */
6969 if (GET_MODE_SIZE (mode
) < 4
6970 && !(reload_in_progress
|| reload_completed
)
6971 && (reg_mentioned_p (frame_pointer_rtx
, x
)
6972 || reg_mentioned_p (arg_pointer_rtx
, x
)
6973 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
6974 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
6975 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
6976 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
6979 /* Accept any base register. SP only in SImode or larger. */
6980 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
6983 /* This is PC relative data before arm_reorg runs. */
6984 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
6985 && GET_CODE (x
) == SYMBOL_REF
6986 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
6989 /* This is PC relative data after arm_reorg runs. */
6990 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
6992 && (GET_CODE (x
) == LABEL_REF
6993 || (GET_CODE (x
) == CONST
6994 && GET_CODE (XEXP (x
, 0)) == PLUS
6995 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
6996 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
6999 /* Post-inc indexing only supported for SImode and larger. */
7000 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
7001 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
7004 else if (GET_CODE (x
) == PLUS
)
7006 /* REG+REG address can be any two index registers. */
7007 /* We disallow FRAME+REG addressing since we know that FRAME
7008 will be replaced with STACK, and SP relative addressing only
7009 permits SP+OFFSET. */
7010 if (GET_MODE_SIZE (mode
) <= 4
7011 && XEXP (x
, 0) != frame_pointer_rtx
7012 && XEXP (x
, 1) != frame_pointer_rtx
7013 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7014 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
7015 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
7018 /* REG+const has 5-7 bit offset for non-SP registers. */
7019 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7020 || XEXP (x
, 0) == arg_pointer_rtx
)
7021 && CONST_INT_P (XEXP (x
, 1))
7022 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7025 /* REG+const has 10-bit offset for SP, but only SImode and
7026 larger is supported. */
7027 /* ??? Should probably check for DI/DFmode overflow here
7028 just like GO_IF_LEGITIMATE_OFFSET does. */
7029 else if (REG_P (XEXP (x
, 0))
7030 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
7031 && GET_MODE_SIZE (mode
) >= 4
7032 && CONST_INT_P (XEXP (x
, 1))
7033 && INTVAL (XEXP (x
, 1)) >= 0
7034 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
7035 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7038 else if (REG_P (XEXP (x
, 0))
7039 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
7040 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
7041 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
7042 && REGNO (XEXP (x
, 0))
7043 <= LAST_VIRTUAL_POINTER_REGISTER
))
7044 && GET_MODE_SIZE (mode
) >= 4
7045 && CONST_INT_P (XEXP (x
, 1))
7046 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7050 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7051 && GET_MODE_SIZE (mode
) == 4
7052 && GET_CODE (x
) == SYMBOL_REF
7053 && CONSTANT_POOL_ADDRESS_P (x
)
7055 && symbol_mentioned_p (get_pool_constant (x
))
7056 && ! pcrel_constant_p (get_pool_constant (x
))))
7062 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7063 instruction of mode MODE. */
7065 thumb_legitimate_offset_p (enum machine_mode mode
, HOST_WIDE_INT val
)
7067 switch (GET_MODE_SIZE (mode
))
7070 return val
>= 0 && val
< 32;
7073 return val
>= 0 && val
< 64 && (val
& 1) == 0;
7077 && (val
+ GET_MODE_SIZE (mode
)) <= 128
7083 arm_legitimate_address_p (enum machine_mode mode
, rtx x
, bool strict_p
)
7086 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
7087 else if (TARGET_THUMB2
)
7088 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
7089 else /* if (TARGET_THUMB1) */
7090 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
7093 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7095 Given an rtx X being reloaded into a reg required to be
7096 in class CLASS, return the class of reg to actually use.
7097 In general this is just CLASS, but for the Thumb core registers and
7098 immediate constants we prefer a LO_REGS class or a subset. */
7101 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
7107 if (rclass
== GENERAL_REGS
)
7114 /* Build the SYMBOL_REF for __tls_get_addr. */
7116 static GTY(()) rtx tls_get_addr_libfunc
;
7119 get_tls_get_addr (void)
7121 if (!tls_get_addr_libfunc
)
7122 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
7123 return tls_get_addr_libfunc
;
7127 arm_load_tp (rtx target
)
7130 target
= gen_reg_rtx (SImode
);
7134 /* Can return in any reg. */
7135 emit_insn (gen_load_tp_hard (target
));
7139 /* Always returned in r0. Immediately copy the result into a pseudo,
7140 otherwise other uses of r0 (e.g. setting up function arguments) may
7141 clobber the value. */
7145 emit_insn (gen_load_tp_soft ());
7147 tmp
= gen_rtx_REG (SImode
, 0);
7148 emit_move_insn (target
, tmp
);
7154 load_tls_operand (rtx x
, rtx reg
)
7158 if (reg
== NULL_RTX
)
7159 reg
= gen_reg_rtx (SImode
);
7161 tmp
= gen_rtx_CONST (SImode
, x
);
7163 emit_move_insn (reg
, tmp
);
7169 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
7171 rtx insns
, label
, labelno
, sum
;
7173 gcc_assert (reloc
!= TLS_DESCSEQ
);
7176 labelno
= GEN_INT (pic_labelno
++);
7177 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7178 label
= gen_rtx_CONST (VOIDmode
, label
);
7180 sum
= gen_rtx_UNSPEC (Pmode
,
7181 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
7182 GEN_INT (TARGET_ARM
? 8 : 4)),
7184 reg
= load_tls_operand (sum
, reg
);
7187 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
7189 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7191 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
7192 LCT_PURE
, /* LCT_CONST? */
7193 Pmode
, 1, reg
, Pmode
);
7195 insns
= get_insns ();
7202 arm_tls_descseq_addr (rtx x
, rtx reg
)
7204 rtx labelno
= GEN_INT (pic_labelno
++);
7205 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7206 rtx sum
= gen_rtx_UNSPEC (Pmode
,
7207 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
7208 gen_rtx_CONST (VOIDmode
, label
),
7209 GEN_INT (!TARGET_ARM
)),
7211 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, 0));
7213 emit_insn (gen_tlscall (x
, labelno
));
7215 reg
= gen_reg_rtx (SImode
);
7217 gcc_assert (REGNO (reg
) != 0);
7219 emit_move_insn (reg
, reg0
);
7225 legitimize_tls_address (rtx x
, rtx reg
)
7227 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
7228 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
7232 case TLS_MODEL_GLOBAL_DYNAMIC
:
7233 if (TARGET_GNU2_TLS
)
7235 reg
= arm_tls_descseq_addr (x
, reg
);
7237 tp
= arm_load_tp (NULL_RTX
);
7239 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7243 /* Original scheme */
7244 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
7245 dest
= gen_reg_rtx (Pmode
);
7246 emit_libcall_block (insns
, dest
, ret
, x
);
7250 case TLS_MODEL_LOCAL_DYNAMIC
:
7251 if (TARGET_GNU2_TLS
)
7253 reg
= arm_tls_descseq_addr (x
, reg
);
7255 tp
= arm_load_tp (NULL_RTX
);
7257 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7261 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
7263 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7264 share the LDM result with other LD model accesses. */
7265 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
7267 dest
= gen_reg_rtx (Pmode
);
7268 emit_libcall_block (insns
, dest
, ret
, eqv
);
7270 /* Load the addend. */
7271 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
7272 GEN_INT (TLS_LDO32
)),
7274 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
7275 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
7279 case TLS_MODEL_INITIAL_EXEC
:
7280 labelno
= GEN_INT (pic_labelno
++);
7281 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7282 label
= gen_rtx_CONST (VOIDmode
, label
);
7283 sum
= gen_rtx_UNSPEC (Pmode
,
7284 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
7285 GEN_INT (TARGET_ARM
? 8 : 4)),
7287 reg
= load_tls_operand (sum
, reg
);
7290 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
7291 else if (TARGET_THUMB2
)
7292 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
7295 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7296 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
7299 tp
= arm_load_tp (NULL_RTX
);
7301 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7303 case TLS_MODEL_LOCAL_EXEC
:
7304 tp
= arm_load_tp (NULL_RTX
);
7306 reg
= gen_rtx_UNSPEC (Pmode
,
7307 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
7309 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
7311 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7318 /* Try machine-dependent ways of modifying an illegitimate address
7319 to be legitimate. If we find one, return the new, valid address. */
7321 arm_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
7325 /* TODO: legitimize_address for Thumb2. */
7328 return thumb_legitimize_address (x
, orig_x
, mode
);
7331 if (arm_tls_symbol_p (x
))
7332 return legitimize_tls_address (x
, NULL_RTX
);
7334 if (GET_CODE (x
) == PLUS
)
7336 rtx xop0
= XEXP (x
, 0);
7337 rtx xop1
= XEXP (x
, 1);
7339 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
7340 xop0
= force_reg (SImode
, xop0
);
7342 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
7343 && !symbol_mentioned_p (xop1
))
7344 xop1
= force_reg (SImode
, xop1
);
7346 if (ARM_BASE_REGISTER_RTX_P (xop0
)
7347 && CONST_INT_P (xop1
))
7349 HOST_WIDE_INT n
, low_n
;
7353 /* VFP addressing modes actually allow greater offsets, but for
7354 now we just stick with the lowest common denominator. */
7356 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
7368 low_n
= ((mode
) == TImode
? 0
7369 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
7373 base_reg
= gen_reg_rtx (SImode
);
7374 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
7375 emit_move_insn (base_reg
, val
);
7376 x
= plus_constant (Pmode
, base_reg
, low_n
);
7378 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7379 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7382 /* XXX We don't allow MINUS any more -- see comment in
7383 arm_legitimate_address_outer_p (). */
7384 else if (GET_CODE (x
) == MINUS
)
7386 rtx xop0
= XEXP (x
, 0);
7387 rtx xop1
= XEXP (x
, 1);
7389 if (CONSTANT_P (xop0
))
7390 xop0
= force_reg (SImode
, xop0
);
7392 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
7393 xop1
= force_reg (SImode
, xop1
);
7395 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7396 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
7399 /* Make sure to take full advantage of the pre-indexed addressing mode
7400 with absolute addresses which often allows for the base register to
7401 be factorized for multiple adjacent memory references, and it might
7402 even allows for the mini pool to be avoided entirely. */
7403 else if (CONST_INT_P (x
) && optimize
> 0)
7406 HOST_WIDE_INT mask
, base
, index
;
7409 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7410 use a 8-bit index. So let's use a 12-bit index for SImode only and
7411 hope that arm_gen_constant will enable ldrb to use more bits. */
7412 bits
= (mode
== SImode
) ? 12 : 8;
7413 mask
= (1 << bits
) - 1;
7414 base
= INTVAL (x
) & ~mask
;
7415 index
= INTVAL (x
) & mask
;
7416 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
7418 /* It'll most probably be more efficient to generate the base
7419 with more bits set and use a negative index instead. */
7423 base_reg
= force_reg (SImode
, GEN_INT (base
));
7424 x
= plus_constant (Pmode
, base_reg
, index
);
7429 /* We need to find and carefully transform any SYMBOL and LABEL
7430 references; so go back to the original address expression. */
7431 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7433 if (new_x
!= orig_x
)
7441 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7442 to be legitimate. If we find one, return the new, valid address. */
7444 thumb_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
7446 if (arm_tls_symbol_p (x
))
7447 return legitimize_tls_address (x
, NULL_RTX
);
7449 if (GET_CODE (x
) == PLUS
7450 && CONST_INT_P (XEXP (x
, 1))
7451 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
7452 || INTVAL (XEXP (x
, 1)) < 0))
7454 rtx xop0
= XEXP (x
, 0);
7455 rtx xop1
= XEXP (x
, 1);
7456 HOST_WIDE_INT offset
= INTVAL (xop1
);
7458 /* Try and fold the offset into a biasing of the base register and
7459 then offsetting that. Don't do this when optimizing for space
7460 since it can cause too many CSEs. */
7461 if (optimize_size
&& offset
>= 0
7462 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
7464 HOST_WIDE_INT delta
;
7467 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
7468 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
7469 delta
= 31 * GET_MODE_SIZE (mode
);
7471 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
7473 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
7475 x
= plus_constant (Pmode
, xop0
, delta
);
7477 else if (offset
< 0 && offset
> -256)
7478 /* Small negative offsets are best done with a subtract before the
7479 dereference, forcing these into a register normally takes two
7481 x
= force_operand (x
, NULL_RTX
);
7484 /* For the remaining cases, force the constant into a register. */
7485 xop1
= force_reg (SImode
, xop1
);
7486 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
7489 else if (GET_CODE (x
) == PLUS
7490 && s_register_operand (XEXP (x
, 1), SImode
)
7491 && !s_register_operand (XEXP (x
, 0), SImode
))
7493 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
7495 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
7500 /* We need to find and carefully transform any SYMBOL and LABEL
7501 references; so go back to the original address expression. */
7502 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
7504 if (new_x
!= orig_x
)
7512 arm_legitimize_reload_address (rtx
*p
,
7513 enum machine_mode mode
,
7514 int opnum
, int type
,
7515 int ind_levels ATTRIBUTE_UNUSED
)
7517 /* We must recognize output that we have already generated ourselves. */
7518 if (GET_CODE (*p
) == PLUS
7519 && GET_CODE (XEXP (*p
, 0)) == PLUS
7520 && REG_P (XEXP (XEXP (*p
, 0), 0))
7521 && CONST_INT_P (XEXP (XEXP (*p
, 0), 1))
7522 && CONST_INT_P (XEXP (*p
, 1)))
7524 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
7525 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
7526 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7530 if (GET_CODE (*p
) == PLUS
7531 && REG_P (XEXP (*p
, 0))
7532 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p
, 0)))
7533 /* If the base register is equivalent to a constant, let the generic
7534 code handle it. Otherwise we will run into problems if a future
7535 reload pass decides to rematerialize the constant. */
7536 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p
, 0)))
7537 && CONST_INT_P (XEXP (*p
, 1)))
7539 HOST_WIDE_INT val
= INTVAL (XEXP (*p
, 1));
7540 HOST_WIDE_INT low
, high
;
7542 /* Detect coprocessor load/stores. */
7543 bool coproc_p
= ((TARGET_HARD_FLOAT
7545 && (mode
== SFmode
|| mode
== DFmode
))
7546 || (TARGET_REALLY_IWMMXT
7547 && VALID_IWMMXT_REG_MODE (mode
))
7549 && (VALID_NEON_DREG_MODE (mode
)
7550 || VALID_NEON_QREG_MODE (mode
))));
7552 /* For some conditions, bail out when lower two bits are unaligned. */
7553 if ((val
& 0x3) != 0
7554 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7556 /* For DI, and DF under soft-float: */
7557 || ((mode
== DImode
|| mode
== DFmode
)
7558 /* Without ldrd, we use stm/ldm, which does not
7559 fair well with unaligned bits. */
7561 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7562 || TARGET_THUMB2
))))
7565 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7566 of which the (reg+high) gets turned into a reload add insn,
7567 we try to decompose the index into high/low values that can often
7568 also lead to better reload CSE.
7570 ldr r0, [r2, #4100] // Offset too large
7571 ldr r1, [r2, #4104] // Offset too large
7573 is best reloaded as:
7579 which post-reload CSE can simplify in most cases to eliminate the
7580 second add instruction:
7585 The idea here is that we want to split out the bits of the constant
7586 as a mask, rather than as subtracting the maximum offset that the
7587 respective type of load/store used can handle.
7589 When encountering negative offsets, we can still utilize it even if
7590 the overall offset is positive; sometimes this may lead to an immediate
7591 that can be constructed with fewer instructions.
7593 ldr r0, [r2, #0x3FFFFC]
7595 This is best reloaded as:
7596 add t1, r2, #0x400000
7599 The trick for spotting this for a load insn with N bits of offset
7600 (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
7601 negative offset that is going to make bit N and all the bits below
7602 it become zero in the remainder part.
7604 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7605 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7606 used in most cases of ARM load/store instructions. */
7608 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7609 (((VAL) & ((1 << (N)) - 1)) \
7610 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7615 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 10);
7617 /* NEON quad-word load/stores are made of two double-word accesses,
7618 so the valid index range is reduced by 8. Treat as 9-bit range if
7620 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
) && low
>= 1016)
7621 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 9);
7623 else if (GET_MODE_SIZE (mode
) == 8)
7626 low
= (TARGET_THUMB2
7627 ? SIGN_MAG_LOW_ADDR_BITS (val
, 10)
7628 : SIGN_MAG_LOW_ADDR_BITS (val
, 8));
7630 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7631 to access doublewords. The supported load/store offsets are
7632 -8, -4, and 4, which we try to produce here. */
7633 low
= ((val
& 0xf) ^ 0x8) - 0x8;
7635 else if (GET_MODE_SIZE (mode
) < 8)
7637 /* NEON element load/stores do not have an offset. */
7638 if (TARGET_NEON_FP16
&& mode
== HFmode
)
7643 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7644 Try the wider 12-bit range first, and re-try if the result
7646 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7648 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
7652 if (mode
== HImode
|| mode
== HFmode
)
7655 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
7658 /* The storehi/movhi_bytes fallbacks can use only
7659 [-4094,+4094] of the full ldrb/strb index range. */
7660 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7661 if (low
== 4095 || low
== -4095)
7666 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
7672 high
= ((((val
- low
) & (unsigned HOST_WIDE_INT
) 0xffffffff)
7673 ^ (unsigned HOST_WIDE_INT
) 0x80000000)
7674 - (unsigned HOST_WIDE_INT
) 0x80000000);
7675 /* Check for overflow or zero */
7676 if (low
== 0 || high
== 0 || (high
+ low
!= val
))
7679 /* Reload the high part into a base reg; leave the low part
7681 Note that replacing this gen_rtx_PLUS with plus_constant is
7682 wrong in this case because we rely on the
7683 (plus (plus reg c1) c2) structure being preserved so that
7684 XEXP (*p, 0) in push_reload below uses the correct term. */
7685 *p
= gen_rtx_PLUS (GET_MODE (*p
),
7686 gen_rtx_PLUS (GET_MODE (*p
), XEXP (*p
, 0),
7689 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
7690 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
7691 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7699 thumb_legitimize_reload_address (rtx
*x_p
,
7700 enum machine_mode mode
,
7701 int opnum
, int type
,
7702 int ind_levels ATTRIBUTE_UNUSED
)
7706 if (GET_CODE (x
) == PLUS
7707 && GET_MODE_SIZE (mode
) < 4
7708 && REG_P (XEXP (x
, 0))
7709 && XEXP (x
, 0) == stack_pointer_rtx
7710 && CONST_INT_P (XEXP (x
, 1))
7711 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7716 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
7717 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7721 /* If both registers are hi-regs, then it's better to reload the
7722 entire expression rather than each register individually. That
7723 only requires one reload register rather than two. */
7724 if (GET_CODE (x
) == PLUS
7725 && REG_P (XEXP (x
, 0))
7726 && REG_P (XEXP (x
, 1))
7727 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
7728 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
7733 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
7734 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
7741 /* Test for various thread-local symbols. */
7743 /* Return TRUE if X is a thread-local symbol. */
7746 arm_tls_symbol_p (rtx x
)
7748 if (! TARGET_HAVE_TLS
)
7751 if (GET_CODE (x
) != SYMBOL_REF
)
7754 return SYMBOL_REF_TLS_MODEL (x
) != 0;
7757 /* Helper for arm_tls_referenced_p. */
7760 arm_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
7762 if (GET_CODE (*x
) == SYMBOL_REF
)
7763 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
7765 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7766 TLS offsets, not real symbol references. */
7767 if (GET_CODE (*x
) == UNSPEC
7768 && XINT (*x
, 1) == UNSPEC_TLS
)
7774 /* Return TRUE if X contains any TLS symbol references. */
7777 arm_tls_referenced_p (rtx x
)
7779 if (! TARGET_HAVE_TLS
)
7782 return for_each_rtx (&x
, arm_tls_operand_p_1
, NULL
);
7785 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7787 On the ARM, allow any integer (invalid ones are removed later by insn
7788 patterns), nice doubles and symbol_refs which refer to the function's
7791 When generating pic allow anything. */
7794 arm_legitimate_constant_p_1 (enum machine_mode mode
, rtx x
)
7796 /* At present, we have no support for Neon structure constants, so forbid
7797 them here. It might be possible to handle simple cases like 0 and -1
7799 if (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
))
7802 return flag_pic
|| !label_mentioned_p (x
);
7806 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
7808 return (CONST_INT_P (x
)
7809 || CONST_DOUBLE_P (x
)
7810 || CONSTANT_ADDRESS_P (x
)
7815 arm_legitimate_constant_p (enum machine_mode mode
, rtx x
)
7817 return (!arm_cannot_force_const_mem (mode
, x
)
7819 ? arm_legitimate_constant_p_1 (mode
, x
)
7820 : thumb_legitimate_constant_p (mode
, x
)));
7823 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7826 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
7830 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
7832 split_const (x
, &base
, &offset
);
7833 if (GET_CODE (base
) == SYMBOL_REF
7834 && !offset_within_block_p (base
, INTVAL (offset
)))
7837 return arm_tls_referenced_p (x
);
/* Nonzero if X is a register, or a SUBREG of a register.  */
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

/* The underlying register of X, looking through a SUBREG if present.  */
#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
7848 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
7850 enum machine_mode mode
= GET_MODE (x
);
7859 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7866 return COSTS_N_INSNS (1);
7869 if (CONST_INT_P (XEXP (x
, 1)))
7872 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
7879 return COSTS_N_INSNS (2) + cycles
;
7881 return COSTS_N_INSNS (1) + 16;
7884 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7886 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
7887 return (COSTS_N_INSNS (words
)
7888 + 4 * ((MEM_P (SET_SRC (x
)))
7889 + MEM_P (SET_DEST (x
))));
7894 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
7896 if (thumb_shiftable_const (INTVAL (x
)))
7897 return COSTS_N_INSNS (2);
7898 return COSTS_N_INSNS (3);
7900 else if ((outer
== PLUS
|| outer
== COMPARE
)
7901 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
7903 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
7904 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
7905 return COSTS_N_INSNS (1);
7906 else if (outer
== AND
)
7909 /* This duplicates the tests in the andsi3 expander. */
7910 for (i
= 9; i
<= 31; i
++)
7911 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
7912 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
7913 return COSTS_N_INSNS (2);
7915 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
7916 || outer
== LSHIFTRT
)
7918 return COSTS_N_INSNS (2);
7924 return COSTS_N_INSNS (3);
7942 /* XXX another guess. */
7943 /* Memory costs quite a lot for the first word, but subsequent words
7944 load at the equivalent of a single insn each. */
7945 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
7946 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7951 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
7957 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
7958 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
7964 return total
+ COSTS_N_INSNS (1);
7966 /* Assume a two-shift sequence. Increase the cost slightly so
7967 we prefer actual shifts over an extend operation. */
7968 return total
+ 1 + COSTS_N_INSNS (2);
7976 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
7978 enum machine_mode mode
= GET_MODE (x
);
7979 enum rtx_code subcode
;
7981 enum rtx_code code
= GET_CODE (x
);
7987 /* Memory costs quite a lot for the first word, but subsequent words
7988 load at the equivalent of a single insn each. */
7989 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
7996 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
7997 *total
= COSTS_N_INSNS (2);
7998 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
7999 *total
= COSTS_N_INSNS (4);
8001 *total
= COSTS_N_INSNS (20);
8005 if (REG_P (XEXP (x
, 1)))
8006 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
8007 else if (!CONST_INT_P (XEXP (x
, 1)))
8008 *total
= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8014 *total
+= COSTS_N_INSNS (4);
8019 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
8020 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8023 *total
+= COSTS_N_INSNS (3);
8027 *total
+= COSTS_N_INSNS (1);
8028 /* Increase the cost of complex shifts because they aren't any faster,
8029 and reduce dual issue opportunities. */
8030 if (arm_tune_cortex_a9
8031 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
8039 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8040 if (CONST_INT_P (XEXP (x
, 0))
8041 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8043 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8047 if (CONST_INT_P (XEXP (x
, 1))
8048 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8050 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8057 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8059 if (TARGET_HARD_FLOAT
8061 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8063 *total
= COSTS_N_INSNS (1);
8064 if (CONST_DOUBLE_P (XEXP (x
, 0))
8065 && arm_const_double_rtx (XEXP (x
, 0)))
8067 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8071 if (CONST_DOUBLE_P (XEXP (x
, 1))
8072 && arm_const_double_rtx (XEXP (x
, 1)))
8074 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8080 *total
= COSTS_N_INSNS (20);
8084 *total
= COSTS_N_INSNS (1);
8085 if (CONST_INT_P (XEXP (x
, 0))
8086 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8088 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8092 subcode
= GET_CODE (XEXP (x
, 1));
8093 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8094 || subcode
== LSHIFTRT
8095 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8097 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8098 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8102 /* A shift as a part of RSB costs no more than RSB itself. */
8103 if (GET_CODE (XEXP (x
, 0)) == MULT
8104 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8106 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed
);
8107 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8112 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
8114 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8115 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, 0, speed
);
8119 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
8120 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
8122 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8123 if (REG_P (XEXP (XEXP (x
, 1), 0))
8124 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
8125 *total
+= COSTS_N_INSNS (1);
8133 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
8134 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8135 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8137 *total
= COSTS_N_INSNS (1);
8138 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
8140 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8144 /* MLA: All arguments must be registers. We filter out
8145 multiplication by a power of two, so that we fall down into
8147 if (GET_CODE (XEXP (x
, 0)) == MULT
8148 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8150 /* The cost comes from the cost of the multiply. */
8154 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8156 if (TARGET_HARD_FLOAT
8158 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8160 *total
= COSTS_N_INSNS (1);
8161 if (CONST_DOUBLE_P (XEXP (x
, 1))
8162 && arm_const_double_rtx (XEXP (x
, 1)))
8164 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8171 *total
= COSTS_N_INSNS (20);
8175 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
8176 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
8178 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8179 if (REG_P (XEXP (XEXP (x
, 0), 0))
8180 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
8181 *total
+= COSTS_N_INSNS (1);
8187 case AND
: case XOR
: case IOR
:
8189 /* Normally the frame registers will be spilt into reg+const during
8190 reload, so it is a bad idea to combine them with other instructions,
8191 since then they might not be moved outside of loops. As a compromise
8192 we allow integration with ops that have a constant as their second
8194 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
8195 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
8196 && !CONST_INT_P (XEXP (x
, 1)))
8197 *total
= COSTS_N_INSNS (1);
8201 *total
+= COSTS_N_INSNS (2);
8202 if (CONST_INT_P (XEXP (x
, 1))
8203 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8205 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8212 *total
+= COSTS_N_INSNS (1);
8213 if (CONST_INT_P (XEXP (x
, 1))
8214 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8216 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8219 subcode
= GET_CODE (XEXP (x
, 0));
8220 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8221 || subcode
== LSHIFTRT
8222 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8224 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8225 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8230 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8232 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8233 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8237 if (subcode
== UMIN
|| subcode
== UMAX
8238 || subcode
== SMIN
|| subcode
== SMAX
)
8240 *total
= COSTS_N_INSNS (3);
8247 /* This should have been handled by the CPU specific routines. */
8251 if (arm_arch3m
&& mode
== SImode
8252 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
8253 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8254 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
8255 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
8256 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
8257 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
8259 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, 0, speed
);
8262 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8266 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8268 if (TARGET_HARD_FLOAT
8270 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8272 *total
= COSTS_N_INSNS (1);
8275 *total
= COSTS_N_INSNS (2);
8281 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
8282 if (mode
== SImode
&& code
== NOT
)
8284 subcode
= GET_CODE (XEXP (x
, 0));
8285 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8286 || subcode
== LSHIFTRT
8287 || subcode
== ROTATE
|| subcode
== ROTATERT
8289 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
8291 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8292 /* Register shifts cost an extra cycle. */
8293 if (!CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
8294 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
8303 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8305 *total
= COSTS_N_INSNS (4);
8309 operand
= XEXP (x
, 0);
8311 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
8312 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
8313 && REG_P (XEXP (operand
, 0))
8314 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
8315 *total
+= COSTS_N_INSNS (1);
8316 *total
+= (rtx_cost (XEXP (x
, 1), code
, 1, speed
)
8317 + rtx_cost (XEXP (x
, 2), code
, 2, speed
));
8321 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8323 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8329 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8330 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8332 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8338 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8339 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8341 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8361 /* SCC insns. In the case where the comparison has already been
8362 performed, then they cost 2 instructions. Otherwise they need
8363 an additional comparison before them. */
8364 *total
= COSTS_N_INSNS (2);
8365 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8372 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8378 *total
+= COSTS_N_INSNS (1);
8379 if (CONST_INT_P (XEXP (x
, 1))
8380 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8382 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8386 subcode
= GET_CODE (XEXP (x
, 0));
8387 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8388 || subcode
== LSHIFTRT
8389 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8391 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8392 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8397 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8399 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8400 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, 0, speed
);
8410 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8411 if (!CONST_INT_P (XEXP (x
, 1))
8412 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8413 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, speed
);
8417 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8419 if (TARGET_HARD_FLOAT
8421 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8423 *total
= COSTS_N_INSNS (1);
8426 *total
= COSTS_N_INSNS (20);
8429 *total
= COSTS_N_INSNS (1);
8431 *total
+= COSTS_N_INSNS (3);
8437 if (GET_MODE_CLASS (mode
) == MODE_INT
)
8439 rtx op
= XEXP (x
, 0);
8440 enum machine_mode opmode
= GET_MODE (op
);
8443 *total
+= COSTS_N_INSNS (1);
8445 if (opmode
!= SImode
)
8449 /* If !arm_arch4, we use one of the extendhisi2_mem
8450 or movhi_bytes patterns for HImode. For a QImode
8451 sign extension, we first zero-extend from memory
8452 and then perform a shift sequence. */
8453 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
8454 *total
+= COSTS_N_INSNS (2);
8457 *total
+= COSTS_N_INSNS (1);
8459 /* We don't have the necessary insn, so we need to perform some
8461 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
8462 /* An and with constant 255. */
8463 *total
+= COSTS_N_INSNS (1);
8465 /* A shift sequence. Increase costs slightly to avoid
8466 combining two shifts into an extend operation. */
8467 *total
+= COSTS_N_INSNS (2) + 1;
8473 switch (GET_MODE (XEXP (x
, 0)))
8480 *total
= COSTS_N_INSNS (1);
8490 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8494 if (const_ok_for_arm (INTVAL (x
))
8495 || const_ok_for_arm (~INTVAL (x
)))
8496 *total
= COSTS_N_INSNS (1);
8498 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
8499 INTVAL (x
), NULL_RTX
,
8506 *total
= COSTS_N_INSNS (3);
8510 *total
= COSTS_N_INSNS (1);
8514 *total
= COSTS_N_INSNS (1);
8515 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
8519 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
8520 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8521 *total
= COSTS_N_INSNS (1);
8523 *total
= COSTS_N_INSNS (4);
8527 /* The vec_extract patterns accept memory operands that require an
8528 address reload. Account for the cost of that reload to give the
8529 auto-inc-dec pass an incentive to try to replace them. */
8530 if (TARGET_NEON
&& MEM_P (SET_DEST (x
))
8531 && GET_CODE (SET_SRC (x
)) == VEC_SELECT
)
8533 *total
= rtx_cost (SET_DEST (x
), code
, 0, speed
);
8534 if (!neon_vector_mem_operand (SET_DEST (x
), 2, true))
8535 *total
+= COSTS_N_INSNS (1);
8538 /* Likewise for the vec_set patterns. */
8539 if (TARGET_NEON
&& GET_CODE (SET_SRC (x
)) == VEC_MERGE
8540 && GET_CODE (XEXP (SET_SRC (x
), 0)) == VEC_DUPLICATE
8541 && MEM_P (XEXP (XEXP (SET_SRC (x
), 0), 0)))
8543 rtx mem
= XEXP (XEXP (SET_SRC (x
), 0), 0);
8544 *total
= rtx_cost (mem
, code
, 0, speed
);
8545 if (!neon_vector_mem_operand (mem
, 2, true))
8546 *total
+= COSTS_N_INSNS (1);
8552 /* We cost this as high as our memory costs to allow this to
8553 be hoisted from loops. */
8554 if (XINT (x
, 1) == UNSPEC_PIC_UNIFIED
)
8556 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8562 && TARGET_HARD_FLOAT
8564 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8565 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8566 *total
= COSTS_N_INSNS (1);
8568 *total
= COSTS_N_INSNS (4);
8572 *total
= COSTS_N_INSNS (4);
8577 /* Estimates the size cost of thumb1 instructions.
8578 For now most of the code is copied from thumb1_rtx_costs. We need more
8579 fine grain tuning when we have more related test cases. */
/* NOTE(review): this region is a damaged extraction.  The embedded
   original line numbers (8577, 8581, 8592, ...) skip, so the function's
   return type, the main switch statement, its case labels, braces and
   several returns are missing from this view.  The comments below only
   annotate what the surviving fragments demonstrate; do not treat the
   text as complete source.  */
8581 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8583 enum machine_mode mode
= GET_MODE (x
);
/* Simple ALU case: one insn for SImode, two for wider modes.  */
8592 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8596 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8597 defined by RTL expansion, especially for the expansion of
8599 if ((GET_CODE (XEXP (x
, 0)) == MULT
8600 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8601 || (GET_CODE (XEXP (x
, 1)) == MULT
8602 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8603 return COSTS_N_INSNS (2);
8604 /* On purpose fall through for normal RTX. */
8608 return COSTS_N_INSNS (1);
/* Multiplication (presumably the MULT case -- the label itself is not
   visible here).  TODO confirm against the full source.  */
8611 if (CONST_INT_P (XEXP (x
, 1)))
8613 /* Thumb1 mul instruction can't operate on const. We must Load it
8614 into a register first. */
8615 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8616 return COSTS_N_INSNS (1) + const_size
;
8618 return COSTS_N_INSNS (1);
8621 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8623 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
/* Each word is one insn; memory operands on either side add a penalty.  */
8624 return (COSTS_N_INSNS (words
)
8625 + 4 * ((MEM_P (SET_SRC (x
)))
8626 + MEM_P (SET_DEST (x
))));
/* CONST_INT: cost depends on how the constant can be materialized
   (small positive, negated small, shifted, or full synthesis).  */
8631 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8632 return COSTS_N_INSNS (1);
8633 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8634 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
8635 return COSTS_N_INSNS (2);
8636 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8637 if (thumb_shiftable_const (INTVAL (x
)))
8638 return COSTS_N_INSNS (2);
8639 return COSTS_N_INSNS (3);
/* Otherwise the constant's cost depends on the operation consuming it
   (OUTER); the enclosing condition for the branch below is not fully
   visible here.  */
8641 else if ((outer
== PLUS
|| outer
== COMPARE
)
8642 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8644 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8645 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8646 return COSTS_N_INSNS (1);
8647 else if (outer
== AND
)
8650 /* This duplicates the tests in the andsi3 expander. */
8651 for (i
= 9; i
<= 31; i
++)
8652 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8653 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8654 return COSTS_N_INSNS (2);
8656 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8657 || outer
== LSHIFTRT
)
8659 return COSTS_N_INSNS (2);
8665 return COSTS_N_INSNS (3);
8683 /* XXX another guess. */
8684 /* Memory costs quite a lot for the first word, but subsequent words
8685 load at the equivalent of a single insn each. */
8686 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8687 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
/* Conditional with a PC operand (presumably the IF_THEN_ELSE case --
   the case label and the value returned are not visible here).  */
8692 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8697 /* XXX still guessing. */
8698 switch (GET_MODE (XEXP (x
, 0)))
8701 return (1 + (mode
== DImode
? 4 : 0)
8702 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8705 return (4 + (mode
== DImode
? 4 : 0)
8706 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8709 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8720 /* RTX costs when optimizing for size. */
/* NOTE(review): damaged extraction -- the embedded original line
   numbers skip, so the function's signature tail, switch, case labels,
   braces, breaks and returns are partially missing from this view.
   Comments below annotate only the surviving fragments.  */
8722 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8725 enum machine_mode mode
= GET_MODE (x
);
/* Thumb-1 has its own dedicated size-cost estimator.  */
8728 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
8732 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8736 /* A memory access costs 1 insn if the mode is small, or the address is
8737 a single register, otherwise it costs one insn per word. */
8738 if (REG_P (XEXP (x
, 0)))
8739 *total
= COSTS_N_INSNS (1);
8741 && GET_CODE (XEXP (x
, 0)) == PLUS
8742 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
8743 /* This will be split into two instructions.
8744 See arm.md:calculate_pic_address. */
8745 *total
= COSTS_N_INSNS (2);
8747 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8754 /* Needs a libcall, so it costs about this. */
8755 *total
= COSTS_N_INSNS (2);
/* Rotate/shift by a register in SImode: two insns plus the shifted
   operand (the case label is not visible here -- TODO confirm).  */
8759 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
8761 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, 0, false);
/* Shifts: DImode by constant is three insns, SImode one; register
   shift amounts are slightly disparaged.  */
8769 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
8771 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, 0, false);
8774 else if (mode
== SImode
)
8776 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, 0, false);
8777 /* Slightly disparage register shifts, but not by much. */
8778 if (!CONST_INT_P (XEXP (x
, 1)))
8779 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, 1, false);
8783 /* Needs a libcall. */
8784 *total
= COSTS_N_INSNS (2);
/* Floating-point arithmetic is one insn when hardware FP covers the
   mode (DFmode excluded on single-precision-only VFP).  */
8788 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8789 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8791 *total
= COSTS_N_INSNS (1);
/* Integer MINUS (presumably): a shifter operand on either side folds
   into the ALU operation for free.  */
8797 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
8798 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
8800 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
8801 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
8802 || subcode1
== ROTATE
|| subcode1
== ROTATERT
8803 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
8804 || subcode1
== ASHIFTRT
)
8806 /* It's just the cost of the two operands. */
8811 *total
= COSTS_N_INSNS (1);
8815 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8819 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8820 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8822 *total
= COSTS_N_INSNS (1);
8826 /* A shift as a part of ADD costs nothing. */
8827 if (GET_CODE (XEXP (x
, 0)) == MULT
8828 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8830 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
8831 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, false);
8832 *total
+= rtx_cost (XEXP (x
, 1), code
, 1, false);
8837 case AND
: case XOR
: case IOR
:
/* Logical ops likewise fold a shifter operand (or, for AND, a NOT)
   into the instruction.  */
8840 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
8842 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
8843 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
8844 || (code
== AND
&& subcode
== NOT
))
8846 /* It's just the cost of the two operands. */
8852 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8856 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8860 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8861 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8863 *total
= COSTS_N_INSNS (1);
8869 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
/* Comparisons already performed against the CC register cost nothing
   extra (the branch taken in that case is not visible here).  */
8878 if (cc_register (XEXP (x
, 0), VOIDmode
))
8881 *total
= COSTS_N_INSNS (1);
8885 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8886 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8887 *total
= COSTS_N_INSNS (1);
8889 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
/* Anything not handled above: fall back to the speed-cost routine.  */
8894 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
/* CONST_INT: cost tracks how the constant can be encoded (directly,
   inverted for AND/MVN, or negated for COMPARE/PLUS/MINUS).  */
8897 if (const_ok_for_arm (INTVAL (x
)))
8898 /* A multiplication by a constant requires another instruction
8899 to load the constant to a register. */
8900 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
8902 else if (const_ok_for_arm (~INTVAL (x
)))
8903 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
8904 else if (const_ok_for_arm (-INTVAL (x
)))
8906 if (outer_code
== COMPARE
|| outer_code
== PLUS
8907 || outer_code
== MINUS
)
8910 *total
= COSTS_N_INSNS (1);
8913 *total
= COSTS_N_INSNS (2);
8919 *total
= COSTS_N_INSNS (2);
8923 *total
= COSTS_N_INSNS (4);
/* NEON vector constants are one insn when they can be encoded as an
   immediate move; otherwise assume a constant-pool load.  */
8928 && TARGET_HARD_FLOAT
8929 && outer_code
== SET
8930 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8931 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8932 *total
= COSTS_N_INSNS (1);
8934 *total
= COSTS_N_INSNS (4);
8939 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8940 cost of these slightly. */
8941 *total
= COSTS_N_INSNS (1) + 1;
/* Default: one insn per word of the mode when it is known.  */
8948 if (mode
!= VOIDmode
)
8949 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8951 *total
= COSTS_N_INSNS (4); /* How knows? */
8956 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8957 operand, then return the operand that is being shifted. If the shift
8958 is not by a constant, then set SHIFT_REG to point to the operand.
8959 Return NULL if OP is not a shifter operand. */
/* NOTE(review): damaged extraction -- the return type, braces and the
   trailing "return NULL" path are missing from this view.  Accepted
   shifter forms visible below:
     - MULT by a constant power of two (treated as a left shift);
     - ROTATE only when the amount is a constant;
     - ROTATERT/ASHIFT/LSHIFTRT/ASHIFTRT by constant or register,
       recording a register amount through *SHIFT_REG.  */
8961 shifter_op_p (rtx op
, rtx
*shift_reg
)
8963 enum rtx_code code
= GET_CODE (op
);
8965 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
8966 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
8967 return XEXP (op
, 0);
8968 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
8969 return XEXP (op
, 0);
8970 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
8971 || code
== ASHIFTRT
)
8973 if (!CONST_INT_P (XEXP (op
, 1)))
8974 *shift_reg
= XEXP (op
, 1);
8975 return XEXP (op
, 0);
/* Cost model for UNSPEC expressions, dispatching on XINT (x, 1) and
   writing the estimate through *COST.  NOTE(review): damaged
   extraction -- the return type, switch braces, speed_p guards, break
   and return statements, and several case labels are missing from this
   view; comments below annotate only the surviving fragments.  */
8982 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
8984 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
8985 gcc_assert (GET_CODE (x
) == UNSPEC
);
8987 switch (XINT (x
, 1))
8989 case UNSPEC_UNALIGNED_LOAD
:
8990 /* We can only do unaligned loads into the integer unit, and we can't
8992 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
/* For speed, add the per-register load cost, the unaligned penalty
   and the addressing cost.  */
8994 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
8995 + extra_cost
->ldst
.load_unaligned
);
8998 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
8999 ADDR_SPACE_GENERIC
, speed_p
);
9003 case UNSPEC_UNALIGNED_STORE
:
9004 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
/* Mirror of the load case, plus the cost of the stored value.  */
9006 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9007 + extra_cost
->ldst
.store_unaligned
);
9009 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), UNSPEC
, 0, speed_p
);
9011 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9012 ADDR_SPACE_GENERIC
, speed_p
);
/* Presumably the FP rounding UNSPECs (VRINT*) -- the case labels are
   not visible in this view; TODO confirm.  */
9022 *cost
= COSTS_N_INSNS (1);
9024 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
/* Default: assume a two-insn sequence.  */
9028 *cost
= COSTS_N_INSNS (2);
9034 /* Cost of a libcall. We assume one insn per argument, an amount for the
9035 call (one insn for -Os) and then one for processing the result. */
9036 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
/* Accounts for a shifter operand folded into a narrow-mode arithmetic
   OP at operand index IDX: adds the shift-with-arith cost (register or
   immediate shift amount) plus the costs of the shifted operand and of
   the other operand.  Relies on shift_op/shift_reg/x/cost/extra_cost/
   speed_p in the expanding scope.  NOTE(review): the extraction dropped
   several continuation lines of this macro (the embedded numbers jump
   9043 -> 9048 -> 9052 etc.), so the body below is incomplete.  */
9038 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9041 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9042 if (shift_op != NULL \
9043 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9048 *cost += extra_cost->alu.arith_shift_reg; \
9049 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9052 *cost += extra_cost->alu.arith_shift; \
9054 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9055 + rtx_cost (XEXP (x, 1 - IDX), \
9062 /* RTX costs. Make an estimate of the cost of executing the operation
9063 X, which is contained with an operation with code OUTER_CODE.
9064 SPEED_P indicates whether the cost desired is the performance cost,
9065 or the size cost. The estimate is stored in COST and the return
9066 value is TRUE if the cost calculation is final, or FALSE if the
9067 caller should recurse through the operands of X to add additional
9070 We currently make no attempt to model the size savings of Thumb-2
9071 16-bit instructions. At the normal points in compilation where
9072 this code is called we have no measure of whether the condition
9073 flags are live or not, and thus no realistic way to determine what
9074 the size will eventually be. */
9076 arm_new_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9077 const struct cpu_cost_table
*extra_cost
,
9078 int *cost
, bool speed_p
)
9080 enum machine_mode mode
= GET_MODE (x
);
9085 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9087 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9095 if (REG_P (SET_SRC (x
))
9096 && REG_P (SET_DEST (x
)))
9098 /* Assume that most copies can be done with a single insn,
9099 unless we don't have HW FP, in which case everything
9100 larger than word mode will require two insns. */
9101 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9102 && GET_MODE_SIZE (mode
) > 4)
9105 /* Conditional register moves can be encoded
9106 in 16 bits in Thumb mode. */
9107 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9111 if (CONST_INT_P (SET_SRC (x
)))
9113 /* Handle CONST_INT here, since the value doesn't have a mode
9114 and we would otherwise be unable to work out the true cost. */
9115 *cost
= rtx_cost (SET_DEST (x
), SET
, 0, speed_p
);
9116 mode
= GET_MODE (SET_DEST (x
));
9118 /* Slightly lower the cost of setting a core reg to a constant.
9119 This helps break up chains and allows for better scheduling. */
9120 if (REG_P (SET_DEST (x
))
9121 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9124 /* Immediate moves with an immediate in the range [0, 255] can be
9125 encoded in 16 bits in Thumb mode. */
9126 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9127 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9129 goto const_int_cost
;
9135 /* A memory access costs 1 insn if the mode is small, or the address is
9136 a single register, otherwise it costs one insn per word. */
9137 if (REG_P (XEXP (x
, 0)))
9138 *cost
= COSTS_N_INSNS (1);
9140 && GET_CODE (XEXP (x
, 0)) == PLUS
9141 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9142 /* This will be split into two instructions.
9143 See arm.md:calculate_pic_address. */
9144 *cost
= COSTS_N_INSNS (2);
9146 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9148 /* For speed optimizations, add the costs of the address and
9149 accessing memory. */
9152 *cost
+= (extra_cost
->ldst
.load
9153 + arm_address_cost (XEXP (x
, 0), mode
,
9154 ADDR_SPACE_GENERIC
, speed_p
));
9156 *cost
+= extra_cost
->ldst
.load
;
9162 /* Calculations of LDM costs are complex. We assume an initial cost
9163 (ldm_1st) which will load the number of registers mentioned in
9164 ldm_regs_per_insn_1st registers; then each additional
9165 ldm_regs_per_insn_subsequent registers cost one more insn. The
9166 formula for N regs is thus:
9168 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9169 + ldm_regs_per_insn_subsequent - 1)
9170 / ldm_regs_per_insn_subsequent).
9172 Additional costs may also be added for addressing. A similar
9173 formula is used for STM. */
9175 bool is_ldm
= load_multiple_operation (x
, SImode
);
9176 bool is_stm
= store_multiple_operation (x
, SImode
);
9178 *cost
= COSTS_N_INSNS (1);
9180 if (is_ldm
|| is_stm
)
9184 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9185 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9186 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9187 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9188 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9189 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9190 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9192 *cost
+= regs_per_insn_1st
9193 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9194 + regs_per_insn_sub
- 1)
9195 / regs_per_insn_sub
);
9204 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9205 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9206 *cost
= COSTS_N_INSNS (speed_p
9207 ? extra_cost
->fp
[mode
!= SFmode
].div
: 1);
9208 else if (mode
== SImode
&& TARGET_IDIV
)
9209 *cost
= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 1);
9211 *cost
= LIBCALL_COST (2);
9212 return false; /* All arguments must be in registers. */
9216 *cost
= LIBCALL_COST (2);
9217 return false; /* All arguments must be in registers. */
9220 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9222 *cost
= (COSTS_N_INSNS (2)
9223 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9225 *cost
+= extra_cost
->alu
.shift_reg
;
9233 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9235 *cost
= (COSTS_N_INSNS (3)
9236 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9238 *cost
+= 2 * extra_cost
->alu
.shift
;
9241 else if (mode
== SImode
)
9243 *cost
= (COSTS_N_INSNS (1)
9244 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9245 /* Slightly disparage register shifts at -Os, but not by much. */
9246 if (!CONST_INT_P (XEXP (x
, 1)))
9247 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9248 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9251 else if (GET_MODE_CLASS (mode
) == MODE_INT
9252 && GET_MODE_SIZE (mode
) < 4)
9256 *cost
= (COSTS_N_INSNS (1)
9257 + rtx_cost (XEXP (x
, 0), code
, 0, speed_p
));
9258 /* Slightly disparage register shifts at -Os, but not by
9260 if (!CONST_INT_P (XEXP (x
, 1)))
9261 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9262 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9264 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9266 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9268 /* Can use SBFX/UBFX. */
9269 *cost
= COSTS_N_INSNS (1);
9271 *cost
+= extra_cost
->alu
.bfx
;
9272 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9276 *cost
= COSTS_N_INSNS (2);
9277 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9280 if (CONST_INT_P (XEXP (x
, 1)))
9281 *cost
+= 2 * extra_cost
->alu
.shift
;
9283 *cost
+= (extra_cost
->alu
.shift
9284 + extra_cost
->alu
.shift_reg
);
9287 /* Slightly disparage register shifts. */
9288 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9293 *cost
= COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x
, 1)));
9294 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
9297 if (CONST_INT_P (XEXP (x
, 1)))
9298 *cost
+= (2 * extra_cost
->alu
.shift
9299 + extra_cost
->alu
.log_shift
);
9301 *cost
+= (extra_cost
->alu
.shift
9302 + extra_cost
->alu
.shift_reg
9303 + extra_cost
->alu
.log_shift_reg
);
9309 *cost
= LIBCALL_COST (2);
9313 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9314 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9316 *cost
= COSTS_N_INSNS (1);
9317 if (GET_CODE (XEXP (x
, 0)) == MULT
9318 || GET_CODE (XEXP (x
, 1)) == MULT
)
9320 rtx mul_op0
, mul_op1
, sub_op
;
9323 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9325 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9327 mul_op0
= XEXP (XEXP (x
, 0), 0);
9328 mul_op1
= XEXP (XEXP (x
, 0), 1);
9329 sub_op
= XEXP (x
, 1);
9333 mul_op0
= XEXP (XEXP (x
, 1), 0);
9334 mul_op1
= XEXP (XEXP (x
, 1), 1);
9335 sub_op
= XEXP (x
, 0);
9338 /* The first operand of the multiply may be optionally
9340 if (GET_CODE (mul_op0
) == NEG
)
9341 mul_op0
= XEXP (mul_op0
, 0);
9343 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9344 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9345 + rtx_cost (sub_op
, code
, 0, speed_p
));
9351 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9357 rtx shift_by_reg
= NULL
;
9361 *cost
= COSTS_N_INSNS (1);
9363 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9364 if (shift_op
== NULL
)
9366 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9367 non_shift_op
= XEXP (x
, 0);
9370 non_shift_op
= XEXP (x
, 1);
9372 if (shift_op
!= NULL
)
9374 if (shift_by_reg
!= NULL
)
9377 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9378 *cost
+= rtx_cost (shift_by_reg
, code
, 0, speed_p
);
9381 *cost
+= extra_cost
->alu
.arith_shift
;
9383 *cost
+= (rtx_cost (shift_op
, code
, 0, speed_p
)
9384 + rtx_cost (non_shift_op
, code
, 0, speed_p
));
9389 && GET_CODE (XEXP (x
, 1)) == MULT
)
9393 *cost
+= extra_cost
->mult
[0].add
;
9394 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9395 + rtx_cost (XEXP (XEXP (x
, 1), 0), MULT
, 0, speed_p
)
9396 + rtx_cost (XEXP (XEXP (x
, 1), 1), MULT
, 1, speed_p
));
9400 if (CONST_INT_P (XEXP (x
, 0)))
9402 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9403 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9405 *cost
= COSTS_N_INSNS (insns
);
9407 *cost
+= insns
* extra_cost
->alu
.arith
;
9408 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9415 if (GET_MODE_CLASS (mode
) == MODE_INT
9416 && GET_MODE_SIZE (mode
) < 4)
9418 rtx shift_op
, shift_reg
;
9421 /* We check both sides of the MINUS for shifter operands since,
9422 unlike PLUS, it's not commutative. */
9424 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9425 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9427 /* Slightly disparage, as we might need to widen the result. */
9428 *cost
= 1 + COSTS_N_INSNS (1);
9430 *cost
+= extra_cost
->alu
.arith
;
9432 if (CONST_INT_P (XEXP (x
, 0)))
9434 *cost
+= rtx_cost (XEXP (x
, 1), code
, 1, speed_p
);
9443 *cost
= COSTS_N_INSNS (2);
9445 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9447 rtx op1
= XEXP (x
, 1);
9450 *cost
+= 2 * extra_cost
->alu
.arith
;
9452 if (GET_CODE (op1
) == ZERO_EXTEND
)
9453 *cost
+= rtx_cost (XEXP (op1
, 0), ZERO_EXTEND
, 0, speed_p
);
9455 *cost
+= rtx_cost (op1
, MINUS
, 1, speed_p
);
9456 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
,
9460 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9463 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9464 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), SIGN_EXTEND
,
9466 + rtx_cost (XEXP (x
, 1), MINUS
, 1, speed_p
));
9469 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9470 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9473 *cost
+= (extra_cost
->alu
.arith
9474 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9475 ? extra_cost
->alu
.arith
9476 : extra_cost
->alu
.arith_shift
));
9477 *cost
+= (rtx_cost (XEXP (x
, 0), MINUS
, 0, speed_p
)
9478 + rtx_cost (XEXP (XEXP (x
, 1), 0),
9479 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9484 *cost
+= 2 * extra_cost
->alu
.arith
;
9490 *cost
= LIBCALL_COST (2);
9494 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9495 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9497 *cost
= COSTS_N_INSNS (1);
9498 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9500 rtx mul_op0
, mul_op1
, add_op
;
9503 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9505 mul_op0
= XEXP (XEXP (x
, 0), 0);
9506 mul_op1
= XEXP (XEXP (x
, 0), 1);
9507 add_op
= XEXP (x
, 1);
9509 *cost
+= (rtx_cost (mul_op0
, code
, 0, speed_p
)
9510 + rtx_cost (mul_op1
, code
, 0, speed_p
)
9511 + rtx_cost (add_op
, code
, 0, speed_p
));
9517 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9520 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9522 *cost
= LIBCALL_COST (2);
9526 /* Narrow modes can be synthesized in SImode, but the range
9527 of useful sub-operations is limited. Check for shift operations
9528 on one of the operands. Only left shifts can be used in the
9530 if (GET_MODE_CLASS (mode
) == MODE_INT
9531 && GET_MODE_SIZE (mode
) < 4)
9533 rtx shift_op
, shift_reg
;
9536 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9538 if (CONST_INT_P (XEXP (x
, 1)))
9540 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9541 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9543 *cost
= COSTS_N_INSNS (insns
);
9545 *cost
+= insns
* extra_cost
->alu
.arith
;
9546 /* Slightly penalize a narrow operation as the result may
9548 *cost
+= 1 + rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9552 /* Slightly penalize a narrow operation as the result may
9554 *cost
= 1 + COSTS_N_INSNS (1);
9556 *cost
+= extra_cost
->alu
.arith
;
9563 rtx shift_op
, shift_reg
;
9565 *cost
= COSTS_N_INSNS (1);
9567 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9568 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9570 /* UXTA[BH] or SXTA[BH]. */
9572 *cost
+= extra_cost
->alu
.extnd_arith
;
9573 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9575 + rtx_cost (XEXP (x
, 1), PLUS
, 0, speed_p
));
9580 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9581 if (shift_op
!= NULL
)
9586 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9587 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9590 *cost
+= extra_cost
->alu
.arith_shift
;
9592 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9593 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9596 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9598 rtx mul_op
= XEXP (x
, 0);
9600 *cost
= COSTS_N_INSNS (1);
9602 if (TARGET_DSP_MULTIPLY
9603 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9604 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9605 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9606 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9607 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9608 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9609 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9610 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9611 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9612 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9613 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9614 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9619 *cost
+= extra_cost
->mult
[0].extend_add
;
9620 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0),
9621 SIGN_EXTEND
, 0, speed_p
)
9622 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0),
9623 SIGN_EXTEND
, 0, speed_p
)
9624 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9629 *cost
+= extra_cost
->mult
[0].add
;
9630 *cost
+= (rtx_cost (XEXP (mul_op
, 0), MULT
, 0, speed_p
)
9631 + rtx_cost (XEXP (mul_op
, 1), MULT
, 1, speed_p
)
9632 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9635 if (CONST_INT_P (XEXP (x
, 1)))
9637 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9638 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9640 *cost
= COSTS_N_INSNS (insns
);
9642 *cost
+= insns
* extra_cost
->alu
.arith
;
9643 *cost
+= rtx_cost (XEXP (x
, 0), PLUS
, 0, speed_p
);
9652 && GET_CODE (XEXP (x
, 0)) == MULT
9653 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
9654 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
9655 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
9656 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
9658 *cost
= COSTS_N_INSNS (1);
9660 *cost
+= extra_cost
->mult
[1].extend_add
;
9661 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
9662 ZERO_EXTEND
, 0, speed_p
)
9663 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0),
9664 ZERO_EXTEND
, 0, speed_p
)
9665 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9669 *cost
= COSTS_N_INSNS (2);
9671 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9672 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9675 *cost
+= (extra_cost
->alu
.arith
9676 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9677 ? extra_cost
->alu
.arith
9678 : extra_cost
->alu
.arith_shift
));
9680 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), ZERO_EXTEND
, 0,
9682 + rtx_cost (XEXP (x
, 1), PLUS
, 1, speed_p
));
9687 *cost
+= 2 * extra_cost
->alu
.arith
;
9692 *cost
= LIBCALL_COST (2);
9695 case AND
: case XOR
: case IOR
:
9698 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9699 rtx op0
= XEXP (x
, 0);
9700 rtx shift_op
, shift_reg
;
9702 *cost
= COSTS_N_INSNS (1);
9706 || (code
== IOR
&& TARGET_THUMB2
)))
9707 op0
= XEXP (op0
, 0);
9710 shift_op
= shifter_op_p (op0
, &shift_reg
);
9711 if (shift_op
!= NULL
)
9716 *cost
+= extra_cost
->alu
.log_shift_reg
;
9717 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9720 *cost
+= extra_cost
->alu
.log_shift
;
9722 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
9723 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9727 if (CONST_INT_P (XEXP (x
, 1)))
9729 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
9730 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9733 *cost
= COSTS_N_INSNS (insns
);
9735 *cost
+= insns
* extra_cost
->alu
.logical
;
9736 *cost
+= rtx_cost (op0
, code
, 0, speed_p
);
9741 *cost
+= extra_cost
->alu
.logical
;
9742 *cost
+= (rtx_cost (op0
, code
, 0, speed_p
)
9743 + rtx_cost (XEXP (x
, 1), code
, 1, speed_p
));
9749 rtx op0
= XEXP (x
, 0);
9750 enum rtx_code subcode
= GET_CODE (op0
);
9752 *cost
= COSTS_N_INSNS (2);
9756 || (code
== IOR
&& TARGET_THUMB2
)))
9757 op0
= XEXP (op0
, 0);
9759 if (GET_CODE (op0
) == ZERO_EXTEND
)
9762 *cost
+= 2 * extra_cost
->alu
.logical
;
9764 *cost
+= (rtx_cost (XEXP (op0
, 0), ZERO_EXTEND
, 0, speed_p
)
9765 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
9768 else if (GET_CODE (op0
) == SIGN_EXTEND
)
9771 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
9773 *cost
+= (rtx_cost (XEXP (op0
, 0), SIGN_EXTEND
, 0, speed_p
)
9774 + rtx_cost (XEXP (x
, 1), code
, 0, speed_p
));
9779 *cost
+= 2 * extra_cost
->alu
.logical
;
9785 *cost
= LIBCALL_COST (2);
9789 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9790 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9792 rtx op0
= XEXP (x
, 0);
9794 *cost
= COSTS_N_INSNS (1);
9796 if (GET_CODE (op0
) == NEG
)
9797 op0
= XEXP (op0
, 0);
9800 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
9802 *cost
+= (rtx_cost (op0
, MULT
, 0, speed_p
)
9803 + rtx_cost (XEXP (x
, 1), MULT
, 1, speed_p
));
9806 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9808 *cost
= LIBCALL_COST (2);
9814 *cost
= COSTS_N_INSNS (1);
9815 if (TARGET_DSP_MULTIPLY
9816 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9817 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9818 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9819 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9820 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
9821 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
9822 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
9823 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
9824 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9825 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9826 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9827 && (INTVAL (XEXP (XEXP (x
, 1), 1))
9832 *cost
+= extra_cost
->mult
[0].extend
;
9833 *cost
+= (rtx_cost (XEXP (x
, 0), SIGN_EXTEND
, 0, speed_p
)
9834 + rtx_cost (XEXP (x
, 1), SIGN_EXTEND
, 0, speed_p
));
9838 *cost
+= extra_cost
->mult
[0].simple
;
9845 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9846 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
9847 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9848 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
9850 *cost
= COSTS_N_INSNS (1);
9852 *cost
+= extra_cost
->mult
[1].extend
;
9853 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0),
9854 ZERO_EXTEND
, 0, speed_p
)
9855 + rtx_cost (XEXP (XEXP (x
, 1), 0),
9856 ZERO_EXTEND
, 0, speed_p
));
9860 *cost
= LIBCALL_COST (2);
9865 *cost
= LIBCALL_COST (2);
9869 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9870 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9872 *cost
= COSTS_N_INSNS (1);
9874 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
9878 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9880 *cost
= LIBCALL_COST (1);
9886 if (GET_CODE (XEXP (x
, 0)) == ABS
)
9888 *cost
= COSTS_N_INSNS (2);
9889 /* Assume the non-flag-changing variant. */
9891 *cost
+= (extra_cost
->alu
.log_shift
9892 + extra_cost
->alu
.arith_shift
);
9893 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), ABS
, 0, speed_p
);
9897 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
9898 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
9900 *cost
= COSTS_N_INSNS (2);
9901 /* No extra cost for MOV imm and MVN imm. */
9902 /* If the comparison op is using the flags, there's no further
9903 cost, otherwise we need to add the cost of the comparison. */
9904 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
9905 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
9906 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
9908 *cost
+= (COSTS_N_INSNS (1)
9909 + rtx_cost (XEXP (XEXP (x
, 0), 0), COMPARE
, 0,
9911 + rtx_cost (XEXP (XEXP (x
, 0), 1), COMPARE
, 1,
9914 *cost
+= extra_cost
->alu
.arith
;
9918 *cost
= COSTS_N_INSNS (1);
9920 *cost
+= extra_cost
->alu
.arith
;
9924 if (GET_MODE_CLASS (mode
) == MODE_INT
9925 && GET_MODE_SIZE (mode
) < 4)
9927 /* Slightly disparage, as we might need an extend operation. */
9928 *cost
= 1 + COSTS_N_INSNS (1);
9930 *cost
+= extra_cost
->alu
.arith
;
9936 *cost
= COSTS_N_INSNS (2);
9938 *cost
+= 2 * extra_cost
->alu
.arith
;
9943 *cost
= LIBCALL_COST (1);
9950 rtx shift_reg
= NULL
;
9952 *cost
= COSTS_N_INSNS (1);
9953 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9957 if (shift_reg
!= NULL
)
9960 *cost
+= extra_cost
->alu
.log_shift_reg
;
9961 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
9964 *cost
+= extra_cost
->alu
.log_shift
;
9965 *cost
+= rtx_cost (shift_op
, ASHIFT
, 0, speed_p
);
9970 *cost
+= extra_cost
->alu
.logical
;
9975 *cost
= COSTS_N_INSNS (2);
9981 *cost
+= LIBCALL_COST (1);
9986 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9988 *cost
= COSTS_N_INSNS (4);
9991 int op1cost
= rtx_cost (XEXP (x
, 1), SET
, 1, speed_p
);
9992 int op2cost
= rtx_cost (XEXP (x
, 2), SET
, 1, speed_p
);
9994 *cost
= rtx_cost (XEXP (x
, 0), IF_THEN_ELSE
, 0, speed_p
);
9995 /* Assume that if one arm of the if_then_else is a register,
9996 that it will be tied with the result and eliminate the
9997 conditional insn. */
9998 if (REG_P (XEXP (x
, 1)))
10000 else if (REG_P (XEXP (x
, 2)))
10006 if (extra_cost
->alu
.non_exec_costs_exec
)
10007 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10009 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10012 *cost
+= op1cost
+ op2cost
;
10018 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10022 enum machine_mode op0mode
;
10023 /* We'll mostly assume that the cost of a compare is the cost of the
10024 LHS. However, there are some notable exceptions. */
10026 /* Floating point compares are never done as side-effects. */
10027 op0mode
= GET_MODE (XEXP (x
, 0));
10028 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10029 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10031 *cost
= COSTS_N_INSNS (1);
10033 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10035 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10037 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10043 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10045 *cost
= LIBCALL_COST (2);
10049 /* DImode compares normally take two insns. */
10050 if (op0mode
== DImode
)
10052 *cost
= COSTS_N_INSNS (2);
10054 *cost
+= 2 * extra_cost
->alu
.arith
;
10058 if (op0mode
== SImode
)
10063 if (XEXP (x
, 1) == const0_rtx
10064 && !(REG_P (XEXP (x
, 0))
10065 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10066 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10068 *cost
= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10070 /* Multiply operations that set the flags are often
10071 significantly more expensive. */
10073 && GET_CODE (XEXP (x
, 0)) == MULT
10074 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10075 *cost
+= extra_cost
->mult
[0].flag_setting
;
10078 && GET_CODE (XEXP (x
, 0)) == PLUS
10079 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10080 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10082 *cost
+= extra_cost
->mult
[0].flag_setting
;
10087 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10088 if (shift_op
!= NULL
)
10090 *cost
= COSTS_N_INSNS (1);
10091 if (shift_reg
!= NULL
)
10093 *cost
+= rtx_cost (shift_reg
, ASHIFT
, 1, speed_p
);
10095 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10098 *cost
+= extra_cost
->alu
.arith_shift
;
10099 *cost
+= (rtx_cost (shift_op
, ASHIFT
, 0, speed_p
)
10100 + rtx_cost (XEXP (x
, 1), COMPARE
, 1, speed_p
));
10104 *cost
= COSTS_N_INSNS (1);
10106 *cost
+= extra_cost
->alu
.arith
;
10107 if (CONST_INT_P (XEXP (x
, 1))
10108 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10110 *cost
+= rtx_cost (XEXP (x
, 0), COMPARE
, 0, speed_p
);
10118 *cost
= LIBCALL_COST (2);
10141 if (outer_code
== SET
)
10143 /* Is it a store-flag operation? */
10144 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10145 && XEXP (x
, 1) == const0_rtx
)
10147 /* Thumb also needs an IT insn. */
10148 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10151 if (XEXP (x
, 1) == const0_rtx
)
10156 /* LSR Rd, Rn, #31. */
10157 *cost
= COSTS_N_INSNS (1);
10159 *cost
+= extra_cost
->alu
.shift
;
10169 *cost
= COSTS_N_INSNS (2);
10173 /* RSBS T1, Rn, Rn, LSR #31
10175 *cost
= COSTS_N_INSNS (2);
10177 *cost
+= extra_cost
->alu
.arith_shift
;
10181 /* RSB Rd, Rn, Rn, ASR #1
10182 LSR Rd, Rd, #31. */
10183 *cost
= COSTS_N_INSNS (2);
10185 *cost
+= (extra_cost
->alu
.arith_shift
10186 + extra_cost
->alu
.shift
);
10192 *cost
= COSTS_N_INSNS (2);
10194 *cost
+= extra_cost
->alu
.shift
;
10198 /* Remaining cases are either meaningless or would take
10199 three insns anyway. */
10200 *cost
= COSTS_N_INSNS (3);
10203 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10208 *cost
= COSTS_N_INSNS (TARGET_THUMB
? 4 : 3);
10209 if (CONST_INT_P (XEXP (x
, 1))
10210 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10212 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10219 /* Not directly inside a set. If it involves the condition code
10220 register it must be the condition for a branch, cond_exec or
10221 I_T_E operation. Since the comparison is performed elsewhere
10222 this is just the control part which has no additional
10224 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10225 && XEXP (x
, 1) == const0_rtx
)
10233 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10234 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10236 *cost
= COSTS_N_INSNS (1);
10238 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10242 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10244 *cost
= LIBCALL_COST (1);
10248 if (mode
== SImode
)
10250 *cost
= COSTS_N_INSNS (1);
10252 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10256 *cost
= LIBCALL_COST (1);
10260 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10261 && MEM_P (XEXP (x
, 0)))
10263 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10265 if (mode
== DImode
)
10266 *cost
+= COSTS_N_INSNS (1);
10271 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10272 *cost
+= extra_cost
->ldst
.load
;
10274 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10276 if (mode
== DImode
)
10277 *cost
+= extra_cost
->alu
.shift
;
10282 /* Widening from less than 32-bits requires an extend operation. */
10283 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10285 /* We have SXTB/SXTH. */
10286 *cost
= COSTS_N_INSNS (1);
10287 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10289 *cost
+= extra_cost
->alu
.extnd
;
10291 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10293 /* Needs two shifts. */
10294 *cost
= COSTS_N_INSNS (2);
10295 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10297 *cost
+= 2 * extra_cost
->alu
.shift
;
10300 /* Widening beyond 32-bits requires one more insn. */
10301 if (mode
== DImode
)
10303 *cost
+= COSTS_N_INSNS (1);
10305 *cost
+= extra_cost
->alu
.shift
;
10312 || GET_MODE (XEXP (x
, 0)) == SImode
10313 || GET_MODE (XEXP (x
, 0)) == QImode
)
10314 && MEM_P (XEXP (x
, 0)))
10316 *cost
= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10318 if (mode
== DImode
)
10319 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10324 /* Widening from less than 32-bits requires an extend operation. */
10325 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10327 /* UXTB can be a shorter instruction in Thumb2, but it might
10328 be slower than the AND Rd, Rn, #255 alternative. When
10329 optimizing for speed it should never be slower to use
10330 AND, and we don't really model 16-bit vs 32-bit insns
10332 *cost
= COSTS_N_INSNS (1);
10334 *cost
+= extra_cost
->alu
.logical
;
10336 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10338 /* We have UXTB/UXTH. */
10339 *cost
= COSTS_N_INSNS (1);
10340 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10342 *cost
+= extra_cost
->alu
.extnd
;
10344 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10346 /* Needs two shifts. It's marginally preferable to use
10347 shifts rather than two BIC instructions as the second
10348 shift may merge with a subsequent insn as a shifter
10350 *cost
= COSTS_N_INSNS (2);
10351 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10353 *cost
+= 2 * extra_cost
->alu
.shift
;
10355 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10356 *cost
= COSTS_N_INSNS (1);
10358 /* Widening beyond 32-bits requires one more insn. */
10359 if (mode
== DImode
)
10361 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10368 /* CONST_INT has no mode, so we cannot tell for sure how many
10369 insns are really going to be needed. The best we can do is
10370 look at the value passed. If it fits in SImode, then assume
10371 that's the mode it will be used for. Otherwise assume it
10372 will be used in DImode. */
10373 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10378 /* Avoid blowing up in arm_gen_constant (). */
10379 if (!(outer_code
== PLUS
10380 || outer_code
== AND
10381 || outer_code
== IOR
10382 || outer_code
== XOR
10383 || outer_code
== MINUS
))
10387 if (mode
== SImode
)
10390 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10391 INTVAL (x
), NULL
, NULL
,
10397 *cost
+= COSTS_N_INSNS (arm_gen_constant
10398 (outer_code
, SImode
, NULL
,
10399 trunc_int_for_mode (INTVAL (x
), SImode
),
10401 + arm_gen_constant (outer_code
, SImode
, NULL
,
10402 INTVAL (x
) >> 32, NULL
,
10414 if (arm_arch_thumb2
&& !flag_pic
)
10415 *cost
= COSTS_N_INSNS (2);
10417 *cost
= COSTS_N_INSNS (1) + extra_cost
->ldst
.load
;
10420 *cost
= COSTS_N_INSNS (2);
10424 *cost
+= COSTS_N_INSNS (1);
10426 *cost
+= extra_cost
->alu
.arith
;
10432 *cost
= COSTS_N_INSNS (4);
10437 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10438 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10440 if (vfp3_const_double_rtx (x
))
10442 *cost
= COSTS_N_INSNS (1);
10444 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10450 *cost
= COSTS_N_INSNS (1);
10451 if (mode
== DFmode
)
10452 *cost
+= extra_cost
->ldst
.loadd
;
10454 *cost
+= extra_cost
->ldst
.loadf
;
10457 *cost
= COSTS_N_INSNS (2 + (mode
== DFmode
));
10461 *cost
= COSTS_N_INSNS (4);
10467 && TARGET_HARD_FLOAT
10468 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10469 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10470 *cost
= COSTS_N_INSNS (1);
10472 *cost
= COSTS_N_INSNS (4);
10477 *cost
= COSTS_N_INSNS (1);
10478 /* When optimizing for size, we prefer constant pool entries to
10479 MOVW/MOVT pairs, so bump the cost of these slightly. */
10485 *cost
= COSTS_N_INSNS (1);
10487 *cost
+= extra_cost
->alu
.clz
;
10491 if (XEXP (x
, 1) == const0_rtx
)
10493 *cost
= COSTS_N_INSNS (1);
10495 *cost
+= extra_cost
->alu
.log_shift
;
10496 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10499 /* Fall through. */
10503 *cost
= COSTS_N_INSNS (2);
10507 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10508 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10509 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10510 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10511 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10512 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10513 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10514 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10517 *cost
= COSTS_N_INSNS (1);
10519 *cost
+= extra_cost
->mult
[1].extend
;
10520 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), ZERO_EXTEND
, 0,
10522 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), ZERO_EXTEND
,
10526 *cost
= LIBCALL_COST (1);
10530 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10533 /* Reading the PC is like reading any other register. Writing it
10534 is more expensive, but we take that into account elsewhere. */
10539 /* TODO: Simple zero_extract of bottom bits using AND. */
10540 /* Fall through. */
10544 && CONST_INT_P (XEXP (x
, 1))
10545 && CONST_INT_P (XEXP (x
, 2)))
10547 *cost
= COSTS_N_INSNS (1);
10549 *cost
+= extra_cost
->alu
.bfx
;
10550 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10553 /* Without UBFX/SBFX, need to resort to shift operations. */
10554 *cost
= COSTS_N_INSNS (2);
10556 *cost
+= 2 * extra_cost
->alu
.shift
;
10557 *cost
+= rtx_cost (XEXP (x
, 0), ASHIFT
, 0, speed_p
);
10561 if (TARGET_HARD_FLOAT
)
10563 *cost
= COSTS_N_INSNS (1);
10565 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10566 if (!TARGET_FPU_ARMV8
10567 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10569 /* Pre v8, widening HF->DF is a two-step process, first
10570 widening to SFmode. */
10571 *cost
+= COSTS_N_INSNS (1);
10573 *cost
+= extra_cost
->fp
[0].widen
;
10575 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10579 *cost
= LIBCALL_COST (1);
10582 case FLOAT_TRUNCATE
:
10583 if (TARGET_HARD_FLOAT
)
10585 *cost
= COSTS_N_INSNS (1);
10587 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10588 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10590 /* Vector modes? */
10592 *cost
= LIBCALL_COST (1);
10597 if (TARGET_HARD_FLOAT
)
10599 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10601 *cost
= COSTS_N_INSNS (1);
10603 *cost
+= extra_cost
->fp
[GET_MODE (XEXP (x
, 0)) == DFmode
].toint
;
10604 /* Strip of the 'cost' of rounding towards zero. */
10605 if (GET_CODE (XEXP (x
, 0)) == FIX
)
10606 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, 0, speed_p
);
10608 *cost
+= rtx_cost (XEXP (x
, 0), code
, 0, speed_p
);
10609 /* ??? Increase the cost to deal with transferring from
10610 FP -> CORE registers? */
10613 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10614 && TARGET_FPU_ARMV8
)
10616 *cost
= COSTS_N_INSNS (1);
10618 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
10621 /* Vector costs? */
10623 *cost
= LIBCALL_COST (1);
10627 case UNSIGNED_FLOAT
:
10628 if (TARGET_HARD_FLOAT
)
10630 /* ??? Increase the cost to deal with transferring from CORE
10631 -> FP registers? */
10632 *cost
= COSTS_N_INSNS (1);
10634 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
10637 *cost
= LIBCALL_COST (1);
10641 *cost
= COSTS_N_INSNS (1);
10645 /* Just a guess. Cost one insn per input. */
10646 *cost
= COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x
));
10650 if (mode
!= VOIDmode
)
10651 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
10653 *cost
= COSTS_N_INSNS (4); /* Who knows? */
10658 #undef HANDLE_NARROW_SHIFT_ARITH
10660 /* RTX costs when optimizing for size. */
10662 arm_rtx_costs (rtx x
, int code
, int outer_code
, int opno ATTRIBUTE_UNUSED
,
10663 int *total
, bool speed
)
10667 if (TARGET_OLD_RTX_COSTS
10668 || (!current_tune
->insn_extra_cost
&& !TARGET_NEW_GENERIC_COSTS
))
10670 /* Old way. (Deprecated.) */
10672 result
= arm_size_rtx_costs (x
, (enum rtx_code
) code
,
10673 (enum rtx_code
) outer_code
, total
);
10675 result
= current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
10676 (enum rtx_code
) outer_code
, total
,
10682 if (current_tune
->insn_extra_cost
)
10683 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
10684 (enum rtx_code
) outer_code
,
10685 current_tune
->insn_extra_cost
,
10687 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10688 && current_tune->insn_extra_cost != NULL */
10690 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
10691 (enum rtx_code
) outer_code
,
10692 &generic_extra_costs
, total
, speed
);
10695 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
10697 print_rtl_single (dump_file
, x
);
10698 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
10699 *total
, result
? "final" : "partial");
10704 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10705 supported on any "slowmul" cores, so it can be ignored. */
10708 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10709 int *total
, bool speed
)
10711 enum machine_mode mode
= GET_MODE (x
);
10715 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
10722 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10725 *total
= COSTS_N_INSNS (20);
10729 if (CONST_INT_P (XEXP (x
, 1)))
10731 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
10732 & (unsigned HOST_WIDE_INT
) 0xffffffff);
10733 int cost
, const_ok
= const_ok_for_arm (i
);
10734 int j
, booth_unit_size
;
10736 /* Tune as appropriate. */
10737 cost
= const_ok
? 4 : 8;
10738 booth_unit_size
= 2;
10739 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
10741 i
>>= booth_unit_size
;
10745 *total
= COSTS_N_INSNS (cost
);
10746 *total
+= rtx_cost (XEXP (x
, 0), code
, 0, speed
);
10750 *total
= COSTS_N_INSNS (20);
10754 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);;
10759 /* RTX cost for cores with a fast multiply unit (M variants). */
10762 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10763 int *total
, bool speed
)
10765 enum machine_mode mode
= GET_MODE (x
);
10769 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
10773 /* ??? should thumb2 use different costs? */
10777 /* There is no point basing this on the tuning, since it is always the
10778 fast variant if it exists at all. */
10780 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
10781 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10782 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10784 *total
= COSTS_N_INSNS(2);
10789 if (mode
== DImode
)
10791 *total
= COSTS_N_INSNS (5);
10795 if (CONST_INT_P (XEXP (x
, 1)))
10797 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
10798 & (unsigned HOST_WIDE_INT
) 0xffffffff);
10799 int cost
, const_ok
= const_ok_for_arm (i
);
10800 int j
, booth_unit_size
;
10802 /* Tune as appropriate. */
10803 cost
= const_ok
? 4 : 8;
10804 booth_unit_size
= 8;
10805 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
10807 i
>>= booth_unit_size
;
10811 *total
= COSTS_N_INSNS(cost
);
10815 if (mode
== SImode
)
10817 *total
= COSTS_N_INSNS (4);
10821 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10823 if (TARGET_HARD_FLOAT
10825 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
10827 *total
= COSTS_N_INSNS (1);
10832 /* Requires a lib call */
10833 *total
= COSTS_N_INSNS (20);
10837 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
10842 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
10843 so it can be ignored. */
10846 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10847 int *total
, bool speed
)
10849 enum machine_mode mode
= GET_MODE (x
);
10853 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
10860 if (GET_CODE (XEXP (x
, 0)) != MULT
)
10861 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
10863 /* A COMPARE of a MULT is slow on XScale; the muls instruction
10864 will stall until the multiplication is complete. */
10865 *total
= COSTS_N_INSNS (3);
10869 /* There is no point basing this on the tuning, since it is always the
10870 fast variant if it exists at all. */
10872 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
10873 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10874 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10876 *total
= COSTS_N_INSNS (2);
10881 if (mode
== DImode
)
10883 *total
= COSTS_N_INSNS (5);
10887 if (CONST_INT_P (XEXP (x
, 1)))
10889 /* If operand 1 is a constant we can more accurately
10890 calculate the cost of the multiply. The multiplier can
10891 retire 15 bits on the first cycle and a further 12 on the
10892 second. We do, of course, have to load the constant into
10893 a register first. */
10894 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
10895 /* There's a general overhead of one cycle. */
10897 unsigned HOST_WIDE_INT masked_const
;
10899 if (i
& 0x80000000)
10902 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
10904 masked_const
= i
& 0xffff8000;
10905 if (masked_const
!= 0)
10908 masked_const
= i
& 0xf8000000;
10909 if (masked_const
!= 0)
10912 *total
= COSTS_N_INSNS (cost
);
10916 if (mode
== SImode
)
10918 *total
= COSTS_N_INSNS (3);
10922 /* Requires a lib call */
10923 *total
= COSTS_N_INSNS (20);
10927 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
10932 /* RTX costs for 9e (and later) cores. */
10935 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10936 int *total
, bool speed
)
10938 enum machine_mode mode
= GET_MODE (x
);
10945 *total
= COSTS_N_INSNS (3);
10949 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
10957 /* There is no point basing this on the tuning, since it is always the
10958 fast variant if it exists at all. */
10960 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
10961 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10962 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10964 *total
= COSTS_N_INSNS (2);
10969 if (mode
== DImode
)
10971 *total
= COSTS_N_INSNS (5);
10975 if (mode
== SImode
)
10977 *total
= COSTS_N_INSNS (2);
10981 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10983 if (TARGET_HARD_FLOAT
10985 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
10987 *total
= COSTS_N_INSNS (1);
10992 *total
= COSTS_N_INSNS (20);
10996 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
10999 /* All address computations that can be done are free, but rtx cost returns
11000 the same for practically all of them. So we weight the different types
11001 of address here in the order (most pref first):
11002 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11004 arm_arm_address_cost (rtx x
)
11006 enum rtx_code c
= GET_CODE (x
);
11008 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
11010 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
11015 if (CONST_INT_P (XEXP (x
, 1)))
11018 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
11028 arm_thumb_address_cost (rtx x
)
11030 enum rtx_code c
= GET_CODE (x
);
11035 && REG_P (XEXP (x
, 0))
11036 && CONST_INT_P (XEXP (x
, 1)))
11043 arm_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
11044 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
11046 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
11049 /* Adjust cost hook for XScale. */
11051 xscale_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
11053 /* Some true dependencies can have a higher cost depending
11054 on precisely how certain input operands are used. */
11055 if (REG_NOTE_KIND(link
) == 0
11056 && recog_memoized (insn
) >= 0
11057 && recog_memoized (dep
) >= 0)
11059 int shift_opnum
= get_attr_shift (insn
);
11060 enum attr_type attr_type
= get_attr_type (dep
);
11062 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11063 operand for INSN. If we have a shifted input operand and the
11064 instruction we depend on is another ALU instruction, then we may
11065 have to account for an additional stall. */
11066 if (shift_opnum
!= 0
11067 && (attr_type
== TYPE_ALU_SHIFT_IMM
11068 || attr_type
== TYPE_ALUS_SHIFT_IMM
11069 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11070 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11071 || attr_type
== TYPE_ALU_SHIFT_REG
11072 || attr_type
== TYPE_ALUS_SHIFT_REG
11073 || attr_type
== TYPE_LOGIC_SHIFT_REG
11074 || attr_type
== TYPE_LOGICS_SHIFT_REG
11075 || attr_type
== TYPE_MOV_SHIFT
11076 || attr_type
== TYPE_MVN_SHIFT
11077 || attr_type
== TYPE_MOV_SHIFT_REG
11078 || attr_type
== TYPE_MVN_SHIFT_REG
))
11080 rtx shifted_operand
;
11083 /* Get the shifted operand. */
11084 extract_insn (insn
);
11085 shifted_operand
= recog_data
.operand
[shift_opnum
];
11087 /* Iterate over all the operands in DEP. If we write an operand
11088 that overlaps with SHIFTED_OPERAND, then we have increase the
11089 cost of this dependency. */
11090 extract_insn (dep
);
11091 preprocess_constraints ();
11092 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11094 /* We can ignore strict inputs. */
11095 if (recog_data
.operand_type
[opno
] == OP_IN
)
11098 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11110 /* Adjust cost hook for Cortex A9. */
11112 cortex_a9_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
11114 switch (REG_NOTE_KIND (link
))
11121 case REG_DEP_OUTPUT
:
11122 if (recog_memoized (insn
) >= 0
11123 && recog_memoized (dep
) >= 0)
11125 if (GET_CODE (PATTERN (insn
)) == SET
)
11128 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11130 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11132 enum attr_type attr_type_insn
= get_attr_type (insn
);
11133 enum attr_type attr_type_dep
= get_attr_type (dep
);
11135 /* By default all dependencies of the form
11138 have an extra latency of 1 cycle because
11139 of the input and output dependency in this
11140 case. However this gets modeled as an true
11141 dependency and hence all these checks. */
11142 if (REG_P (SET_DEST (PATTERN (insn
)))
11143 && REG_P (SET_DEST (PATTERN (dep
)))
11144 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn
)),
11145 SET_DEST (PATTERN (dep
))))
11147 /* FMACS is a special case where the dependent
11148 instruction can be issued 3 cycles before
11149 the normal latency in case of an output
11151 if ((attr_type_insn
== TYPE_FMACS
11152 || attr_type_insn
== TYPE_FMACD
)
11153 && (attr_type_dep
== TYPE_FMACS
11154 || attr_type_dep
== TYPE_FMACD
))
11156 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11157 *cost
= insn_default_latency (dep
) - 3;
11159 *cost
= insn_default_latency (dep
);
11164 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11165 *cost
= insn_default_latency (dep
) + 1;
11167 *cost
= insn_default_latency (dep
);
11177 gcc_unreachable ();
11183 /* Adjust cost hook for FA726TE. */
11185 fa726te_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
11187 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11188 have penalty of 3. */
11189 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
11190 && recog_memoized (insn
) >= 0
11191 && recog_memoized (dep
) >= 0
11192 && get_attr_conds (dep
) == CONDS_SET
)
11194 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11195 if (get_attr_conds (insn
) == CONDS_USE
11196 && get_attr_type (insn
) != TYPE_BRANCH
)
11202 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11203 || get_attr_conds (insn
) == CONDS_USE
)
11213 /* Implement TARGET_REGISTER_MOVE_COST.
11215 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11216 it is typically more expensive than a single memory access. We set
11217 the cost to less than two memory accesses so that floating
11218 point to integer conversion does not go through memory. */
11221 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED
,
11222 reg_class_t from
, reg_class_t to
)
11226 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11227 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11229 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11230 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11232 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11239 if (from
== HI_REGS
|| to
== HI_REGS
)
11246 /* Implement TARGET_MEMORY_MOVE_COST. */
11249 arm_memory_move_cost (enum machine_mode mode
, reg_class_t rclass
,
11250 bool in ATTRIBUTE_UNUSED
)
11256 if (GET_MODE_SIZE (mode
) < 4)
11259 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11263 /* Vectorizer cost model implementation. */
11265 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11267 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11269 int misalign ATTRIBUTE_UNUSED
)
11273 switch (type_of_cost
)
11276 return current_tune
->vec_costs
->scalar_stmt_cost
;
11279 return current_tune
->vec_costs
->scalar_load_cost
;
11282 return current_tune
->vec_costs
->scalar_store_cost
;
11285 return current_tune
->vec_costs
->vec_stmt_cost
;
11288 return current_tune
->vec_costs
->vec_align_load_cost
;
11291 return current_tune
->vec_costs
->vec_store_cost
;
11293 case vec_to_scalar
:
11294 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11296 case scalar_to_vec
:
11297 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11299 case unaligned_load
:
11300 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11302 case unaligned_store
:
11303 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11305 case cond_branch_taken
:
11306 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11308 case cond_branch_not_taken
:
11309 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11312 case vec_promote_demote
:
11313 return current_tune
->vec_costs
->vec_stmt_cost
;
11315 case vec_construct
:
11316 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11317 return elements
/ 2 + 1;
11320 gcc_unreachable ();
11324 /* Implement targetm.vectorize.add_stmt_cost. */
11327 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11328 struct _stmt_vec_info
*stmt_info
, int misalign
,
11329 enum vect_cost_model_location where
)
11331 unsigned *cost
= (unsigned *) data
;
11332 unsigned retval
= 0;
11334 if (flag_vect_cost_model
)
11336 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11337 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11339 /* Statements in an inner loop relative to the loop being
11340 vectorized are weighted more heavily. The value here is
11341 arbitrary and could potentially be improved with analysis. */
11342 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11343 count
*= 50; /* FIXME. */
11345 retval
= (unsigned) (count
* stmt_cost
);
11346 cost
[where
] += retval
;
11352 /* Return true if and only if this insn can dual-issue only as older. */
11354 cortexa7_older_only (rtx insn
)
11356 if (recog_memoized (insn
) < 0)
11359 switch (get_attr_type (insn
))
11362 case TYPE_ALUS_REG
:
11363 case TYPE_LOGIC_REG
:
11364 case TYPE_LOGICS_REG
:
11366 case TYPE_ADCS_REG
:
11371 case TYPE_SHIFT_IMM
:
11372 case TYPE_SHIFT_REG
:
11373 case TYPE_LOAD_BYTE
:
11376 case TYPE_FFARITHS
:
11378 case TYPE_FFARITHD
:
11396 case TYPE_F_STORES
:
11403 /* Return true if and only if this insn can dual-issue as younger. */
11405 cortexa7_younger (FILE *file
, int verbose
, rtx insn
)
11407 if (recog_memoized (insn
) < 0)
11410 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11414 switch (get_attr_type (insn
))
11417 case TYPE_ALUS_IMM
:
11418 case TYPE_LOGIC_IMM
:
11419 case TYPE_LOGICS_IMM
:
11424 case TYPE_MOV_SHIFT
:
11425 case TYPE_MOV_SHIFT_REG
:
11435 /* Look for an instruction that can dual issue only as an older
11436 instruction, and move it in front of any instructions that can
11437 dual-issue as younger, while preserving the relative order of all
11438 other instructions in the ready list. This is a hueuristic to help
11439 dual-issue in later cycles, by postponing issue of more flexible
11440 instructions. This heuristic may affect dual issue opportunities
11441 in the current cycle. */
11443 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx
*ready
, int *n_readyp
,
11447 int first_older_only
= -1, first_younger
= -1;
11451 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11455 /* Traverse the ready list from the head (the instruction to issue
11456 first), and looking for the first instruction that can issue as
11457 younger and the first instruction that can dual-issue only as
11459 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11461 rtx insn
= ready
[i
];
11462 if (cortexa7_older_only (insn
))
11464 first_older_only
= i
;
11466 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11469 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11473 /* Nothing to reorder because either no younger insn found or insn
11474 that can dual-issue only as older appears before any insn that
11475 can dual-issue as younger. */
11476 if (first_younger
== -1)
11479 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11483 /* Nothing to reorder because no older-only insn in the ready list. */
11484 if (first_older_only
== -1)
11487 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11491 /* Move first_older_only insn before first_younger. */
11493 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11494 INSN_UID(ready
[first_older_only
]),
11495 INSN_UID(ready
[first_younger
]));
11496 rtx first_older_only_insn
= ready
[first_older_only
];
11497 for (i
= first_older_only
; i
< first_younger
; i
++)
11499 ready
[i
] = ready
[i
+1];
11502 ready
[i
] = first_older_only_insn
;
11506 /* Implement TARGET_SCHED_REORDER. */
11508 arm_sched_reorder (FILE *file
, int verbose
, rtx
*ready
, int *n_readyp
,
11514 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11517 /* Do nothing for other cores. */
11521 return arm_issue_rate ();
11524 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11525 It corrects the value of COST based on the relationship between
11526 INSN and DEP through the dependence LINK. It returns the new
11527 value. There is a per-core adjust_cost hook to adjust scheduler costs
11528 and the per-core hook can choose to completely override the generic
11529 adjust_cost function. Only put bits of code into arm_adjust_cost that
11530 are common across all cores. */
11532 arm_adjust_cost (rtx insn
, rtx link
, rtx dep
, int cost
)
11536 /* When generating Thumb-1 code, we want to place flag-setting operations
11537 close to a conditional branch which depends on them, so that we can
11538 omit the comparison. */
11540 && REG_NOTE_KIND (link
) == 0
11541 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11542 && recog_memoized (dep
) >= 0
11543 && get_attr_conds (dep
) == CONDS_SET
)
11546 if (current_tune
->sched_adjust_cost
!= NULL
)
11548 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
11552 /* XXX Is this strictly true? */
11553 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
11554 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11557 /* Call insns don't incur a stall, even if they follow a load. */
11558 if (REG_NOTE_KIND (link
) == 0
11562 if ((i_pat
= single_set (insn
)) != NULL
11563 && MEM_P (SET_SRC (i_pat
))
11564 && (d_pat
= single_set (dep
)) != NULL
11565 && MEM_P (SET_DEST (d_pat
)))
11567 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11568 /* This is a load after a store, there is no conflict if the load reads
11569 from a cached area. Assume that loads from the stack, and from the
11570 constant pool are cached, and that others will miss. This is a
11573 if ((GET_CODE (src_mem
) == SYMBOL_REF
11574 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11575 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11576 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11577 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
11585 arm_max_conditional_execute (void)
11587 return max_insns_skipped
;
11591 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
11594 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
11596 return (optimize
> 0) ? 2 : 0;
11600 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
11602 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11605 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11606 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11607 sequences of non-executed instructions in IT blocks probably take the same
11608 amount of time as executed instructions (and the IT instruction itself takes
11609 space in icache). This function was experimentally determined to give good
11610 results on a popular embedded benchmark. */
11613 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
11615 return (TARGET_32BIT
&& speed_p
) ? 1
11616 : arm_default_branch_cost (speed_p
, predictable_p
);
11619 static bool fp_consts_inited
= false;
11621 static REAL_VALUE_TYPE value_fp0
;
11624 init_fp_table (void)
11628 r
= REAL_VALUE_ATOF ("0", DFmode
);
11630 fp_consts_inited
= true;
11633 /* Return TRUE if rtx X is a valid immediate FP constant. */
11635 arm_const_double_rtx (rtx x
)
11639 if (!fp_consts_inited
)
11642 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
11643 if (REAL_VALUE_MINUS_ZERO (r
))
11646 if (REAL_VALUES_EQUAL (r
, value_fp0
))
11652 /* VFPv3 has a fairly wide range of representable immediates, formed from
11653 "quarter-precision" floating-point values. These can be evaluated using this
11654 formula (with ^ for exponentiation):
11658 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11659 16 <= n <= 31 and 0 <= r <= 7.
11661 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11663 - A (most-significant) is the sign bit.
11664 - BCD are the exponent (encoded as r XOR 3).
11665 - EFGH are the mantissa (encoded as n - 16).
11668 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11669 fconst[sd] instruction, or -1 if X isn't suitable. */
11671 vfp3_const_double_index (rtx x
)
11673 REAL_VALUE_TYPE r
, m
;
11674 int sign
, exponent
;
11675 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
11676 unsigned HOST_WIDE_INT mask
;
11677 HOST_WIDE_INT m1
, m2
;
11678 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
11680 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
11683 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
11685 /* We can't represent these things, so detect them first. */
11686 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
11689 /* Extract sign, exponent and mantissa. */
11690 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
11691 r
= real_value_abs (&r
);
11692 exponent
= REAL_EXP (&r
);
11693 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11694 highest (sign) bit, with a fixed binary point at bit point_pos.
11695 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11696 bits for the mantissa, this may fail (low bits would be lost). */
11697 real_ldexp (&m
, &r
, point_pos
- exponent
);
11698 REAL_VALUE_TO_INT (&m1
, &m2
, m
);
11702 /* If there are bits set in the low part of the mantissa, we can't
11703 represent this value. */
11707 /* Now make it so that mantissa contains the most-significant bits, and move
11708 the point_pos to indicate that the least-significant bits have been
11710 point_pos
-= HOST_BITS_PER_WIDE_INT
;
11711 mantissa
= mant_hi
;
11713 /* We can permit four significant bits of mantissa only, plus a high bit
11714 which is always 1. */
11715 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
11716 if ((mantissa
& mask
) != 0)
11719 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11720 mantissa
>>= point_pos
- 5;
11722 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11723 floating-point immediate zero with Neon using an integer-zero load, but
11724 that case is handled elsewhere.) */
11728 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
11730 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11731 normalized significands are in the range [1, 2). (Our mantissa is shifted
11732 left 4 places at this point relative to normalized IEEE754 values). GCC
11733 internally uses [0.5, 1) (see real.c), so the exponent returned from
11734 REAL_EXP must be altered. */
11735 exponent
= 5 - exponent
;
11737 if (exponent
< 0 || exponent
> 7)
11740 /* Sign, mantissa and exponent are now in the correct form to plug into the
11741 formula described in the comment above. */
11742 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
11745 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11747 vfp3_const_double_rtx (rtx x
)
11752 return vfp3_const_double_index (x
) != -1;
11755 /* Recognize immediates which can be used in various Neon instructions. Legal
11756 immediates are described by the following table (for VMVN variants, the
11757 bitwise inverse of the constant shown is recognized. In either case, VMOV
11758 is output and the correct instruction to use for a given constant is chosen
11759 by the assembler). The constant shown is replicated across all elements of
11760 the destination vector.
11762 insn elems variant constant (binary)
11763 ---- ----- ------- -----------------
11764 vmov i32 0 00000000 00000000 00000000 abcdefgh
11765 vmov i32 1 00000000 00000000 abcdefgh 00000000
11766 vmov i32 2 00000000 abcdefgh 00000000 00000000
11767 vmov i32 3 abcdefgh 00000000 00000000 00000000
11768 vmov i16 4 00000000 abcdefgh
11769 vmov i16 5 abcdefgh 00000000
11770 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11771 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11772 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11773 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11774 vmvn i16 10 00000000 abcdefgh
11775 vmvn i16 11 abcdefgh 00000000
11776 vmov i32 12 00000000 00000000 abcdefgh 11111111
11777 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11778 vmov i32 14 00000000 abcdefgh 11111111 11111111
11779 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11780 vmov i8 16 abcdefgh
11781 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11782 eeeeeeee ffffffff gggggggg hhhhhhhh
11783 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11784 vmov f32 19 00000000 00000000 00000000 00000000
11786 For case 18, B = !b. Representable values are exactly those accepted by
11787 vfp3_const_double_index, but are output as floating-point numbers rather
11790 For case 19, we will change it to vmov.i32 when assembling.
11792 Variants 0-5 (inclusive) may also be used as immediates for the second
11793 operand of VORR/VBIC instructions.
11795 The INVERSE argument causes the bitwise inverse of the given operand to be
11796 recognized instead (used for recognizing legal immediates for the VAND/VORN
11797 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11798 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11799 output, rather than the real insns vbic/vorr).
11801 INVERSE makes no difference to the recognition of float vectors.
11803 The return value is the variant of immediate as shown in the above table, or
11804 -1 if the given value doesn't match any of the listed patterns.
11807 neon_valid_immediate (rtx op
, enum machine_mode mode
, int inverse
,
11808 rtx
*modconst
, int *elementwidth
)
11810 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11812 for (i = 0; i < idx; i += (STRIDE)) \
11817 immtype = (CLASS); \
11818 elsize = (ELSIZE); \
11822 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
11823 unsigned int innersize
;
11824 unsigned char bytes
[16];
11825 int immtype
= -1, matches
;
11826 unsigned int invmask
= inverse
? 0xff : 0;
11827 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
11831 n_elts
= CONST_VECTOR_NUNITS (op
);
11832 innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
11837 if (mode
== VOIDmode
)
11839 innersize
= GET_MODE_SIZE (mode
);
11842 /* Vectors of float constants. */
11843 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
11845 rtx el0
= CONST_VECTOR_ELT (op
, 0);
11846 REAL_VALUE_TYPE r0
;
11848 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
11851 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
11853 for (i
= 1; i
< n_elts
; i
++)
11855 rtx elt
= CONST_VECTOR_ELT (op
, i
);
11856 REAL_VALUE_TYPE re
;
11858 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
11860 if (!REAL_VALUES_EQUAL (r0
, re
))
11865 *modconst
= CONST_VECTOR_ELT (op
, 0);
11870 if (el0
== CONST0_RTX (GET_MODE (el0
)))
11876 /* Splat vector constant out into a byte vector. */
11877 for (i
= 0; i
< n_elts
; i
++)
11879 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
11880 unsigned HOST_WIDE_INT elpart
;
11881 unsigned int part
, parts
;
11883 if (CONST_INT_P (el
))
11885 elpart
= INTVAL (el
);
11888 else if (CONST_DOUBLE_P (el
))
11890 elpart
= CONST_DOUBLE_LOW (el
);
11894 gcc_unreachable ();
11896 for (part
= 0; part
< parts
; part
++)
11899 for (byte
= 0; byte
< innersize
; byte
++)
11901 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
11902 elpart
>>= BITS_PER_UNIT
;
11904 if (CONST_DOUBLE_P (el
))
11905 elpart
= CONST_DOUBLE_HIGH (el
);
11909 /* Sanity check. */
11910 gcc_assert (idx
== GET_MODE_SIZE (mode
));
11914 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
11915 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11917 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11918 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11920 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11921 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11923 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11924 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
11926 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
11928 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
11930 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
11931 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11933 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11934 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11936 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11937 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11939 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11940 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
11942 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
11944 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
11946 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11947 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11949 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11950 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11952 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11953 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11955 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11956 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11958 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
11960 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
11961 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
11969 *elementwidth
= elsize
;
11973 unsigned HOST_WIDE_INT imm
= 0;
11975 /* Un-invert bytes of recognized vector, if necessary. */
11977 for (i
= 0; i
< idx
; i
++)
11978 bytes
[i
] ^= invmask
;
11982 /* FIXME: Broken on 32-bit H_W_I hosts. */
11983 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
11985 for (i
= 0; i
< 8; i
++)
11986 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
11987 << (i
* BITS_PER_UNIT
);
11989 *modconst
= GEN_INT (imm
);
11993 unsigned HOST_WIDE_INT imm
= 0;
11995 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
11996 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
11998 *modconst
= GEN_INT (imm
);
12006 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12007 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12008 float elements), and a modified constant (whatever should be output for a
12009 VMOV) in *MODCONST. */
12012 neon_immediate_valid_for_move (rtx op
, enum machine_mode mode
,
12013 rtx
*modconst
, int *elementwidth
)
12017 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
12023 *modconst
= tmpconst
;
12026 *elementwidth
= tmpwidth
;
12031 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12032 the immediate is valid, write a constant suitable for using as an operand
12033 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12034 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12037 neon_immediate_valid_for_logic (rtx op
, enum machine_mode mode
, int inverse
,
12038 rtx
*modconst
, int *elementwidth
)
12042 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
12044 if (retval
< 0 || retval
> 5)
12048 *modconst
= tmpconst
;
12051 *elementwidth
= tmpwidth
;
12056 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12057 the immediate is valid, write a constant suitable for using as an operand
12058 to VSHR/VSHL to *MODCONST and the corresponding element width to
12059 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12060 because they have different limitations. */
12063 neon_immediate_valid_for_shift (rtx op
, enum machine_mode mode
,
12064 rtx
*modconst
, int *elementwidth
,
12067 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
12068 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
12069 unsigned HOST_WIDE_INT last_elt
= 0;
12070 unsigned HOST_WIDE_INT maxshift
;
12072 /* Split vector constant out into a byte vector. */
12073 for (i
= 0; i
< n_elts
; i
++)
12075 rtx el
= CONST_VECTOR_ELT (op
, i
);
12076 unsigned HOST_WIDE_INT elpart
;
12078 if (CONST_INT_P (el
))
12079 elpart
= INTVAL (el
);
12080 else if (CONST_DOUBLE_P (el
))
12083 gcc_unreachable ();
12085 if (i
!= 0 && elpart
!= last_elt
)
12091 /* Shift less than element size. */
12092 maxshift
= innersize
* 8;
12096 /* Left shift immediate value can be from 0 to <size>-1. */
12097 if (last_elt
>= maxshift
)
12102 /* Right shift immediate value can be from 1 to <size>. */
12103 if (last_elt
== 0 || last_elt
> maxshift
)
12108 *elementwidth
= innersize
* 8;
12111 *modconst
= CONST_VECTOR_ELT (op
, 0);
12116 /* Return a string suitable for output of Neon immediate logic operation
12120 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, enum machine_mode mode
,
12121 int inverse
, int quad
)
12123 int width
, is_valid
;
12124 static char templ
[40];
12126 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12128 gcc_assert (is_valid
!= 0);
12131 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12133 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12138 /* Return a string suitable for output of Neon immediate shift operation
12139 (VSHR or VSHL) MNEM. */
12142 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12143 enum machine_mode mode
, int quad
,
12146 int width
, is_valid
;
12147 static char templ
[40];
12149 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12150 gcc_assert (is_valid
!= 0);
12153 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12155 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12160 /* Output a sequence of pairwise operations to implement a reduction.
12161 NOTE: We do "too much work" here, because pairwise operations work on two
12162 registers-worth of operands in one go. Unfortunately we can't exploit those
12163 extra calculations to do the full operation in fewer steps, I don't think.
12164 Although all vector elements of the result but the first are ignored, we
12165 actually calculate the same result in each of the elements. An alternative
12166 such as initially loading a vector with zero to use as each of the second
12167 operands would use up an additional register and take an extra instruction,
12168 for no particular gain. */
12171 neon_pairwise_reduce (rtx op0
, rtx op1
, enum machine_mode mode
,
12172 rtx (*reduc
) (rtx
, rtx
, rtx
))
12174 enum machine_mode inner
= GET_MODE_INNER (mode
);
12175 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
12178 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12180 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12181 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12186 /* If VALS is a vector constant that can be loaded into a register
12187 using VDUP, generate instructions to do so and return an RTX to
12188 assign to the register. Otherwise return NULL_RTX. */
12191 neon_vdup_constant (rtx vals
)
12193 enum machine_mode mode
= GET_MODE (vals
);
12194 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
12195 int n_elts
= GET_MODE_NUNITS (mode
);
12196 bool all_same
= true;
12200 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12203 for (i
= 0; i
< n_elts
; ++i
)
12205 x
= XVECEXP (vals
, 0, i
);
12206 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12211 /* The elements are not all the same. We could handle repeating
12212 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12213 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12217 /* We can load this constant by using VDUP and a constant in a
12218 single ARM register. This will be cheaper than a vector
12221 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12222 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12225 /* Generate code to load VALS, which is a PARALLEL containing only
12226 constants (for vec_init) or CONST_VECTOR, efficiently into a
12227 register. Returns an RTX to copy into the register, or NULL_RTX
12228 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12231 neon_make_constant (rtx vals
)
12233 enum machine_mode mode
= GET_MODE (vals
);
12235 rtx const_vec
= NULL_RTX
;
12236 int n_elts
= GET_MODE_NUNITS (mode
);
12240 if (GET_CODE (vals
) == CONST_VECTOR
)
12242 else if (GET_CODE (vals
) == PARALLEL
)
12244 /* A CONST_VECTOR must contain only CONST_INTs and
12245 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12246 Only store valid constants in a CONST_VECTOR. */
12247 for (i
= 0; i
< n_elts
; ++i
)
12249 rtx x
= XVECEXP (vals
, 0, i
);
12250 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12253 if (n_const
== n_elts
)
12254 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12257 gcc_unreachable ();
12259 if (const_vec
!= NULL
12260 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12261 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12263 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12264 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12265 pipeline cycle; creating the constant takes one or two ARM
12266 pipeline cycles. */
12268 else if (const_vec
!= NULL_RTX
)
12269 /* Load from constant pool. On Cortex-A8 this takes two cycles
12270 (for either double or quad vectors). We can not take advantage
12271 of single-cycle VLD1 because we need a PC-relative addressing
12275 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12276 We can not construct an initializer. */
12280 /* Initialize vector TARGET to VALS. */
12283 neon_expand_vector_init (rtx target
, rtx vals
)
12285 enum machine_mode mode
= GET_MODE (target
);
12286 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
12287 int n_elts
= GET_MODE_NUNITS (mode
);
12288 int n_var
= 0, one_var
= -1;
12289 bool all_same
= true;
12293 for (i
= 0; i
< n_elts
; ++i
)
12295 x
= XVECEXP (vals
, 0, i
);
12296 if (!CONSTANT_P (x
))
12297 ++n_var
, one_var
= i
;
12299 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12305 rtx constant
= neon_make_constant (vals
);
12306 if (constant
!= NULL_RTX
)
12308 emit_move_insn (target
, constant
);
12313 /* Splat a single non-constant element if we can. */
12314 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12316 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12317 emit_insn (gen_rtx_SET (VOIDmode
, target
,
12318 gen_rtx_VEC_DUPLICATE (mode
, x
)));
12322 /* One field is non-constant. Load constant then overwrite varying
12323 field. This is more efficient than using the stack. */
12326 rtx copy
= copy_rtx (vals
);
12327 rtx index
= GEN_INT (one_var
);
12329 /* Load constant part of vector, substitute neighboring value for
12330 varying element. */
12331 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12332 neon_expand_vector_init (target
, copy
);
12334 /* Insert variable. */
12335 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12339 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12342 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12345 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12348 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12351 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12354 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12357 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12360 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12363 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12366 gcc_unreachable ();
12371 /* Construct the vector in memory one field at a time
12372 and load the whole vector. */
12373 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12374 for (i
= 0; i
< n_elts
; i
++)
12375 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12376 i
* GET_MODE_SIZE (inner_mode
)),
12377 XVECEXP (vals
, 0, i
));
12378 emit_move_insn (target
, mem
);
12381 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12382 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12383 reported source locations are bogus. */
12386 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12389 HOST_WIDE_INT lane
;
12391 gcc_assert (CONST_INT_P (operand
));
12393 lane
= INTVAL (operand
);
12395 if (lane
< low
|| lane
>= high
)
12399 /* Bounds-check lanes. */
12402 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12404 bounds_check (operand
, low
, high
, "lane out of range");
12407 /* Bounds-check constants. */
12410 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12412 bounds_check (operand
, low
, high
, "constant out of range");
12416 neon_element_bits (enum machine_mode mode
)
12418 if (mode
== DImode
)
12419 return GET_MODE_BITSIZE (mode
);
12421 return GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
12425 /* Predicates for `match_operand' and `match_operator'. */
12427 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12428 WB is true if full writeback address modes are allowed and is false
12429 if limited writeback address modes (POST_INC and PRE_DEC) are
12433 arm_coproc_mem_operand (rtx op
, bool wb
)
12437 /* Reject eliminable registers. */
12438 if (! (reload_in_progress
|| reload_completed
)
12439 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12440 || reg_mentioned_p (arg_pointer_rtx
, op
)
12441 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12442 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12443 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12444 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12447 /* Constants are converted into offsets from labels. */
12451 ind
= XEXP (op
, 0);
12453 if (reload_completed
12454 && (GET_CODE (ind
) == LABEL_REF
12455 || (GET_CODE (ind
) == CONST
12456 && GET_CODE (XEXP (ind
, 0)) == PLUS
12457 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12458 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12461 /* Match: (mem (reg)). */
12463 return arm_address_register_rtx_p (ind
, 0);
12465 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12466 acceptable in any case (subject to verification by
12467 arm_address_register_rtx_p). We need WB to be true to accept
12468 PRE_INC and POST_DEC. */
12469 if (GET_CODE (ind
) == POST_INC
12470 || GET_CODE (ind
) == PRE_DEC
12472 && (GET_CODE (ind
) == PRE_INC
12473 || GET_CODE (ind
) == POST_DEC
)))
12474 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12477 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12478 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12479 && GET_CODE (XEXP (ind
, 1)) == PLUS
12480 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12481 ind
= XEXP (ind
, 1);
12486 if (GET_CODE (ind
) == PLUS
12487 && REG_P (XEXP (ind
, 0))
12488 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12489 && CONST_INT_P (XEXP (ind
, 1))
12490 && INTVAL (XEXP (ind
, 1)) > -1024
12491 && INTVAL (XEXP (ind
, 1)) < 1024
12492 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12498 /* Return TRUE if OP is a memory operand which we can load or store a vector
12499 to/from. TYPE is one of the following values:
12500 0 - Vector load/stor (vldr)
12501 1 - Core registers (ldm)
12502 2 - Element/structure loads (vld1)
12505 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12509 /* Reject eliminable registers. */
12510 if (! (reload_in_progress
|| reload_completed
)
12511 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12512 || reg_mentioned_p (arg_pointer_rtx
, op
)
12513 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12514 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12515 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12516 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12519 /* Constants are converted into offsets from labels. */
12523 ind
= XEXP (op
, 0);
12525 if (reload_completed
12526 && (GET_CODE (ind
) == LABEL_REF
12527 || (GET_CODE (ind
) == CONST
12528 && GET_CODE (XEXP (ind
, 0)) == PLUS
12529 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12530 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12533 /* Match: (mem (reg)). */
12535 return arm_address_register_rtx_p (ind
, 0);
12537 /* Allow post-increment with Neon registers. */
12538 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12539 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12540 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12542 /* FIXME: vld1 allows register post-modify. */
12548 && GET_CODE (ind
) == PLUS
12549 && REG_P (XEXP (ind
, 0))
12550 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12551 && CONST_INT_P (XEXP (ind
, 1))
12552 && INTVAL (XEXP (ind
, 1)) > -1024
12553 /* For quad modes, we restrict the constant offset to be slightly less
12554 than what the instruction format permits. We have no such constraint
12555 on double mode offsets. (This must match arm_legitimate_index_p.) */
12556 && (INTVAL (XEXP (ind
, 1))
12557 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12558 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12564 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12567 neon_struct_mem_operand (rtx op
)
12571 /* Reject eliminable registers. */
12572 if (! (reload_in_progress
|| reload_completed
)
12573 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12574 || reg_mentioned_p (arg_pointer_rtx
, op
)
12575 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12576 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12577 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12578 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12581 /* Constants are converted into offsets from labels. */
12585 ind
= XEXP (op
, 0);
12587 if (reload_completed
12588 && (GET_CODE (ind
) == LABEL_REF
12589 || (GET_CODE (ind
) == CONST
12590 && GET_CODE (XEXP (ind
, 0)) == PLUS
12591 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12592 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12595 /* Match: (mem (reg)). */
12597 return arm_address_register_rtx_p (ind
, 0);
12599 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12600 if (GET_CODE (ind
) == POST_INC
12601 || GET_CODE (ind
) == PRE_DEC
)
12602 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12607 /* Return true if X is a register that will be eliminated later on. */
12609 arm_eliminable_register (rtx x
)
12611 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
12612 || REGNO (x
) == ARG_POINTER_REGNUM
12613 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
12614 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
12617 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12618 coprocessor registers. Otherwise return NO_REGS. */
12621 coproc_secondary_reload_class (enum machine_mode mode
, rtx x
, bool wb
)
12623 if (mode
== HFmode
)
12625 if (!TARGET_NEON_FP16
)
12626 return GENERAL_REGS
;
12627 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
12629 return GENERAL_REGS
;
12632 /* The neon move patterns handle all legitimate vector and struct
12635 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
12636 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
12637 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
12638 || VALID_NEON_STRUCT_MODE (mode
)))
12641 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
12644 return GENERAL_REGS
;
12647 /* Values which must be returned in the most-significant end of the return
12651 arm_return_in_msb (const_tree valtype
)
12653 return (TARGET_AAPCS_BASED
12654 && BYTES_BIG_ENDIAN
12655 && (AGGREGATE_TYPE_P (valtype
)
12656 || TREE_CODE (valtype
) == COMPLEX_TYPE
12657 || FIXED_POINT_TYPE_P (valtype
)));
12660 /* Return TRUE if X references a SYMBOL_REF. */
12662 symbol_mentioned_p (rtx x
)
12667 if (GET_CODE (x
) == SYMBOL_REF
)
12670 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12671 are constant offsets, not symbols. */
12672 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12675 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12677 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12683 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12684 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
12687 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
12694 /* Return TRUE if X references a LABEL_REF. */
12696 label_mentioned_p (rtx x
)
12701 if (GET_CODE (x
) == LABEL_REF
)
12704 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12705 instruction, but they are constant offsets, not symbols. */
12706 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12709 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12710 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12716 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12717 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
12720 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
12728 tls_mentioned_p (rtx x
)
12730 switch (GET_CODE (x
))
12733 return tls_mentioned_p (XEXP (x
, 0));
12736 if (XINT (x
, 1) == UNSPEC_TLS
)
12744 /* Must not copy any rtx that uses a pc-relative address. */
12747 arm_note_pic_base (rtx
*x
, void *date ATTRIBUTE_UNUSED
)
12749 if (GET_CODE (*x
) == UNSPEC
12750 && (XINT (*x
, 1) == UNSPEC_PIC_BASE
12751 || XINT (*x
, 1) == UNSPEC_PIC_UNIFIED
))
12757 arm_cannot_copy_insn_p (rtx insn
)
12759 /* The tls call insn cannot be copied, as it is paired with a data
12761 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
12764 return for_each_rtx (&PATTERN (insn
), arm_note_pic_base
, NULL
);
12768 minmax_code (rtx x
)
12770 enum rtx_code code
= GET_CODE (x
);
12783 gcc_unreachable ();
12787 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12790 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
12791 int *mask
, bool *signed_sat
)
12793 /* The high bound must be a power of two minus one. */
12794 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
12798 /* The low bound is either zero (for usat) or one less than the
12799 negation of the high bound (for ssat). */
12800 if (INTVAL (lo_bound
) == 0)
12805 *signed_sat
= false;
12810 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
12815 *signed_sat
= true;
12823 /* Return 1 if memory locations are adjacent. */
12825 adjacent_mem_locations (rtx a
, rtx b
)
12827 /* We don't guarantee to preserve the order of these memory refs. */
12828 if (volatile_refs_p (a
) || volatile_refs_p (b
))
12831 if ((REG_P (XEXP (a
, 0))
12832 || (GET_CODE (XEXP (a
, 0)) == PLUS
12833 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
12834 && (REG_P (XEXP (b
, 0))
12835 || (GET_CODE (XEXP (b
, 0)) == PLUS
12836 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
12838 HOST_WIDE_INT val0
= 0, val1
= 0;
12842 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
12844 reg0
= XEXP (XEXP (a
, 0), 0);
12845 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
12848 reg0
= XEXP (a
, 0);
12850 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
12852 reg1
= XEXP (XEXP (b
, 0), 0);
12853 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
12856 reg1
= XEXP (b
, 0);
12858 /* Don't accept any offset that will require multiple
12859 instructions to handle, since this would cause the
12860 arith_adjacentmem pattern to output an overlong sequence. */
12861 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
12864 /* Don't allow an eliminable register: register elimination can make
12865 the offset too large. */
12866 if (arm_eliminable_register (reg0
))
12869 val_diff
= val1
- val0
;
12873 /* If the target has load delay slots, then there's no benefit
12874 to using an ldm instruction unless the offset is zero and
12875 we are optimizing for size. */
12876 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
12877 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
12878 && (val_diff
== 4 || val_diff
== -4));
12881 return ((REGNO (reg0
) == REGNO (reg1
))
12882 && (val_diff
== 4 || val_diff
== -4));
12888 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12889 for load operations, false for store operations. CONSECUTIVE is true
12890 if the register numbers in the operation must be consecutive in the register
12891 bank. RETURN_PC is true if value is to be loaded in PC.
12892 The pattern we are trying to match for load is:
12893 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12894 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12897 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12900 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12901 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12902 3. If consecutive is TRUE, then for kth register being loaded,
12903 REGNO (R_dk) = REGNO (R_d0) + k.
12904 The pattern for store is similar. */
12906 ldm_stm_operation_p (rtx op
, bool load
, enum machine_mode mode
,
12907 bool consecutive
, bool return_pc
)
12909 HOST_WIDE_INT count
= XVECLEN (op
, 0);
12910 rtx reg
, mem
, addr
;
12912 unsigned first_regno
;
12913 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
12915 bool addr_reg_in_reglist
= false;
12916 bool update
= false;
12921 /* If not in SImode, then registers must be consecutive
12922 (e.g., VLDM instructions for DFmode). */
12923 gcc_assert ((mode
== SImode
) || consecutive
);
12924 /* Setting return_pc for stores is illegal. */
12925 gcc_assert (!return_pc
|| load
);
12927 /* Set up the increments and the regs per val based on the mode. */
12928 reg_increment
= GET_MODE_SIZE (mode
);
12929 regs_per_val
= reg_increment
/ 4;
12930 offset_adj
= return_pc
? 1 : 0;
12933 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
12934 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
12937 /* Check if this is a write-back. */
12938 elt
= XVECEXP (op
, 0, offset_adj
);
12939 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
12945 /* The offset adjustment must be the number of registers being
12946 popped times the size of a single register. */
12947 if (!REG_P (SET_DEST (elt
))
12948 || !REG_P (XEXP (SET_SRC (elt
), 0))
12949 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
12950 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
12951 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
12952 ((count
- 1 - offset_adj
) * reg_increment
))
12956 i
= i
+ offset_adj
;
12957 base
= base
+ offset_adj
;
12958 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12959 success depends on the type: VLDM can do just one reg,
12960 LDM must do at least two. */
12961 if ((count
<= i
) && (mode
== SImode
))
12964 elt
= XVECEXP (op
, 0, i
- 1);
12965 if (GET_CODE (elt
) != SET
)
12970 reg
= SET_DEST (elt
);
12971 mem
= SET_SRC (elt
);
12975 reg
= SET_SRC (elt
);
12976 mem
= SET_DEST (elt
);
12979 if (!REG_P (reg
) || !MEM_P (mem
))
12982 regno
= REGNO (reg
);
12983 first_regno
= regno
;
12984 addr
= XEXP (mem
, 0);
12985 if (GET_CODE (addr
) == PLUS
)
12987 if (!CONST_INT_P (XEXP (addr
, 1)))
12990 offset
= INTVAL (XEXP (addr
, 1));
12991 addr
= XEXP (addr
, 0);
12997 /* Don't allow SP to be loaded unless it is also the base register. It
12998 guarantees that SP is reset correctly when an LDM instruction
12999 is interrupted. Otherwise, we might end up with a corrupt stack. */
13000 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13003 for (; i
< count
; i
++)
13005 elt
= XVECEXP (op
, 0, i
);
13006 if (GET_CODE (elt
) != SET
)
13011 reg
= SET_DEST (elt
);
13012 mem
= SET_SRC (elt
);
13016 reg
= SET_SRC (elt
);
13017 mem
= SET_DEST (elt
);
13021 || GET_MODE (reg
) != mode
13022 || REGNO (reg
) <= regno
13025 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
13026 /* Don't allow SP to be loaded unless it is also the base register. It
13027 guarantees that SP is reset correctly when an LDM instruction
13028 is interrupted. Otherwise, we might end up with a corrupt stack. */
13029 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13031 || GET_MODE (mem
) != mode
13032 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
13033 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
13034 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
13035 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
13036 offset
+ (i
- base
) * reg_increment
))
13037 && (!REG_P (XEXP (mem
, 0))
13038 || offset
+ (i
- base
) * reg_increment
!= 0)))
13041 regno
= REGNO (reg
);
13042 if (regno
== REGNO (addr
))
13043 addr_reg_in_reglist
= true;
13048 if (update
&& addr_reg_in_reglist
)
13051 /* For Thumb-1, address register is always modified - either by write-back
13052 or by explicit load. If the pattern does not describe an update,
13053 then the address register must be in the list of loaded registers. */
13055 return update
|| addr_reg_in_reglist
;
13061 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13062 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13063 instruction. ADD_OFFSET is nonzero if the base address register needs
13064 to be modified with an add instruction before we can use it. */
13067 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
13068 int nops
, HOST_WIDE_INT add_offset
)
13070 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13071 if the offset isn't small enough. The reason 2 ldrs are faster
13072 is because these ARMs are able to do more than one cache access
13073 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13074 whilst the ARM8 has a double bandwidth cache. This means that
13075 these cores can do both an instruction fetch and a data fetch in
13076 a single cycle, so the trick of calculating the address into a
13077 scratch register (one of the result regs) and then doing a load
13078 multiple actually becomes slower (and no smaller in code size).
13079 That is the transformation
13081 ldr rd1, [rbase + offset]
13082 ldr rd2, [rbase + offset + 4]
13086 add rd1, rbase, offset
13087 ldmia rd1, {rd1, rd2}
13089 produces worse code -- '3 cycles + any stalls on rd2' instead of
13090 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13091 access per cycle, the first sequence could never complete in less
13092 than 6 cycles, whereas the ldm sequence would only take 5 and
13093 would make better use of sequential accesses if not hitting the
13096 We cheat here and test 'arm_ld_sched' which we currently know to
13097 only be true for the ARM8, ARM9 and StrongARM. If this ever
13098 changes, then the test below needs to be reworked. */
13099 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
13102 /* XScale has load-store double instructions, but they have stricter
13103 alignment requirements than load-store multiple, so we cannot
13106 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13107 the pipeline until completion.
13115 An ldr instruction takes 1-3 cycles, but does not block the
13124 Best case ldr will always win. However, the more ldr instructions
13125 we issue, the less likely we are to be able to schedule them well.
13126 Using ldr instructions also increases code size.
13128 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13129 for counts of 3 or 4 regs. */
13130 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13135 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13136 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13137 an array ORDER which describes the sequence to use when accessing the
13138 offsets that produces an ascending order. In this sequence, each
13139 offset must be larger by exactly 4 than the previous one. ORDER[0]
13140 must have been filled in with the lowest offset by the caller.
13141 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13142 we use to verify that ORDER produces an ascending order of registers.
13143 Return true if it was possible to construct such an order, false if
13147 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13148 int *unsorted_regs
)
13151 for (i
= 1; i
< nops
; i
++)
13155 order
[i
] = order
[i
- 1];
13156 for (j
= 0; j
< nops
; j
++)
13157 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13159 /* We must find exactly one offset that is higher than the
13160 previous one by 4. */
13161 if (order
[i
] != order
[i
- 1])
13165 if (order
[i
] == order
[i
- 1])
13167 /* The register numbers must be ascending. */
13168 if (unsorted_regs
!= NULL
13169 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13175 /* Used to determine in a peephole whether a sequence of load
13176 instructions can be changed into a load-multiple instruction.
13177 NOPS is the number of separate load instructions we are examining. The
13178 first NOPS entries in OPERANDS are the destination registers, the
13179 next NOPS entries are memory operands. If this function is
13180 successful, *BASE is set to the common base register of the memory
13181 accesses; *LOAD_OFFSET is set to the first memory location's offset
13182 from that base register.
13183 REGS is an array filled in with the destination register numbers.
13184 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13185 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13186 the sequence of registers in REGS matches the loads from ascending memory
13187 locations, and the function verifies that the register numbers are
13188 themselves ascending. If CHECK_REGS is false, the register numbers
13189 are stored in the order they are found in the operands. */
13191 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13192 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13194 int unsorted_regs
[MAX_LDM_STM_OPS
];
13195 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13196 int order
[MAX_LDM_STM_OPS
];
13197 rtx base_reg_rtx
= NULL
;
13201 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13202 easily extended if required. */
13203 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13205 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13207 /* Loop over the operands and check that the memory references are
13208 suitable (i.e. immediate offsets from the same base register). At
13209 the same time, extract the target register, and the memory
13211 for (i
= 0; i
< nops
; i
++)
13216 /* Convert a subreg of a mem into the mem itself. */
13217 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13218 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13220 gcc_assert (MEM_P (operands
[nops
+ i
]));
13222 /* Don't reorder volatile memory references; it doesn't seem worth
13223 looking for the case where the order is ok anyway. */
13224 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13227 offset
= const0_rtx
;
13229 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13230 || (GET_CODE (reg
) == SUBREG
13231 && REG_P (reg
= SUBREG_REG (reg
))))
13232 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13233 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13234 || (GET_CODE (reg
) == SUBREG
13235 && REG_P (reg
= SUBREG_REG (reg
))))
13236 && (CONST_INT_P (offset
13237 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13241 base_reg
= REGNO (reg
);
13242 base_reg_rtx
= reg
;
13243 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13246 else if (base_reg
!= (int) REGNO (reg
))
13247 /* Not addressed from the same base register. */
13250 unsorted_regs
[i
] = (REG_P (operands
[i
])
13251 ? REGNO (operands
[i
])
13252 : REGNO (SUBREG_REG (operands
[i
])));
13254 /* If it isn't an integer register, or if it overwrites the
13255 base register but isn't the last insn in the list, then
13256 we can't do this. */
13257 if (unsorted_regs
[i
] < 0
13258 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13259 || unsorted_regs
[i
] > 14
13260 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13263 /* Don't allow SP to be loaded unless it is also the base
13264 register. It guarantees that SP is reset correctly when
13265 an LDM instruction is interrupted. Otherwise, we might
13266 end up with a corrupt stack. */
13267 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13270 unsorted_offsets
[i
] = INTVAL (offset
);
13271 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13275 /* Not a suitable memory address. */
13279 /* All the useful information has now been extracted from the
13280 operands into unsorted_regs and unsorted_offsets; additionally,
13281 order[0] has been set to the lowest offset in the list. Sort
13282 the offsets into order, verifying that they are adjacent, and
13283 check that the register numbers are ascending. */
13284 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13285 check_regs
? unsorted_regs
: NULL
))
13289 memcpy (saved_order
, order
, sizeof order
);
13295 for (i
= 0; i
< nops
; i
++)
13296 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13298 *load_offset
= unsorted_offsets
[order
[0]];
13302 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13305 if (unsorted_offsets
[order
[0]] == 0)
13306 ldm_case
= 1; /* ldmia */
13307 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13308 ldm_case
= 2; /* ldmib */
13309 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13310 ldm_case
= 3; /* ldmda */
13311 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13312 ldm_case
= 4; /* ldmdb */
13313 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13314 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13319 if (!multiple_operation_profitable_p (false, nops
,
13321 ? unsorted_offsets
[order
[0]] : 0))
13327 /* Used to determine in a peephole whether a sequence of store instructions can
13328 be changed into a store-multiple instruction.
13329 NOPS is the number of separate store instructions we are examining.
13330 NOPS_TOTAL is the total number of instructions recognized by the peephole
13332 The first NOPS entries in OPERANDS are the source registers, the next
13333 NOPS entries are memory operands. If this function is successful, *BASE is
13334 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13335 to the first memory location's offset from that base register. REGS is an
13336 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13337 likewise filled with the corresponding rtx's.
13338 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13339 numbers to an ascending order of stores.
13340 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13341 from ascending memory locations, and the function verifies that the register
13342 numbers are themselves ascending. If CHECK_REGS is false, the register
13343 numbers are stored in the order they are found in the operands. */
13345 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13346 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13347 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13349 int unsorted_regs
[MAX_LDM_STM_OPS
];
13350 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13351 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13352 int order
[MAX_LDM_STM_OPS
];
13354 rtx base_reg_rtx
= NULL
;
13357 /* Write back of base register is currently only supported for Thumb 1. */
13358 int base_writeback
= TARGET_THUMB1
;
13360 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13361 easily extended if required. */
13362 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13364 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13366 /* Loop over the operands and check that the memory references are
13367 suitable (i.e. immediate offsets from the same base register). At
13368 the same time, extract the target register, and the memory
13370 for (i
= 0; i
< nops
; i
++)
13375 /* Convert a subreg of a mem into the mem itself. */
13376 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13377 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13379 gcc_assert (MEM_P (operands
[nops
+ i
]));
13381 /* Don't reorder volatile memory references; it doesn't seem worth
13382 looking for the case where the order is ok anyway. */
13383 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13386 offset
= const0_rtx
;
13388 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13389 || (GET_CODE (reg
) == SUBREG
13390 && REG_P (reg
= SUBREG_REG (reg
))))
13391 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13392 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13393 || (GET_CODE (reg
) == SUBREG
13394 && REG_P (reg
= SUBREG_REG (reg
))))
13395 && (CONST_INT_P (offset
13396 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13398 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13399 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13400 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13404 base_reg
= REGNO (reg
);
13405 base_reg_rtx
= reg
;
13406 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13409 else if (base_reg
!= (int) REGNO (reg
))
13410 /* Not addressed from the same base register. */
13413 /* If it isn't an integer register, then we can't do this. */
13414 if (unsorted_regs
[i
] < 0
13415 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13416 /* The effects are unpredictable if the base register is
13417 both updated and stored. */
13418 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13419 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13420 || unsorted_regs
[i
] > 14)
13423 unsorted_offsets
[i
] = INTVAL (offset
);
13424 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13428 /* Not a suitable memory address. */
13432 /* All the useful information has now been extracted from the
13433 operands into unsorted_regs and unsorted_offsets; additionally,
13434 order[0] has been set to the lowest offset in the list. Sort
13435 the offsets into order, verifying that they are adjacent, and
13436 check that the register numbers are ascending. */
13437 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13438 check_regs
? unsorted_regs
: NULL
))
13442 memcpy (saved_order
, order
, sizeof order
);
13448 for (i
= 0; i
< nops
; i
++)
13450 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13452 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13455 *load_offset
= unsorted_offsets
[order
[0]];
13459 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13462 if (unsorted_offsets
[order
[0]] == 0)
13463 stm_case
= 1; /* stmia */
13464 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13465 stm_case
= 2; /* stmib */
13466 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13467 stm_case
= 3; /* stmda */
13468 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13469 stm_case
= 4; /* stmdb */
13473 if (!multiple_operation_profitable_p (false, nops
, 0))
13479 /* Routines for use in generating RTL. */
13481 /* Generate a load-multiple instruction. COUNT is the number of loads in
13482 the instruction; REGS and MEMS are arrays containing the operands.
13483 BASEREG is the base register to be used in addressing the memory operands.
13484 WBACK_OFFSET is nonzero if the instruction should update the base
13488 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13489 HOST_WIDE_INT wback_offset
)
13494 if (!multiple_operation_profitable_p (false, count
, 0))
13500 for (i
= 0; i
< count
; i
++)
13501 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13503 if (wback_offset
!= 0)
13504 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13506 seq
= get_insns ();
13512 result
= gen_rtx_PARALLEL (VOIDmode
,
13513 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13514 if (wback_offset
!= 0)
13516 XVECEXP (result
, 0, 0)
13517 = gen_rtx_SET (VOIDmode
, basereg
,
13518 plus_constant (Pmode
, basereg
, wback_offset
));
13523 for (j
= 0; i
< count
; i
++, j
++)
13524 XVECEXP (result
, 0, i
)
13525 = gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13530 /* Generate a store-multiple instruction. COUNT is the number of stores in
13531 the instruction; REGS and MEMS are arrays containing the operands.
13532 BASEREG is the base register to be used in addressing the memory operands.
13533 WBACK_OFFSET is nonzero if the instruction should update the base
13537 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13538 HOST_WIDE_INT wback_offset
)
13543 if (GET_CODE (basereg
) == PLUS
)
13544 basereg
= XEXP (basereg
, 0);
13546 if (!multiple_operation_profitable_p (false, count
, 0))
13552 for (i
= 0; i
< count
; i
++)
13553 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
13555 if (wback_offset
!= 0)
13556 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13558 seq
= get_insns ();
13564 result
= gen_rtx_PARALLEL (VOIDmode
,
13565 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13566 if (wback_offset
!= 0)
13568 XVECEXP (result
, 0, 0)
13569 = gen_rtx_SET (VOIDmode
, basereg
,
13570 plus_constant (Pmode
, basereg
, wback_offset
));
13575 for (j
= 0; i
< count
; i
++, j
++)
13576 XVECEXP (result
, 0, i
)
13577 = gen_rtx_SET (VOIDmode
, mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
13582 /* Generate either a load-multiple or a store-multiple instruction. This
13583 function can be used in situations where we can start with a single MEM
13584 rtx and adjust its address upwards.
13585 COUNT is the number of operations in the instruction, not counting a
13586 possible update of the base register. REGS is an array containing the
13588 BASEREG is the base register to be used in addressing the memory operands,
13589 which are constructed from BASEMEM.
13590 WRITE_BACK specifies whether the generated instruction should include an
13591 update of the base register.
13592 OFFSETP is used to pass an offset to and from this function; this offset
13593 is not used when constructing the address (instead BASEMEM should have an
13594 appropriate offset in its address), it is used only for setting
13595 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13598 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
13599 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
13601 rtx mems
[MAX_LDM_STM_OPS
];
13602 HOST_WIDE_INT offset
= *offsetp
;
13605 gcc_assert (count
<= MAX_LDM_STM_OPS
);
13607 if (GET_CODE (basereg
) == PLUS
)
13608 basereg
= XEXP (basereg
, 0);
13610 for (i
= 0; i
< count
; i
++)
13612 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
13613 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
13621 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
13622 write_back
? 4 * count
: 0);
13624 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
13625 write_back
? 4 * count
: 0);
13629 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13630 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13632 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
13637 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13638 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13640 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
13644 /* Called from a peephole2 expander to turn a sequence of loads into an
13645 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13646 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13647 is true if we can reorder the registers because they are used commutatively
13649 Returns true iff we could generate a new instruction. */
13652 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
13654 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13655 rtx mems
[MAX_LDM_STM_OPS
];
13656 int i
, j
, base_reg
;
13658 HOST_WIDE_INT offset
;
13659 int write_back
= FALSE
;
13663 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
13664 &base_reg
, &offset
, !sort_regs
);
13670 for (i
= 0; i
< nops
- 1; i
++)
13671 for (j
= i
+ 1; j
< nops
; j
++)
13672 if (regs
[i
] > regs
[j
])
13678 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13682 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
13683 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
13689 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
13690 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
13692 if (!TARGET_THUMB1
)
13694 base_reg
= regs
[0];
13695 base_reg_rtx
= newbase
;
13699 for (i
= 0; i
< nops
; i
++)
13701 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13702 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13705 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13706 write_back
? offset
+ i
* 4 : 0));
13710 /* Called from a peephole2 expander to turn a sequence of stores into an
13711 STM instruction. OPERANDS are the operands found by the peephole matcher;
13712 NOPS indicates how many separate stores we are trying to combine.
13713 Returns true iff we could generate a new instruction. */
13716 gen_stm_seq (rtx
*operands
, int nops
)
13719 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13720 rtx mems
[MAX_LDM_STM_OPS
];
13723 HOST_WIDE_INT offset
;
13724 int write_back
= FALSE
;
13727 bool base_reg_dies
;
13729 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
13730 mem_order
, &base_reg
, &offset
, true);
13735 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13737 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
13740 gcc_assert (base_reg_dies
);
13746 gcc_assert (base_reg_dies
);
13747 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13751 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13753 for (i
= 0; i
< nops
; i
++)
13755 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13756 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13759 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13760 write_back
? offset
+ i
* 4 : 0));
13764 /* Called from a peephole2 expander to turn a sequence of stores that are
13765 preceded by constant loads into an STM instruction. OPERANDS are the
13766 operands found by the peephole matcher; NOPS indicates how many
13767 separate stores we are trying to combine; there are 2 * NOPS
13768 instructions in the peephole.
13769 Returns true iff we could generate a new instruction. */
13772 gen_const_stm_seq (rtx
*operands
, int nops
)
13774 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
13775 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13776 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
13777 rtx mems
[MAX_LDM_STM_OPS
];
13780 HOST_WIDE_INT offset
;
13781 int write_back
= FALSE
;
13784 bool base_reg_dies
;
13786 HARD_REG_SET allocated
;
13788 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
13789 mem_order
, &base_reg
, &offset
, false);
13794 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
13796 /* If the same register is used more than once, try to find a free
13798 CLEAR_HARD_REG_SET (allocated
);
13799 for (i
= 0; i
< nops
; i
++)
13801 for (j
= i
+ 1; j
< nops
; j
++)
13802 if (regs
[i
] == regs
[j
])
13804 rtx t
= peep2_find_free_register (0, nops
* 2,
13805 TARGET_THUMB1
? "l" : "r",
13806 SImode
, &allocated
);
13810 regs
[i
] = REGNO (t
);
13814 /* Compute an ordering that maps the register numbers to an ascending
13817 for (i
= 0; i
< nops
; i
++)
13818 if (regs
[i
] < regs
[reg_order
[0]])
13821 for (i
= 1; i
< nops
; i
++)
13823 int this_order
= reg_order
[i
- 1];
13824 for (j
= 0; j
< nops
; j
++)
13825 if (regs
[j
] > regs
[reg_order
[i
- 1]]
13826 && (this_order
== reg_order
[i
- 1]
13827 || regs
[j
] < regs
[this_order
]))
13829 reg_order
[i
] = this_order
;
13832 /* Ensure that registers that must be live after the instruction end
13833 up with the correct value. */
13834 for (i
= 0; i
< nops
; i
++)
13836 int this_order
= reg_order
[i
];
13837 if ((this_order
!= mem_order
[i
]
13838 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
13839 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
13843 /* Load the constants. */
13844 for (i
= 0; i
< nops
; i
++)
13846 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
13847 sorted_regs
[i
] = regs
[reg_order
[i
]];
13848 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
13851 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13853 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
13856 gcc_assert (base_reg_dies
);
13862 gcc_assert (base_reg_dies
);
13863 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13867 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13869 for (i
= 0; i
< nops
; i
++)
13871 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13872 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13875 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
13876 write_back
? offset
+ i
* 4 : 0));
13880 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13881 unaligned copies on processors which support unaligned semantics for those
13882 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13883 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13884 An interleave factor of 1 (the minimum) will perform no interleaving.
13885 Load/store multiple are used for aligned addresses where possible. */
13888 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
13889 HOST_WIDE_INT length
,
13890 unsigned int interleave_factor
)
13892 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
13893 int *regnos
= XALLOCAVEC (int, interleave_factor
);
13894 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
13895 HOST_WIDE_INT i
, j
;
13896 HOST_WIDE_INT remaining
= length
, words
;
13897 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
13899 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
13900 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
13901 HOST_WIDE_INT srcoffset
, dstoffset
;
13902 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
13905 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
13907 /* Use hard registers if we have aligned source or destination so we can use
13908 load/store multiple with contiguous registers. */
13909 if (dst_aligned
|| src_aligned
)
13910 for (i
= 0; i
< interleave_factor
; i
++)
13911 regs
[i
] = gen_rtx_REG (SImode
, i
);
13913 for (i
= 0; i
< interleave_factor
; i
++)
13914 regs
[i
] = gen_reg_rtx (SImode
);
13916 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
13917 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
13919 srcoffset
= dstoffset
= 0;
13921 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13922 For copying the last bytes we want to subtract this offset again. */
13923 src_autoinc
= dst_autoinc
= 0;
13925 for (i
= 0; i
< interleave_factor
; i
++)
13928 /* Copy BLOCK_SIZE_BYTES chunks. */
13930 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
13933 if (src_aligned
&& interleave_factor
> 1)
13935 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
13936 TRUE
, srcbase
, &srcoffset
));
13937 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13941 for (j
= 0; j
< interleave_factor
; j
++)
13943 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
13945 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13946 srcoffset
+ j
* UNITS_PER_WORD
);
13947 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13949 srcoffset
+= block_size_bytes
;
13953 if (dst_aligned
&& interleave_factor
> 1)
13955 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
13956 TRUE
, dstbase
, &dstoffset
));
13957 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13961 for (j
= 0; j
< interleave_factor
; j
++)
13963 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
13965 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13966 dstoffset
+ j
* UNITS_PER_WORD
);
13967 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13969 dstoffset
+= block_size_bytes
;
13972 remaining
-= block_size_bytes
;
13975 /* Copy any whole words left (note these aren't interleaved with any
13976 subsequent halfword/byte load/stores in the interests of simplicity). */
13978 words
= remaining
/ UNITS_PER_WORD
;
13980 gcc_assert (words
< interleave_factor
);
13982 if (src_aligned
&& words
> 1)
13984 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
13986 src_autoinc
+= UNITS_PER_WORD
* words
;
13990 for (j
= 0; j
< words
; j
++)
13992 addr
= plus_constant (Pmode
, src
,
13993 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
13994 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13995 srcoffset
+ j
* UNITS_PER_WORD
);
13996 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13998 srcoffset
+= words
* UNITS_PER_WORD
;
14001 if (dst_aligned
&& words
> 1)
14003 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
14005 dst_autoinc
+= words
* UNITS_PER_WORD
;
14009 for (j
= 0; j
< words
; j
++)
14011 addr
= plus_constant (Pmode
, dst
,
14012 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
14013 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14014 dstoffset
+ j
* UNITS_PER_WORD
);
14015 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14017 dstoffset
+= words
* UNITS_PER_WORD
;
14020 remaining
-= words
* UNITS_PER_WORD
;
14022 gcc_assert (remaining
< 4);
14024 /* Copy a halfword if necessary. */
14026 if (remaining
>= 2)
14028 halfword_tmp
= gen_reg_rtx (SImode
);
14030 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14031 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
14032 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
14034 /* Either write out immediately, or delay until we've loaded the last
14035 byte, depending on interleave factor. */
14036 if (interleave_factor
== 1)
14038 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14039 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14040 emit_insn (gen_unaligned_storehi (mem
,
14041 gen_lowpart (HImode
, halfword_tmp
)));
14042 halfword_tmp
= NULL
;
14050 gcc_assert (remaining
< 2);
14052 /* Copy last byte. */
14054 if ((remaining
& 1) != 0)
14056 byte_tmp
= gen_reg_rtx (SImode
);
14058 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14059 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14060 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14062 if (interleave_factor
== 1)
14064 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14065 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14066 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14075 /* Store last halfword if we haven't done so already. */
14079 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14080 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14081 emit_insn (gen_unaligned_storehi (mem
,
14082 gen_lowpart (HImode
, halfword_tmp
)));
14086 /* Likewise for last byte. */
14090 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14091 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14092 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14096 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
14099 /* From mips_adjust_block_mem:
14101 Helper function for doing a loop-based block operation on memory
14102 reference MEM. Each iteration of the loop will operate on LENGTH
14105 Create a new base register for use within the loop and point it to
14106 the start of MEM. Create a new memory reference that uses this
14107 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14110 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
14113 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14115 /* Although the new mem does not refer to a known location,
14116 it does keep up to LENGTH bytes of alignment. */
14117 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14118 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
14121 /* From mips_block_move_loop:
14123 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14124 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14125 the memory regions do not overlap. */
14128 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14129 unsigned int interleave_factor
,
14130 HOST_WIDE_INT bytes_per_iter
)
14132 rtx label
, src_reg
, dest_reg
, final_src
, test
;
14133 HOST_WIDE_INT leftover
;
14135 leftover
= length
% bytes_per_iter
;
14136 length
-= leftover
;
14138 /* Create registers and memory references for use within the loop. */
14139 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14140 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14142 /* Calculate the value that SRC_REG should have after the last iteration of
14144 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14145 0, 0, OPTAB_WIDEN
);
14147 /* Emit the start of the loop. */
14148 label
= gen_label_rtx ();
14149 emit_label (label
);
14151 /* Emit the loop body. */
14152 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14153 interleave_factor
);
14155 /* Move on to the next block. */
14156 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14157 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14159 /* Emit the loop condition. */
14160 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14161 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14163 /* Mop up any left-over bytes. */
14165 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14168 /* Emit a block move when either the source or destination is unaligned (not
14169 aligned to a four-byte boundary). This may need further tuning depending on
14170 core type, optimize_size setting, etc. */
14173 arm_movmemqi_unaligned (rtx
*operands
)
14175 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14179 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14180 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14181 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14182 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14183 or dst_aligned though: allow more interleaving in those cases since the
14184 resulting code can be smaller. */
14185 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14186 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14189 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14190 interleave_factor
, bytes_per_iter
);
14192 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14193 interleave_factor
);
14197 /* Note that the loop created by arm_block_move_unaligned_loop may be
14198 subject to loop unrolling, which makes tuning this condition a little
14201 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14203 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14210 arm_gen_movmemqi (rtx
*operands
)
14212 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14213 HOST_WIDE_INT srcoffset
, dstoffset
;
14215 rtx src
, dst
, srcbase
, dstbase
;
14216 rtx part_bytes_reg
= NULL
;
14219 if (!CONST_INT_P (operands
[2])
14220 || !CONST_INT_P (operands
[3])
14221 || INTVAL (operands
[2]) > 64)
14224 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14225 return arm_movmemqi_unaligned (operands
);
14227 if (INTVAL (operands
[3]) & 3)
14230 dstbase
= operands
[0];
14231 srcbase
= operands
[1];
14233 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14234 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14236 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14237 out_words_to_go
= INTVAL (operands
[2]) / 4;
14238 last_bytes
= INTVAL (operands
[2]) & 3;
14239 dstoffset
= srcoffset
= 0;
14241 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14242 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14244 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14246 if (in_words_to_go
> 4)
14247 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14248 TRUE
, srcbase
, &srcoffset
));
14250 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14251 src
, FALSE
, srcbase
,
14254 if (out_words_to_go
)
14256 if (out_words_to_go
> 4)
14257 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14258 TRUE
, dstbase
, &dstoffset
));
14259 else if (out_words_to_go
!= 1)
14260 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14261 out_words_to_go
, dst
,
14264 dstbase
, &dstoffset
));
14267 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14268 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
14269 if (last_bytes
!= 0)
14271 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14277 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14278 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14281 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14282 if (out_words_to_go
)
14286 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14287 sreg
= copy_to_reg (mem
);
14289 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14290 emit_move_insn (mem
, sreg
);
14293 gcc_assert (!in_words_to_go
); /* Sanity check */
14296 if (in_words_to_go
)
14298 gcc_assert (in_words_to_go
> 0);
14300 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14301 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14304 gcc_assert (!last_bytes
|| part_bytes_reg
);
14306 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14308 rtx tmp
= gen_reg_rtx (SImode
);
14310 /* The bytes we want are in the top end of the word. */
14311 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14312 GEN_INT (8 * (4 - last_bytes
))));
14313 part_bytes_reg
= tmp
;
14317 mem
= adjust_automodify_address (dstbase
, QImode
,
14318 plus_constant (Pmode
, dst
,
14320 dstoffset
+ last_bytes
- 1);
14321 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14325 tmp
= gen_reg_rtx (SImode
);
14326 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14327 part_bytes_reg
= tmp
;
14334 if (last_bytes
> 1)
14336 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14337 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14341 rtx tmp
= gen_reg_rtx (SImode
);
14342 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14343 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14344 part_bytes_reg
= tmp
;
14351 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14352 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14359 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14362 next_consecutive_mem (rtx mem
)
14364 enum machine_mode mode
= GET_MODE (mem
);
14365 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14366 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14368 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14371 /* Copy using LDRD/STRD instructions whenever possible.
14372 Returns true upon success. */
14374 gen_movmem_ldrd_strd (rtx
*operands
)
14376 unsigned HOST_WIDE_INT len
;
14377 HOST_WIDE_INT align
;
14378 rtx src
, dst
, base
;
14380 bool src_aligned
, dst_aligned
;
14381 bool src_volatile
, dst_volatile
;
14383 gcc_assert (CONST_INT_P (operands
[2]));
14384 gcc_assert (CONST_INT_P (operands
[3]));
14386 len
= UINTVAL (operands
[2]);
14390 /* Maximum alignment we can assume for both src and dst buffers. */
14391 align
= INTVAL (operands
[3]);
14393 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14396 /* Place src and dst addresses in registers
14397 and update the corresponding mem rtx. */
14399 dst_volatile
= MEM_VOLATILE_P (dst
);
14400 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14401 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14402 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14405 src_volatile
= MEM_VOLATILE_P (src
);
14406 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14407 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14408 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14410 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14413 if (src_volatile
|| dst_volatile
)
14416 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14417 if (!(dst_aligned
|| src_aligned
))
14418 return arm_gen_movmemqi (operands
);
14420 src
= adjust_address (src
, DImode
, 0);
14421 dst
= adjust_address (dst
, DImode
, 0);
14425 reg0
= gen_reg_rtx (DImode
);
14427 emit_move_insn (reg0
, src
);
14429 emit_insn (gen_unaligned_loaddi (reg0
, src
));
14432 emit_move_insn (dst
, reg0
);
14434 emit_insn (gen_unaligned_storedi (dst
, reg0
));
14436 src
= next_consecutive_mem (src
);
14437 dst
= next_consecutive_mem (dst
);
14440 gcc_assert (len
< 8);
14443 /* More than a word but less than a double-word to copy. Copy a word. */
14444 reg0
= gen_reg_rtx (SImode
);
14445 src
= adjust_address (src
, SImode
, 0);
14446 dst
= adjust_address (dst
, SImode
, 0);
14448 emit_move_insn (reg0
, src
);
14450 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14453 emit_move_insn (dst
, reg0
);
14455 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14457 src
= next_consecutive_mem (src
);
14458 dst
= next_consecutive_mem (dst
);
14465 /* Copy the remaining bytes. */
14468 dst
= adjust_address (dst
, HImode
, 0);
14469 src
= adjust_address (src
, HImode
, 0);
14470 reg0
= gen_reg_rtx (SImode
);
14472 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14474 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14477 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14479 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14481 src
= next_consecutive_mem (src
);
14482 dst
= next_consecutive_mem (dst
);
14487 dst
= adjust_address (dst
, QImode
, 0);
14488 src
= adjust_address (src
, QImode
, 0);
14489 reg0
= gen_reg_rtx (QImode
);
14490 emit_move_insn (reg0
, src
);
14491 emit_move_insn (dst
, reg0
);
14495 /* Select a dominance comparison mode if possible for a test of the general
14496 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14497 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14498 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14499 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14500 In all cases OP will be either EQ or NE, but we don't need to know which
14501 here. If we are unable to support a dominance comparison we return
14502 CC mode. This will then fail to match for the RTL expressions that
14503 generate this call. */
14505 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14507 enum rtx_code cond1
, cond2
;
14510 /* Currently we will probably get the wrong result if the individual
14511 comparisons are not simple. This also ensures that it is safe to
14512 reverse a comparison if necessary. */
14513 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14515 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14519 /* The if_then_else variant of this tests the second condition if the
14520 first passes, but is true if the first fails. Reverse the first
14521 condition to get a true "inclusive-or" expression. */
14522 if (cond_or
== DOM_CC_NX_OR_Y
)
14523 cond1
= reverse_condition (cond1
);
14525 /* If the comparisons are not equal, and one doesn't dominate the other,
14526 then we can't do this. */
14528 && !comparison_dominates_p (cond1
, cond2
)
14529 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14534 enum rtx_code temp
= cond1
;
14542 if (cond_or
== DOM_CC_X_AND_Y
)
14547 case EQ
: return CC_DEQmode
;
14548 case LE
: return CC_DLEmode
;
14549 case LEU
: return CC_DLEUmode
;
14550 case GE
: return CC_DGEmode
;
14551 case GEU
: return CC_DGEUmode
;
14552 default: gcc_unreachable ();
14556 if (cond_or
== DOM_CC_X_AND_Y
)
14568 gcc_unreachable ();
14572 if (cond_or
== DOM_CC_X_AND_Y
)
14584 gcc_unreachable ();
14588 if (cond_or
== DOM_CC_X_AND_Y
)
14589 return CC_DLTUmode
;
14594 return CC_DLTUmode
;
14596 return CC_DLEUmode
;
14600 gcc_unreachable ();
14604 if (cond_or
== DOM_CC_X_AND_Y
)
14605 return CC_DGTUmode
;
14610 return CC_DGTUmode
;
14612 return CC_DGEUmode
;
14616 gcc_unreachable ();
14619 /* The remaining cases only occur when both comparisons are the
14622 gcc_assert (cond1
== cond2
);
14626 gcc_assert (cond1
== cond2
);
14630 gcc_assert (cond1
== cond2
);
14634 gcc_assert (cond1
== cond2
);
14635 return CC_DLEUmode
;
14638 gcc_assert (cond1
== cond2
);
14639 return CC_DGEUmode
;
14642 gcc_unreachable ();
14647 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
14649 /* All floating point compares return CCFP if it is an equality
14650 comparison, and CCFPE otherwise. */
14651 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14674 gcc_unreachable ();
14678 /* A compare with a shifted operand. Because of canonicalization, the
14679 comparison will have to be swapped when we emit the assembler. */
14680 if (GET_MODE (y
) == SImode
14681 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14682 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14683 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
14684 || GET_CODE (x
) == ROTATERT
))
14687 /* This operation is performed swapped, but since we only rely on the Z
14688 flag we don't need an additional mode. */
14689 if (GET_MODE (y
) == SImode
14690 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14691 && GET_CODE (x
) == NEG
14692 && (op
== EQ
|| op
== NE
))
14695 /* This is a special case that is used by combine to allow a
14696 comparison of a shifted byte load to be split into a zero-extend
14697 followed by a comparison of the shifted integer (only valid for
14698 equalities and unsigned inequalities). */
14699 if (GET_MODE (x
) == SImode
14700 && GET_CODE (x
) == ASHIFT
14701 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
14702 && GET_CODE (XEXP (x
, 0)) == SUBREG
14703 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
14704 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
14705 && (op
== EQ
|| op
== NE
14706 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
14707 && CONST_INT_P (y
))
14710 /* A construct for a conditional compare, if the false arm contains
14711 0, then both conditions must be true, otherwise either condition
14712 must be true. Not all conditions are possible, so CCmode is
14713 returned if it can't be done. */
14714 if (GET_CODE (x
) == IF_THEN_ELSE
14715 && (XEXP (x
, 2) == const0_rtx
14716 || XEXP (x
, 2) == const1_rtx
)
14717 && COMPARISON_P (XEXP (x
, 0))
14718 && COMPARISON_P (XEXP (x
, 1)))
14719 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14720 INTVAL (XEXP (x
, 2)));
14722 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14723 if (GET_CODE (x
) == AND
14724 && (op
== EQ
|| op
== NE
)
14725 && COMPARISON_P (XEXP (x
, 0))
14726 && COMPARISON_P (XEXP (x
, 1)))
14727 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14730 if (GET_CODE (x
) == IOR
14731 && (op
== EQ
|| op
== NE
)
14732 && COMPARISON_P (XEXP (x
, 0))
14733 && COMPARISON_P (XEXP (x
, 1)))
14734 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14737 /* An operation (on Thumb) where we want to test for a single bit.
14738 This is done by shifting that bit up into the top bit of a
14739 scratch register; we can then branch on the sign bit. */
14741 && GET_MODE (x
) == SImode
14742 && (op
== EQ
|| op
== NE
)
14743 && GET_CODE (x
) == ZERO_EXTRACT
14744 && XEXP (x
, 1) == const1_rtx
)
14747 /* An operation that sets the condition codes as a side-effect, the
14748 V flag is not set correctly, so we can only use comparisons where
14749 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14751 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14752 if (GET_MODE (x
) == SImode
14754 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
14755 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
14756 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
14757 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
14758 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
14759 || GET_CODE (x
) == LSHIFTRT
14760 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14761 || GET_CODE (x
) == ROTATERT
14762 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
14763 return CC_NOOVmode
;
14765 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
14768 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
14769 && GET_CODE (x
) == PLUS
14770 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
14773 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
14779 /* A DImode comparison against zero can be implemented by
14780 or'ing the two halves together. */
14781 if (y
== const0_rtx
)
14784 /* We can do an equality test in three Thumb instructions. */
14794 /* DImode unsigned comparisons can be implemented by cmp +
14795 cmpeq without a scratch register. Not worth doing in
14806 /* DImode signed and unsigned comparisons can be implemented
14807 by cmp + sbcs with a scratch register, but that does not
14808 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14809 gcc_assert (op
!= EQ
&& op
!= NE
);
14813 gcc_unreachable ();
14817 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
14818 return GET_MODE (x
);
14823 /* X and Y are two things to compare using CODE. Emit the compare insn and
14824 return the rtx for register 0 in the proper mode. FP means this is a
14825 floating point compare: I don't think that it is needed on the arm. */
/* NOTE(review): this fragment is corrupted by extraction -- each original
   source line is split across several lines and interior lines are missing
   (the embedded line numbers jump, e.g. 14839 -> 14841).  Code kept
   byte-identical; do not attempt to compile from this copy.  */
/* Emit a compare of X against Y for rtx code CODE and (apparently -- the
   return statement is not visible here) return the CC register, in the CC
   mode chosen by SELECT_CC_MODE.  For a DImode comparison a constant X is
   first forced into a register; a DImode CC_Zmode equality against a
   non-zero Y is rewritten as XOR-then-compare-with-zero, and after reload
   a valid SImode SCRATCH must be supplied (asserted below).  */
14827 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
14829 enum machine_mode mode
;
14831 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
14833 /* We might have X as a constant, Y as a register because of the predicates
14834 used for cmpdi. If so, force X to a register here. */
14835 if (dimode_comparison
&& !REG_P (x
))
14836 x
= force_reg (DImode
, x
);
14838 mode
= SELECT_CC_MODE (code
, x
, y
);
14839 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
14841 if (dimode_comparison
14842 && mode
!= CC_CZmode
)
14846 /* To compare two non-zero values for equality, XOR them and
14847 then compare against zero. Not used for ARM mode; there
14848 CC_CZmode is cheaper. */
14849 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
14851 gcc_assert (!reload_completed
);
14852 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
14856 /* A scratch register is required. */
14857 if (reload_completed
)
14858 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
/* Before reload a SCRATCH placeholder is used instead of a hard reg.  */
14860 scratch
= gen_rtx_SCRATCH (SImode
);
14862 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
14863 set
= gen_rtx_SET (VOIDmode
, cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14864 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
/* Non-DImode (or CC_CZmode) path: plain compare, no scratch clobber.  */
14867 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14872 /* Generate a sequence of insns that will generate the correct return
14873 address mask depending on the physical architecture that the program
   is running on.  (NOTE(review): the closing of this comment was dropped
   by extraction; wording restored from context -- verify.)  */
/* Emits a return_addr_mask insn targeting a fresh Pmode pseudo REG; the
   function's return statement is not visible in this fragment.  */
14876 arm_gen_return_addr_mask (void)
14878 rtx reg
= gen_reg_rtx (Pmode
);
14880 emit_insn (gen_return_addr_mask (reg
));
/* NOTE(review): corrupted extraction -- tokens split across lines and
   interior source lines missing.  Code kept byte-identical.  */
/* Reload a HImode value from memory during reload by synthesizing it as
   two zero-extended QImode loads that are then combined (IOR/ASHIFT),
   honouring BYTES_BIG_ENDIAN.  OPERANDS[1] is the memory ref (possibly a
   SUBREG of a spilled pseudo); OPERANDS[2] is a DImode scratch whose two
   halves are used so at least one half cannot overlap OPERANDS[0].
   Over-complex or out-of-range addresses are first legitimized via a
   scratch base register (addsi3 handles multi-insn constants).  */
14885 arm_reload_in_hi (rtx
*operands
)
14887 rtx ref
= operands
[1];
14889 HOST_WIDE_INT offset
= 0;
/* Peel a SUBREG wrapper, remembering the byte offset it applied.  */
14891 if (GET_CODE (ref
) == SUBREG
)
14893 offset
= SUBREG_BYTE (ref
);
14894 ref
= SUBREG_REG (ref
);
14899 /* We have a pseudo which has been spilt onto the stack; there
14900 are two cases here: the first where there is a simple
14901 stack-slot replacement and a second where the stack-slot is
14902 out of range, or is used as a subreg. */
14903 if (reg_equiv_mem (REGNO (ref
)))
14905 ref
= reg_equiv_mem (REGNO (ref
));
14906 base
= find_replacement (&XEXP (ref
, 0));
14909 /* The slot is out of range, or was dressed up in a SUBREG. */
14910 base
= reg_equiv_address (REGNO (ref
));
14913 base
= find_replacement (&XEXP (ref
, 0));
14915 /* Handle the case where the address is too complex to be offset by 1. */
14916 if (GET_CODE (base
) == MINUS
14917 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14919 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14921 emit_set_insn (base_plus
, base
);
14924 else if (GET_CODE (base
) == PLUS
)
14926 /* The addend must be CONST_INT, or we would have dealt with it above. */
14927 HOST_WIDE_INT hi
, lo
;
14929 offset
+= INTVAL (XEXP (base
, 1));
14930 base
= XEXP (base
, 0);
14932 /* Rework the address into a legal sequence of insns. */
14933 /* Valid range for lo is -4095 -> 4095 */
14936 : -((-offset
) & 0xfff));
14938 /* Corner case, if lo is the max offset then we would be out of range
14939 once we have added the additional 1 below, so bump the msb into the
14940 pre-loading insn(s). */
/* Sign-extend (offset - lo) to 32 bits portably via the xor/sub trick.  */
14944 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
14945 ^ (HOST_WIDE_INT
) 0x80000000)
14946 - (HOST_WIDE_INT
) 0x80000000);
14948 gcc_assert (hi
+ lo
== offset
);
14952 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14954 /* Get the base address; addsi3 knows how to handle constants
14955 that require more than one insn. */
14956 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
14962 /* Operands[2] may overlap operands[0] (though it won't overlap
14963 operands[1]), that's why we asked for a DImode reg -- so we can
14964 use the bit that does not overlap. */
14965 if (REGNO (operands
[2]) == REGNO (operands
[0]))
14966 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14968 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
/* Load the two bytes separately, each zero-extended to SImode.  */
14970 emit_insn (gen_zero_extendqisi2 (scratch
,
14971 gen_rtx_MEM (QImode
,
14972 plus_constant (Pmode
, base
,
14974 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14975 gen_rtx_MEM (QImode
,
14976 plus_constant (Pmode
, base
,
/* Combine the bytes; which half is shifted depends on endianness.  */
14978 if (!BYTES_BIG_ENDIAN
)
14979 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14980 gen_rtx_IOR (SImode
,
14983 gen_rtx_SUBREG (SImode
, operands
[0], 0),
14987 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14988 gen_rtx_IOR (SImode
,
14989 gen_rtx_ASHIFT (SImode
, scratch
,
14991 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
14994 /* Handle storing a half-word to memory during reload by synthesizing as two
14995 byte stores. Take care not to clobber the input values until after we
14996 have moved them somewhere safe. This code assumes that if the DImode
14997 scratch in operands[2] overlaps either the input value or output address
14998 in some way, then that value must die in this insn (we absolutely need
14999 two scratch registers for some corner cases). */
/* NOTE(review): corrupted extraction -- tokens split across lines and
   interior source lines missing.  Code kept byte-identical.  Structure
   mirrors arm_reload_in_hi above: legitimize the address (splitting a
   constant offset into hi/lo parts), then emit two QImode stores plus an
   lshrsi3 to extract the other byte, ordered per BYTES_BIG_ENDIAN.  */
15001 arm_reload_out_hi (rtx
*operands
)
15003 rtx ref
= operands
[0];
15004 rtx outval
= operands
[1];
15006 HOST_WIDE_INT offset
= 0;
/* Peel a SUBREG wrapper, remembering the byte offset it applied.  */
15008 if (GET_CODE (ref
) == SUBREG
)
15010 offset
= SUBREG_BYTE (ref
);
15011 ref
= SUBREG_REG (ref
);
15016 /* We have a pseudo which has been spilt onto the stack; there
15017 are two cases here: the first where there is a simple
15018 stack-slot replacement and a second where the stack-slot is
15019 out of range, or is used as a subreg. */
15020 if (reg_equiv_mem (REGNO (ref
)))
15022 ref
= reg_equiv_mem (REGNO (ref
));
15023 base
= find_replacement (&XEXP (ref
, 0));
15026 /* The slot is out of range, or was dressed up in a SUBREG. */
15027 base
= reg_equiv_address (REGNO (ref
));
15030 base
= find_replacement (&XEXP (ref
, 0));
15032 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15034 /* Handle the case where the address is too complex to be offset by 1. */
15035 if (GET_CODE (base
) == MINUS
15036 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15038 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15040 /* Be careful not to destroy OUTVAL. */
15041 if (reg_overlap_mentioned_p (base_plus
, outval
))
15043 /* Updating base_plus might destroy outval, see if we can
15044 swap the scratch and base_plus. */
15045 if (!reg_overlap_mentioned_p (scratch
, outval
))
15048 scratch
= base_plus
;
15053 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15055 /* Be conservative and copy OUTVAL into the scratch now,
15056 this should only be necessary if outval is a subreg
15057 of something larger than a word. */
15058 /* XXX Might this clobber base? I can't see how it can,
15059 since scratch is known to overlap with OUTVAL, and
15060 must be wider than a word. */
15061 emit_insn (gen_movhi (scratch_hi
, outval
));
15062 outval
= scratch_hi
;
15066 emit_set_insn (base_plus
, base
);
15069 else if (GET_CODE (base
) == PLUS
)
15071 /* The addend must be CONST_INT, or we would have dealt with it above. */
15072 HOST_WIDE_INT hi
, lo
;
15074 offset
+= INTVAL (XEXP (base
, 1));
15075 base
= XEXP (base
, 0);
15077 /* Rework the address into a legal sequence of insns. */
15078 /* Valid range for lo is -4095 -> 4095 */
15081 : -((-offset
) & 0xfff));
15083 /* Corner case, if lo is the max offset then we would be out of range
15084 once we have added the additional 1 below, so bump the msb into the
15085 pre-loading insn(s). */
/* Sign-extend (offset - lo) to 32 bits portably via the xor/sub trick.  */
15089 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15090 ^ (HOST_WIDE_INT
) 0x80000000)
15091 - (HOST_WIDE_INT
) 0x80000000);
15093 gcc_assert (hi
+ lo
== offset
);
15097 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15099 /* Be careful not to destroy OUTVAL. */
15100 if (reg_overlap_mentioned_p (base_plus
, outval
))
15102 /* Updating base_plus might destroy outval, see if we
15103 can swap the scratch and base_plus. */
15104 if (!reg_overlap_mentioned_p (scratch
, outval
))
15107 scratch
= base_plus
;
15112 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15114 /* Be conservative and copy outval into scratch now,
15115 this should only be necessary if outval is a
15116 subreg of something larger than a word. */
15117 /* XXX Might this clobber base? I can't see how it
15118 can, since scratch is known to overlap with
15120 emit_insn (gen_movhi (scratch_hi
, outval
));
15121 outval
= scratch_hi
;
15125 /* Get the base address; addsi3 knows how to handle constants
15126 that require more than one insn. */
15127 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
/* Emit the two byte stores; order depends on target endianness.  */
15133 if (BYTES_BIG_ENDIAN
)
15135 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15136 plus_constant (Pmode
, base
,
15138 gen_lowpart (QImode
, outval
)));
15139 emit_insn (gen_lshrsi3 (scratch
,
15140 gen_rtx_SUBREG (SImode
, outval
, 0),
15142 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15144 gen_lowpart (QImode
, scratch
)));
15148 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15150 gen_lowpart (QImode
, outval
)));
15151 emit_insn (gen_lshrsi3 (scratch
,
15152 gen_rtx_SUBREG (SImode
, outval
, 0),
15154 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15155 plus_constant (Pmode
, base
,
15157 gen_lowpart (QImode
, scratch
)));
15161 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15162 (padded to the size of a word) should be passed in a register. */
/* NOTE(review): fragment corrupted by extraction (tokens split across
   lines); code kept byte-identical.  Dispatches to the variable-size
   helper for AAPCS, else to the variable-size-or-pad helper.  */
15165 arm_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
15167 if (TARGET_AAPCS_BASED
)
15168 return must_pass_in_stack_var_size (mode
, type
);
15170 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15174 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15175 Return true if an argument passed on the stack should be padded upwards,
15176 i.e. if the least-significant byte has useful data.
15177 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15178 aggregate types are placed in the lowest memory address. */
/* NOTE(review): fragment corrupted by extraction; interior lines (the
   final return paths) are missing.  Code kept byte-identical.  */
15181 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
15183 if (!TARGET_AAPCS_BASED
)
15184 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
/* AAPCS big-endian integral types get special treatment; the body of
   this branch is not visible in this extraction.  */
15186 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15193 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15194 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15195 register has useful data, and return the opposite if the most
15196 significant byte does. */
/* NOTE(review): fragment corrupted by extraction; the return statements
   of the two AAPCS branches are missing.  Code kept byte-identical.  */
15199 arm_pad_reg_upward (enum machine_mode mode
,
15200 tree type
, int first ATTRIBUTE_UNUSED
)
15202 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15204 /* For AAPCS, small aggregates, small fixed-point types,
15205 and small complex types are always padded upwards. */
15208 if ((AGGREGATE_TYPE_P (type
)
15209 || TREE_CODE (type
) == COMPLEX_TYPE
15210 || FIXED_POINT_TYPE_P (type
))
15211 && int_size_in_bytes (type
) <= 4)
/* Mode-based check used when no TYPE is available -- presumably the
   libcall case; confirm against the full source.  */
15216 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15217 && GET_MODE_SIZE (mode
) <= 4)
15222 /* Otherwise, use default padding. */
15223 return !BYTES_BIG_ENDIAN
;
15226 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15227 assuming that the address in the base register is word aligned. */
/* NOTE(review): fragment corrupted by extraction; the assignments to
   max_offset (per-target limits) are missing lines.  Code kept
   byte-identical.  */
15229 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15231 HOST_WIDE_INT max_offset
;
15233 /* Offset must be a multiple of 4 in Thumb mode. */
15234 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
15239 else if (TARGET_ARM
)
/* Final symmetric range check against the target's maximum.  */
15244 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15247 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15248 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15249 Assumes that the address in the base register RN is word aligned. Pattern
15250 guarantees that both memory accesses use the same base register,
15251 the offsets are constants within the range, and the gap between the offsets is 4.
15252 If reload complete then check that registers are legal. WBACK indicates whether
15253 address is updated. LOAD indicates whether memory access is load or store. */
/* NOTE(review): fragment corrupted by extraction (interior lines,
   including the REGNO extraction of t/t2/n and several returns, are
   missing).  Code kept byte-identical.  */
15255 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15256 bool wback
, bool load
)
15258 unsigned int t
, t2
, n
;
15260 if (!reload_completed
)
15263 if (!offset_ok_for_ldrd_strd (offset
))
/* Thumb-2 register legality checks (SP/PC restrictions, writeback
   conflicts, Cortex-M3 LDRD erratum).  */
15270 if ((TARGET_THUMB2
)
15271 && ((wback
&& (n
== t
|| n
== t2
))
15272 || (t
== SP_REGNUM
)
15273 || (t
== PC_REGNUM
)
15274 || (t2
== SP_REGNUM
)
15275 || (t2
== PC_REGNUM
)
15276 || (!load
&& (n
== PC_REGNUM
))
15277 || (load
&& (t
== t2
))
15278 /* Triggers Cortex-M3 LDRD errata. */
15279 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
/* ARM-mode checks: destination pair must be even/odd consecutive.  */
15283 && ((wback
&& (n
== t
|| n
== t2
))
15284 || (t2
== PC_REGNUM
)
15285 || (t
% 2 != 0) /* First destination register is not even. */
15287 /* PC can be used as base register (for offset addressing only),
15288 but it is deprecated. */
15289 || (n
== PC_REGNUM
)))
15295 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15296 operand MEM's address contains an immediate offset from the base
15297 register and has no side effects, in which case it sets BASE and
15298 OFFSET accordingly. */
/* NOTE(review): fragment corrupted by extraction (e.g. the plain-REG
   address case between lines 15324 and 15332 is missing).  Code kept
   byte-identical.  */
15300 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
15304 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15306 /* TODO: Handle more general memory operand patterns, such as
15307 PRE_DEC and PRE_INC. */
15309 if (side_effects_p (mem
))
15312 /* Can't deal with subregs. */
15313 if (GET_CODE (mem
) == SUBREG
)
15316 gcc_assert (MEM_P (mem
));
15318 *offset
= const0_rtx
;
15320 addr
= XEXP (mem
, 0);
15322 /* If addr isn't valid for DImode, then we can't handle it. */
15323 if (!arm_legitimate_address_p (DImode
, addr
,
15324 reload_in_progress
|| reload_completed
))
15332 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15334 *base
= XEXP (addr
, 0);
15335 *offset
= XEXP (addr
, 1);
15336 return (REG_P (*base
) && CONST_INT_P (*offset
));
/* Exchange two rtx lvalues in place (multi-statement macro wrapped in
   do/while(0) so it behaves as a single statement).  */
15342 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15344 /* Called from a peephole2 to replace two word-size accesses with a
15345 single LDRD/STRD instruction. Returns true iff we can generate a
15346 new instruction sequence. That is, both accesses use the same base
15347 register and the gap between constant offsets is 4. This function
15348 may reorder its operands to match ldrd/strd RTL templates.
15349 OPERANDS are the operands found by the peephole matcher;
15350 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15351 corresponding memory operands. LOAD indicates whether the access
15352 is load or store. CONST_STORE indicates a store of constant
15353 integer values held in OPERANDS[4,5] and assumes that the pattern
15354 is of length 4 insn, for the purpose of checking dead registers.
15355 COMMUTE indicates that register operands may be reordered. */
/* NOTE(review): this fragment is corrupted by extraction -- tokens are
   split across lines and interior source lines are missing.  The one
   concrete, safely repairable defect is encoding corruption: the byte
   U+00AE ("registered" sign, HTML entity "&reg;") appears where the code
   must read the address-of expression "&regset" (it is passed to
   peep2_find_free_register / add_to_hard_reg_set, which take a
   HARD_REG_SET pointer -- cf. the intact "&cur_base"/"&cur_offset" call
   above).  Those seven occurrences are repaired below; all other tokens
   are unchanged.  */
15357 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15358 bool const_store
, bool commute
)
15361 HOST_WIDE_INT offsets
[2], offset
;
15362 rtx base
= NULL_RTX
;
15363 rtx cur_base
, cur_offset
, tmp
;
15365 HARD_REG_SET regset
;
15367 gcc_assert (!const_store
|| !load
);
15368 /* Check that the memory references are immediate offsets from the
15369 same base register. Extract the base register, the destination
15370 registers, and the corresponding memory offsets. */
15371 for (i
= 0; i
< nops
; i
++)
15373 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
15378 else if (REGNO (base
) != REGNO (cur_base
))
15381 offsets
[i
] = INTVAL (cur_offset
);
15382 if (GET_CODE (operands
[i
]) == SUBREG
)
15384 tmp
= SUBREG_REG (operands
[i
]);
15385 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15390 /* Make sure there is no dependency between the individual loads. */
15391 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15392 return false; /* RAW */
15394 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15395 return false; /* WAW */
15397 /* If the same input register is used in both stores
15398 when storing different constants, try to find a free register.
15399 For example, the code
15404 can be transformed into
15407 in Thumb mode assuming that r1 is free. */
15409 && REGNO (operands
[0]) == REGNO (operands
[1])
15410 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15414 CLEAR_HARD_REG_SET (regset
);
15415 tmp
= peep2_find_free_register (0, 4, "r", SImode
, &regset
);
15416 if (tmp
== NULL_RTX
)
15419 /* Use the new register in the first load to ensure that
15420 if the original input register is not dead after peephole,
15421 then it will have the correct constant value. */
15424 else if (TARGET_ARM
)
15427 int regno
= REGNO (operands
[0]);
15428 if (!peep2_reg_dead_p (4, operands
[0]))
15430 /* When the input register is even and is not dead after the
15431 pattern, it has to hold the second constant but we cannot
15432 form a legal STRD in ARM mode with this register as the second
15434 if (regno
% 2 == 0)
15437 /* Is regno-1 free? */
15438 SET_HARD_REG_SET (regset
);
15439 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15440 tmp
= peep2_find_free_register (0, 4, "r", SImode
, &regset
);
15441 if (tmp
== NULL_RTX
)
15448 /* Find a DImode register. */
15449 CLEAR_HARD_REG_SET (regset
);
15450 tmp
= peep2_find_free_register (0, 4, "r", DImode
, &regset
);
15451 if (tmp
!= NULL_RTX
)
15453 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15454 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15458 /* Can we use the input register to form a DI register? */
15459 SET_HARD_REG_SET (regset
);
15460 CLEAR_HARD_REG_BIT(regset
,
15461 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15462 tmp
= peep2_find_free_register (0, 4, "r", SImode
, &regset
);
15463 if (tmp
== NULL_RTX
)
15465 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
15469 gcc_assert (operands
[0] != NULL_RTX
);
15470 gcc_assert (operands
[1] != NULL_RTX
);
15471 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15472 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15476 /* Make sure the instructions are ordered with lower memory access first. */
15477 if (offsets
[0] > offsets
[1])
15479 gap
= offsets
[0] - offsets
[1];
15480 offset
= offsets
[1];
15482 /* Swap the instructions such that lower memory is accessed first. */
15483 SWAP_RTX (operands
[0], operands
[1]);
15484 SWAP_RTX (operands
[2], operands
[3]);
15486 SWAP_RTX (operands
[4], operands
[5]);
15490 gap
= offsets
[1] - offsets
[0];
15491 offset
= offsets
[0];
15494 /* Make sure accesses are to consecutive memory locations. */
15498 /* Make sure we generate legal instructions. */
15499 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15503 /* In Thumb state, where registers are almost unconstrained, there
15504 is little hope to fix it. */
15508 if (load
&& commute
)
15510 /* Try reordering registers. */
15511 SWAP_RTX (operands
[0], operands
[1]);
15512 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15519 /* If input registers are dead after this pattern, they can be
15520 reordered or replaced by other registers that are free in the
15521 current pattern. */
15522 if (!peep2_reg_dead_p (4, operands
[0])
15523 || !peep2_reg_dead_p (4, operands
[1]))
15526 /* Try to reorder the input registers. */
15527 /* For example, the code
15532 can be transformed into
15537 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15540 SWAP_RTX (operands
[0], operands
[1]);
15544 /* Try to find a free DI register. */
15545 CLEAR_HARD_REG_SET (regset
);
15546 add_to_hard_reg_set (&regset
, SImode
, REGNO (operands
[0]));
15547 add_to_hard_reg_set (&regset
, SImode
, REGNO (operands
[1]));
15550 tmp
= peep2_find_free_register (0, 4, "r", DImode
, &regset
);
15551 if (tmp
== NULL_RTX
)
15554 /* DREG must be an even-numbered register in DImode.
15555 Split it into SI registers. */
15556 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15557 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15558 gcc_assert (operands
[0] != NULL_RTX
);
15559 gcc_assert (operands
[1] != NULL_RTX
);
15560 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15561 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15563 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15576 /* Print a symbolic form of X to the debug file, F. */
/* NOTE(review): fragment corrupted by extraction -- the switch's case
   labels and several break statements are missing lines.  Code kept
   byte-identical.  Each branch prints one rtx flavour (CONST_INT,
   CONST_DOUBLE-style wide value, CONST_VECTOR elements, strings/symbols,
   label refs) and recurses on sub-expressions; unknown codes print
   "????".  */
15578 arm_print_value (FILE *f
, rtx x
)
15580 switch (GET_CODE (x
))
15583 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15587 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
15595 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
15597 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
15598 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
15606 fprintf (f
, "\"%s\"", XSTR (x
, 0));
15610 fprintf (f
, "`%s'", XSTR (x
, 0));
15614 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
15618 arm_print_value (f
, XEXP (x
, 0));
15622 arm_print_value (f
, XEXP (x
, 0));
15624 arm_print_value (f
, XEXP (x
, 1));
15632 fprintf (f
, "????");
15637 /* Routines for manipulation of the constant pool. */
15639 /* Arm instructions cannot load a large constant directly into a
15640 register; they have to come from a pc relative load. The constant
15641 must therefore be placed in the addressable range of the pc
15642 relative load. Depending on the precise pc relative load
15643 instruction the range is somewhere between 256 bytes and 4k. This
15644 means that we often have to dump a constant inside a function, and
15645 generate code to branch around it.
15647 It is important to minimize this, since the branches will slow
15648 things down and make the code larger.
15650 Normally we can hide the table after an existing unconditional
15651 branch so that there is no interruption of the flow, but in the
15652 worst case the code looks like this:
15670 We fix this by performing a scan after scheduling, which notices
15671 which instructions need to have their operands fetched from the
15672 constant table and builds the table.
15674 The algorithm starts by building a table of all the constants that
15675 need fixing up and all the natural barriers in the function (places
15676 where a constant table can be dropped without breaking the flow).
15677 For each fixup we note how far the pc-relative replacement will be
15678 able to reach and the offset of the instruction into the function.
15680 Having built the table we then group the fixes together to form
15681 tables that are as large as possible (subject to addressing
15682 constraints) and emit each table of constants after the last
15683 barrier that is within range of all the instructions in the group.
15684 If a group does not contain a barrier, then we forcibly create one
15685 by inserting a jump instruction into the flow. Once the table has
15686 been inserted, the insns are then modified to reference the
15687 relevant entry in the pool.
15689 Possible enhancements to the algorithm (not implemented) are:
15691 1) For some processors and object formats, there may be benefit in
15692 aligning the pools to the start of cache lines; this alignment
15693 would need to be taken into account when calculating addressability
15696 /* These typedefs are located at the start of this file, so that
15697 they can be used in the prototypes there. This comment is to
15698 remind readers of that fact so that the following structures
15699 can be understood more easily.
15701 typedef struct minipool_node Mnode;
15702 typedef struct minipool_fixup Mfix; */
/* NOTE(review): fragment corrupted by extraction -- several member
   declarations (the link pointers, refcount, value, fix_size) survive
   only as their comments.  Kept byte-identical.  */
/* One constant-pool ("minipool") entry; see the algorithm description
   above.  */
15704 struct minipool_node
15706 /* Doubly linked chain of entries. */
15709 /* The maximum offset into the code that this entry can be placed. While
15710 pushing fixes for forward references, all entries are sorted in order
15711 of increasing max_address. */
15712 HOST_WIDE_INT max_address
;
15713 /* Similarly for an entry inserted for a backwards ref. */
15714 HOST_WIDE_INT min_address
;
15715 /* The number of fixes referencing this entry. This can become zero
15716 if we "unpush" an entry. In this case we ignore the entry when we
15717 come to emit the code. */
15719 /* The offset from the start of the minipool. */
15720 HOST_WIDE_INT offset
;
15721 /* The value in table. */
15723 /* The mode of value. */
15724 enum machine_mode mode
;
15725 /* The size of the value. With iWMMXt enabled
15726 sizes > 4 also imply an alignment of 8-bytes. */
/* A pending fixup: an insn that needs a constant placed in a minipool.
   NOTE(review): most members (insn, value, minipool link) were dropped
   by the extraction; only the address/mode/range fields remain.  */
15730 struct minipool_fixup
15734 HOST_WIDE_INT address
;
15736 enum machine_mode mode
;
/* How far forwards/backwards the referencing insn can reach.  */
15740 HOST_WIDE_INT forwards
;
15741 HOST_WIDE_INT backwards
;
15744 /* Fixes less than a word need padding out to a word boundary. */
15745 #define MINIPOOL_FIX_SIZE(mode) \
15746 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
/* Current minipool contents: doubly-linked list head/tail, the label
   that marks the pool in the insn stream, and accumulated padding.  */
15748 static Mnode
* minipool_vector_head
;
15749 static Mnode
* minipool_vector_tail
;
15750 static rtx minipool_vector_label
;
15751 static int minipool_pad
;
15753 /* The linked list of all minipool fixes required for this function. */
15754 Mfix
* minipool_fix_head
;
15755 Mfix
* minipool_fix_tail
;
15756 /* The fix entry for the current minipool, once it has been placed. */
15757 Mfix
* minipool_barrier
;
/* Default: jump tables live in the data section unless the target says
   otherwise.  (NOTE(review): closing #endif not visible in this
   extraction.)  */
15759 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15760 #define JUMP_TABLES_IN_TEXT_SECTION 0
/* Return the size in bytes of the jump table headed by INSN, or 0 when
   tables are emitted into a separate read-only data section.
   NOTE(review): fragment corrupted by extraction -- the switch over
   modesize and the returns are missing lines.  Code kept byte-identical. */
15763 static HOST_WIDE_INT
15764 get_jump_table_size (rtx insn
)
15766 /* ADDR_VECs only take room if read-only data goes into the text
15768 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
15770 rtx body
= PATTERN (insn
);
/* ADDR_DIFF_VEC stores its vector in operand 1, ADDR_VEC in operand 0. */
15771 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
15772 HOST_WIDE_INT size
;
15773 HOST_WIDE_INT modesize
;
15775 modesize
= GET_MODE_SIZE (GET_MODE (body
));
15776 size
= modesize
* XVECLEN (body
, elt
);
15780 /* Round up size of TBB table to a halfword boundary. */
15781 size
= (size
+ 1) & ~(HOST_WIDE_INT
)1;
15784 /* No padding necessary for TBH. */
15787 /* Add two bytes for alignment on Thumb. */
15792 gcc_unreachable ();
15800 /* Return the maximum amount of padding that will be inserted before
   LABEL.  (NOTE(review): comment truncated by extraction.)  */
/* Padding = label alignment minus the minimum insn size (2 on Thumb,
   4 on ARM), clamped at zero.  */
15803 static HOST_WIDE_INT
15804 get_label_padding (rtx label
)
15806 HOST_WIDE_INT align
, min_insn_size
;
15808 align
= 1 << label_to_alignment (label
);
15809 min_insn_size
= TARGET_THUMB
? 2 : 4;
15810 return align
> min_insn_size
? align
- min_insn_size
: 0;
15813 /* Move a minipool fix MP from its current location to before MAX_MP.
15814 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15815 constraints may need updating. */
/* NOTE(review): fragment corrupted by extraction -- braces, else arms
   and the return are missing lines.  Code kept byte-identical.  The
   visible logic: tighten mp->max_address, unlink MP, relink it before
   MAX_MP, then ripple the tightened constraint back through preceding
   entries.  */
15817 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
15818 HOST_WIDE_INT max_address
)
15820 /* The code below assumes these are different. */
15821 gcc_assert (mp
!= max_mp
);
15823 if (max_mp
== NULL
)
15825 if (max_address
< mp
->max_address
)
15826 mp
->max_address
= max_address
;
/* Otherwise constrain by MAX_MP, leaving room for MP's own size.  */
15830 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15831 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15833 mp
->max_address
= max_address
;
15835 /* Unlink MP from its current position. Since max_mp is non-null,
15836 mp->prev must be non-null. */
15837 mp
->prev
->next
= mp
->next
;
15838 if (mp
->next
!= NULL
)
15839 mp
->next
->prev
= mp
->prev
;
15841 minipool_vector_tail
= mp
->prev
;
15843 /* Re-insert it before MAX_MP. */
15845 mp
->prev
= max_mp
->prev
;
15848 if (mp
->prev
!= NULL
)
15849 mp
->prev
->next
= mp
;
15851 minipool_vector_head
= mp
;
15854 /* Save the new entry. */
15857 /* Scan over the preceding entries and adjust their addresses as
15859 while (mp
->prev
!= NULL
15860 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15862 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15869 /* Add a constant to the minipool for a forward reference. Returns the
15870 node added or NULL if the constant will not fit in this pool. */
/* NOTE(review): fragment corrupted by extraction -- allocation of the
   new node, several braces/else arms and the final return are missing
   lines.  Code kept byte-identical.  */
15872 add_minipool_forward_ref (Mfix
*fix
)
15874 /* If set, max_mp is the first pool_entry that has a lower
15875 constraint than the one we are trying to add. */
15876 Mnode
* max_mp
= NULL
;
15877 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
15880 /* If the minipool starts before the end of FIX->INSN then this FIX
15881 can not be placed into the current pool. Furthermore, adding the
15882 new constant pool entry may cause the pool to start FIX_SIZE bytes
15884 if (minipool_vector_head
&&
15885 (fix
->address
+ get_attr_length (fix
->insn
)
15886 >= minipool_vector_head
->max_address
- fix
->fix_size
))
15889 /* Scan the pool to see if a constant with the same value has
15890 already been added. While we are doing this, also note the
15891 location where we must insert the constant if it doesn't already
15893 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15895 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
15896 && fix
->mode
== mp
->mode
15897 && (!LABEL_P (fix
->value
)
15898 || (CODE_LABEL_NUMBER (fix
->value
)
15899 == CODE_LABEL_NUMBER (mp
->value
)))
15900 && rtx_equal_p (fix
->value
, mp
->value
))
15902 /* More than one fix references this entry. */
15904 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
15907 /* Note the insertion point if necessary. */
15909 && mp
->max_address
> max_address
)
15912 /* If we are inserting an 8-bytes aligned quantity and
15913 we have not already found an insertion point, then
15914 make sure that all such 8-byte aligned quantities are
15915 placed at the start of the pool. */
15916 if (ARM_DOUBLEWORD_ALIGN
15918 && fix
->fix_size
>= 8
15919 && mp
->fix_size
< 8)
15922 max_address
= mp
->max_address
;
15926 /* The value is not currently in the minipool, so we need to create
15927 a new entry for it. If MAX_MP is NULL, the entry will be put on
15928 the end of the list since the placement is less constrained than
15929 any existing entry. Otherwise, we insert the new fix before
15930 MAX_MP and, if necessary, adjust the constraints on the other
15933 mp
->fix_size
= fix
->fix_size
;
15934 mp
->mode
= fix
->mode
;
15935 mp
->value
= fix
->value
;
15937 /* Not yet required for a backwards ref. */
15938 mp
->min_address
= -65536;
15940 if (max_mp
== NULL
)
15942 mp
->max_address
= max_address
;
/* Append at the tail; create the pool label on first insertion.  */
15944 mp
->prev
= minipool_vector_tail
;
15946 if (mp
->prev
== NULL
)
15948 minipool_vector_head
= mp
;
15949 minipool_vector_label
= gen_label_rtx ();
15952 mp
->prev
->next
= mp
;
15954 minipool_vector_tail
= mp
;
/* Insert before MAX_MP, tightening the constraint if needed.  */
15958 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15959 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15961 mp
->max_address
= max_address
;
15964 mp
->prev
= max_mp
->prev
;
15966 if (mp
->prev
!= NULL
)
15967 mp
->prev
->next
= mp
;
15969 minipool_vector_head
= mp
;
15972 /* Save the new entry. */
15975 /* Scan over the preceding entries and adjust their addresses as
15977 while (mp
->prev
!= NULL
15978 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15980 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15988 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
15989 HOST_WIDE_INT min_address
)
15991 HOST_WIDE_INT offset
;
15993 /* The code below assumes these are different. */
15994 gcc_assert (mp
!= min_mp
);
15996 if (min_mp
== NULL
)
15998 if (min_address
> mp
->min_address
)
15999 mp
->min_address
= min_address
;
16003 /* We will adjust this below if it is too loose. */
16004 mp
->min_address
= min_address
;
16006 /* Unlink MP from its current position. Since min_mp is non-null,
16007 mp->next must be non-null. */
16008 mp
->next
->prev
= mp
->prev
;
16009 if (mp
->prev
!= NULL
)
16010 mp
->prev
->next
= mp
->next
;
16012 minipool_vector_head
= mp
->next
;
16014 /* Reinsert it after MIN_MP. */
16016 mp
->next
= min_mp
->next
;
16018 if (mp
->next
!= NULL
)
16019 mp
->next
->prev
= mp
;
16021 minipool_vector_tail
= mp
;
16027 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16029 mp
->offset
= offset
;
16030 if (mp
->refcount
> 0)
16031 offset
+= mp
->fix_size
;
16033 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16034 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16040 /* Add a constant to the minipool for a backward reference. Returns the
16041 node added or NULL if the constant will not fit in this pool.
16043 Note that the code for insertion for a backwards reference can be
16044 somewhat confusing because the calculated offsets for each fix do
16045 not take into account the size of the pool (which is still under
16048 add_minipool_backward_ref (Mfix
*fix
)
16050 /* If set, min_mp is the last pool_entry that has a lower constraint
16051 than the one we are trying to add. */
16052 Mnode
*min_mp
= NULL
;
16053 /* This can be negative, since it is only a constraint. */
16054 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16057 /* If we can't reach the current pool from this insn, or if we can't
16058 insert this entry at the end of the pool without pushing other
16059 fixes out of range, then we don't try. This ensures that we
16060 can't fail later on. */
16061 if (min_address
>= minipool_barrier
->address
16062 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16063 >= minipool_barrier
->address
))
16066 /* Scan the pool to see if a constant with the same value has
16067 already been added. While we are doing this, also note the
16068 location where we must insert the constant if it doesn't already
16070 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16072 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16073 && fix
->mode
== mp
->mode
16074 && (!LABEL_P (fix
->value
)
16075 || (CODE_LABEL_NUMBER (fix
->value
)
16076 == CODE_LABEL_NUMBER (mp
->value
)))
16077 && rtx_equal_p (fix
->value
, mp
->value
)
16078 /* Check that there is enough slack to move this entry to the
16079 end of the table (this is conservative). */
16080 && (mp
->max_address
16081 > (minipool_barrier
->address
16082 + minipool_vector_tail
->offset
16083 + minipool_vector_tail
->fix_size
)))
16086 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16089 if (min_mp
!= NULL
)
16090 mp
->min_address
+= fix
->fix_size
;
16093 /* Note the insertion point if necessary. */
16094 if (mp
->min_address
< min_address
)
16096 /* For now, we do not allow the insertion of 8-byte alignment
16097 requiring nodes anywhere but at the start of the pool. */
16098 if (ARM_DOUBLEWORD_ALIGN
16099 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16104 else if (mp
->max_address
16105 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16107 /* Inserting before this entry would push the fix beyond
16108 its maximum address (which can happen if we have
16109 re-located a forwards fix); force the new fix to come
16111 if (ARM_DOUBLEWORD_ALIGN
16112 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16117 min_address
= mp
->min_address
+ fix
->fix_size
;
16120 /* Do not insert a non-8-byte aligned quantity before 8-byte
16121 aligned quantities. */
16122 else if (ARM_DOUBLEWORD_ALIGN
16123 && fix
->fix_size
< 8
16124 && mp
->fix_size
>= 8)
16127 min_address
= mp
->min_address
+ fix
->fix_size
;
16132 /* We need to create a new entry. */
16134 mp
->fix_size
= fix
->fix_size
;
16135 mp
->mode
= fix
->mode
;
16136 mp
->value
= fix
->value
;
16138 mp
->max_address
= minipool_barrier
->address
+ 65536;
16140 mp
->min_address
= min_address
;
16142 if (min_mp
== NULL
)
16145 mp
->next
= minipool_vector_head
;
16147 if (mp
->next
== NULL
)
16149 minipool_vector_tail
= mp
;
16150 minipool_vector_label
= gen_label_rtx ();
16153 mp
->next
->prev
= mp
;
16155 minipool_vector_head
= mp
;
16159 mp
->next
= min_mp
->next
;
16163 if (mp
->next
!= NULL
)
16164 mp
->next
->prev
= mp
;
16166 minipool_vector_tail
= mp
;
16169 /* Save the new entry. */
16177 /* Scan over the following entries and adjust their offsets. */
16178 while (mp
->next
!= NULL
)
16180 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16181 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16184 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16186 mp
->next
->offset
= mp
->offset
;
16195 assign_minipool_offsets (Mfix
*barrier
)
16197 HOST_WIDE_INT offset
= 0;
16200 minipool_barrier
= barrier
;
16202 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16204 mp
->offset
= offset
;
16206 if (mp
->refcount
> 0)
16207 offset
+= mp
->fix_size
;
16211 /* Output the literal table */
16213 dump_minipool (rtx scan
)
16219 if (ARM_DOUBLEWORD_ALIGN
)
16220 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16221 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16228 fprintf (dump_file
,
16229 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16230 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16232 scan
= emit_label_after (gen_label_rtx (), scan
);
16233 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16234 scan
= emit_label_after (minipool_vector_label
, scan
);
16236 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16238 if (mp
->refcount
> 0)
16242 fprintf (dump_file
,
16243 ";; Offset %u, min %ld, max %ld ",
16244 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16245 (unsigned long) mp
->max_address
);
16246 arm_print_value (dump_file
, mp
->value
);
16247 fputc ('\n', dump_file
);
16250 switch (mp
->fix_size
)
16252 #ifdef HAVE_consttable_1
16254 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
16258 #ifdef HAVE_consttable_2
16260 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
16264 #ifdef HAVE_consttable_4
16266 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
16270 #ifdef HAVE_consttable_8
16272 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
16276 #ifdef HAVE_consttable_16
16278 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
16283 gcc_unreachable ();
16291 minipool_vector_head
= minipool_vector_tail
= NULL
;
16292 scan
= emit_insn_after (gen_consttable_end (), scan
);
16293 scan
= emit_barrier_after (scan
);
16296 /* Return the cost of forcibly inserting a barrier after INSN. */
16298 arm_barrier_cost (rtx insn
)
16300 /* Basing the location of the pool on the loop depth is preferable,
16301 but at the moment, the basic block information seems to be
16302 corrupt by this stage of the compilation. */
16303 int base_cost
= 50;
16304 rtx next
= next_nonnote_insn (insn
);
16306 if (next
!= NULL
&& LABEL_P (next
))
16309 switch (GET_CODE (insn
))
16312 /* It will always be better to place the table before the label, rather
16321 return base_cost
- 10;
16324 return base_cost
+ 10;
16328 /* Find the best place in the insn stream in the range
16329 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16330 Create the barrier by inserting a jump and add a new fix entry for
16333 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16335 HOST_WIDE_INT count
= 0;
16337 rtx from
= fix
->insn
;
16338 /* The instruction after which we will insert the jump. */
16339 rtx selected
= NULL
;
16341 /* The address at which the jump instruction will be placed. */
16342 HOST_WIDE_INT selected_address
;
16344 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16345 rtx label
= gen_label_rtx ();
16347 selected_cost
= arm_barrier_cost (from
);
16348 selected_address
= fix
->address
;
16350 while (from
&& count
< max_count
)
16355 /* This code shouldn't have been called if there was a natural barrier
16357 gcc_assert (!BARRIER_P (from
));
16359 /* Count the length of this insn. This must stay in sync with the
16360 code that pushes minipool fixes. */
16361 if (LABEL_P (from
))
16362 count
+= get_label_padding (from
);
16364 count
+= get_attr_length (from
);
16366 /* If there is a jump table, add its length. */
16367 if (tablejump_p (from
, NULL
, &tmp
))
16369 count
+= get_jump_table_size (tmp
);
16371 /* Jump tables aren't in a basic block, so base the cost on
16372 the dispatch insn. If we select this location, we will
16373 still put the pool after the table. */
16374 new_cost
= arm_barrier_cost (from
);
16376 if (count
< max_count
16377 && (!selected
|| new_cost
<= selected_cost
))
16380 selected_cost
= new_cost
;
16381 selected_address
= fix
->address
+ count
;
16384 /* Continue after the dispatch table. */
16385 from
= NEXT_INSN (tmp
);
16389 new_cost
= arm_barrier_cost (from
);
16391 if (count
< max_count
16392 && (!selected
|| new_cost
<= selected_cost
))
16395 selected_cost
= new_cost
;
16396 selected_address
= fix
->address
+ count
;
16399 from
= NEXT_INSN (from
);
16402 /* Make sure that we found a place to insert the jump. */
16403 gcc_assert (selected
);
16405 /* Make sure we do not split a call and its corresponding
16406 CALL_ARG_LOCATION note. */
16407 if (CALL_P (selected
))
16409 rtx next
= NEXT_INSN (selected
);
16410 if (next
&& NOTE_P (next
)
16411 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
16415 /* Create a new JUMP_INSN that branches around a barrier. */
16416 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16417 JUMP_LABEL (from
) = label
;
16418 barrier
= emit_barrier_after (from
);
16419 emit_label_after (label
, barrier
);
16421 /* Create a minipool barrier entry for the new barrier. */
16422 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16423 new_fix
->insn
= barrier
;
16424 new_fix
->address
= selected_address
;
16425 new_fix
->next
= fix
->next
;
16426 fix
->next
= new_fix
;
16431 /* Record that there is a natural barrier in the insn stream at
16434 push_minipool_barrier (rtx insn
, HOST_WIDE_INT address
)
16436 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16439 fix
->address
= address
;
16442 if (minipool_fix_head
!= NULL
)
16443 minipool_fix_tail
->next
= fix
;
16445 minipool_fix_head
= fix
;
16447 minipool_fix_tail
= fix
;
16450 /* Record INSN, which will need fixing up to load a value from the
16451 minipool. ADDRESS is the offset of the insn since the start of the
16452 function; LOC is a pointer to the part of the insn which requires
16453 fixing; VALUE is the constant that must be loaded, which is of type
16456 push_minipool_fix (rtx insn
, HOST_WIDE_INT address
, rtx
*loc
,
16457 enum machine_mode mode
, rtx value
)
16459 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16462 fix
->address
= address
;
16465 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16466 fix
->value
= value
;
16467 fix
->forwards
= get_attr_pool_range (insn
);
16468 fix
->backwards
= get_attr_neg_pool_range (insn
);
16469 fix
->minipool
= NULL
;
16471 /* If an insn doesn't have a range defined for it, then it isn't
16472 expecting to be reworked by this code. Better to stop now than
16473 to generate duff assembly code. */
16474 gcc_assert (fix
->forwards
|| fix
->backwards
);
16476 /* If an entry requires 8-byte alignment then assume all constant pools
16477 require 4 bytes of padding. Trying to do this later on a per-pool
16478 basis is awkward because existing pool entries have to be modified. */
16479 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
16484 fprintf (dump_file
,
16485 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16486 GET_MODE_NAME (mode
),
16487 INSN_UID (insn
), (unsigned long) address
,
16488 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16489 arm_print_value (dump_file
, fix
->value
);
16490 fprintf (dump_file
, "\n");
16493 /* Add it to the chain of fixes. */
16496 if (minipool_fix_head
!= NULL
)
16497 minipool_fix_tail
->next
= fix
;
16499 minipool_fix_head
= fix
;
16501 minipool_fix_tail
= fix
;
16504 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16505 Returns the number of insns needed, or 99 if we always want to synthesize
16508 arm_max_const_double_inline_cost ()
16510 /* Let the value get synthesized to avoid the use of literal pools. */
16511 if (arm_disable_literal_pool
)
16514 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
16517 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16518 Returns the number of insns needed, or 99 if we don't know how to
16521 arm_const_double_inline_cost (rtx val
)
16523 rtx lowpart
, highpart
;
16524 enum machine_mode mode
;
16526 mode
= GET_MODE (val
);
16528 if (mode
== VOIDmode
)
16531 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16533 lowpart
= gen_lowpart (SImode
, val
);
16534 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16536 gcc_assert (CONST_INT_P (lowpart
));
16537 gcc_assert (CONST_INT_P (highpart
));
16539 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16540 NULL_RTX
, NULL_RTX
, 0, 0)
16541 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16542 NULL_RTX
, NULL_RTX
, 0, 0));
16545 /* Return true if it is worthwhile to split a 64-bit constant into two
16546 32-bit operations. This is the case if optimizing for size, or
16547 if we have load delay slots, or if one 32-bit part can be done with
16548 a single data operation. */
16550 arm_const_double_by_parts (rtx val
)
16552 enum machine_mode mode
= GET_MODE (val
);
16555 if (optimize_size
|| arm_ld_sched
)
16558 if (mode
== VOIDmode
)
16561 part
= gen_highpart_mode (SImode
, mode
, val
);
16563 gcc_assert (CONST_INT_P (part
));
16565 if (const_ok_for_arm (INTVAL (part
))
16566 || const_ok_for_arm (~INTVAL (part
)))
16569 part
= gen_lowpart (SImode
, val
);
16571 gcc_assert (CONST_INT_P (part
));
16573 if (const_ok_for_arm (INTVAL (part
))
16574 || const_ok_for_arm (~INTVAL (part
)))
16580 /* Return true if it is possible to inline both the high and low parts
16581 of a 64-bit constant into 32-bit data processing instructions. */
16583 arm_const_double_by_immediates (rtx val
)
16585 enum machine_mode mode
= GET_MODE (val
);
16588 if (mode
== VOIDmode
)
16591 part
= gen_highpart_mode (SImode
, mode
, val
);
16593 gcc_assert (CONST_INT_P (part
));
16595 if (!const_ok_for_arm (INTVAL (part
)))
16598 part
= gen_lowpart (SImode
, val
);
16600 gcc_assert (CONST_INT_P (part
));
16602 if (!const_ok_for_arm (INTVAL (part
)))
16608 /* Scan INSN and note any of its operands that need fixing.
16609 If DO_PUSHES is false we do not actually push any of the fixups
16612 note_invalid_constants (rtx insn
, HOST_WIDE_INT address
, int do_pushes
)
16616 extract_insn (insn
);
16618 if (!constrain_operands (1))
16619 fatal_insn_not_found (insn
);
16621 if (recog_data
.n_alternatives
== 0)
16624 /* Fill in recog_op_alt with information about the constraints of
16626 preprocess_constraints ();
16628 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16630 /* Things we need to fix can only occur in inputs. */
16631 if (recog_data
.operand_type
[opno
] != OP_IN
)
16634 /* If this alternative is a memory reference, then any mention
16635 of constants in this alternative is really to fool reload
16636 into allowing us to accept one there. We need to fix them up
16637 now so that we output the right code. */
16638 if (recog_op_alt
[opno
][which_alternative
].memory_ok
)
16640 rtx op
= recog_data
.operand
[opno
];
16642 if (CONSTANT_P (op
))
16645 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
16646 recog_data
.operand_mode
[opno
], op
);
16648 else if (MEM_P (op
)
16649 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
16650 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
16654 rtx cop
= avoid_constant_pool_reference (op
);
16656 /* Casting the address of something to a mode narrower
16657 than a word can cause avoid_constant_pool_reference()
16658 to return the pool reference itself. That's no good to
16659 us here. Lets just hope that we can use the
16660 constant pool value directly. */
16662 cop
= get_pool_constant (XEXP (op
, 0));
16664 push_minipool_fix (insn
, address
,
16665 recog_data
.operand_loc
[opno
],
16666 recog_data
.operand_mode
[opno
], cop
);
16676 /* Rewrite move insn into subtract of 0 if the condition codes will
16677 be useful in next conditional jump insn. */
16680 thumb1_reorg (void)
16684 FOR_EACH_BB_FN (bb
, cfun
)
16687 rtx pat
, op0
, set
= NULL
;
16688 rtx prev
, insn
= BB_END (bb
);
16689 bool insn_clobbered
= false;
16691 while (insn
!= BB_HEAD (bb
) && DEBUG_INSN_P (insn
))
16692 insn
= PREV_INSN (insn
);
16694 /* Find the last cbranchsi4_insn in basic block BB. */
16695 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
16698 /* Get the register with which we are comparing. */
16699 pat
= PATTERN (insn
);
16700 op0
= XEXP (XEXP (SET_SRC (pat
), 0), 0);
16702 /* Find the first flag setting insn before INSN in basic block BB. */
16703 gcc_assert (insn
!= BB_HEAD (bb
));
16704 for (prev
= PREV_INSN (insn
);
16706 && prev
!= BB_HEAD (bb
)
16708 || DEBUG_INSN_P (prev
)
16709 || ((set
= single_set (prev
)) != NULL
16710 && get_attr_conds (prev
) == CONDS_NOCOND
)));
16711 prev
= PREV_INSN (prev
))
16713 if (reg_set_p (op0
, prev
))
16714 insn_clobbered
= true;
16717 /* Skip if op0 is clobbered by insn other than prev. */
16718 if (insn_clobbered
)
16724 dest
= SET_DEST (set
);
16725 src
= SET_SRC (set
);
16726 if (!low_register_operand (dest
, SImode
)
16727 || !low_register_operand (src
, SImode
))
16730 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16731 in INSN. Both src and dest of the move insn are checked. */
16732 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
16734 dest
= copy_rtx (dest
);
16735 src
= copy_rtx (src
);
16736 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
16737 PATTERN (prev
) = gen_rtx_SET (VOIDmode
, dest
, src
);
16738 INSN_CODE (prev
) = -1;
16739 /* Set test register in INSN to dest. */
16740 XEXP (XEXP (SET_SRC (pat
), 0), 0) = copy_rtx (dest
);
16741 INSN_CODE (insn
) = -1;
16746 /* Convert instructions to their cc-clobbering variant if possible, since
16747 that allows us to use smaller encodings. */
16750 thumb2_reorg (void)
16755 INIT_REG_SET (&live
);
16757 /* We are freeing block_for_insn in the toplev to keep compatibility
16758 with old MDEP_REORGS that are not CFG based. Recompute it now. */
16759 compute_bb_for_insn ();
16762 FOR_EACH_BB_FN (bb
, cfun
)
16766 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
16767 df_simulate_initialize_backwards (bb
, &live
);
16768 FOR_BB_INSNS_REVERSE (bb
, insn
)
16770 if (NONJUMP_INSN_P (insn
)
16771 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
16772 && GET_CODE (PATTERN (insn
)) == SET
)
16774 enum {SKIP
, CONV
, SWAP_CONV
} action
= SKIP
;
16775 rtx pat
= PATTERN (insn
);
16776 rtx dst
= XEXP (pat
, 0);
16777 rtx src
= XEXP (pat
, 1);
16778 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
16780 if (!OBJECT_P (src
))
16781 op0
= XEXP (src
, 0);
16783 if (BINARY_P (src
))
16784 op1
= XEXP (src
, 1);
16786 if (low_register_operand (dst
, SImode
))
16788 switch (GET_CODE (src
))
16791 /* Adding two registers and storing the result
16792 in the first source is already a 16-bit
16794 if (rtx_equal_p (dst
, op0
)
16795 && register_operand (op1
, SImode
))
16798 if (low_register_operand (op0
, SImode
))
16800 /* ADDS <Rd>,<Rn>,<Rm> */
16801 if (low_register_operand (op1
, SImode
))
16803 /* ADDS <Rdn>,#<imm8> */
16804 /* SUBS <Rdn>,#<imm8> */
16805 else if (rtx_equal_p (dst
, op0
)
16806 && CONST_INT_P (op1
)
16807 && IN_RANGE (INTVAL (op1
), -255, 255))
16809 /* ADDS <Rd>,<Rn>,#<imm3> */
16810 /* SUBS <Rd>,<Rn>,#<imm3> */
16811 else if (CONST_INT_P (op1
)
16812 && IN_RANGE (INTVAL (op1
), -7, 7))
16815 /* ADCS <Rd>, <Rn> */
16816 else if (GET_CODE (XEXP (src
, 0)) == PLUS
16817 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
16818 && low_register_operand (XEXP (XEXP (src
, 0), 1),
16820 && COMPARISON_P (op1
)
16821 && cc_register (XEXP (op1
, 0), VOIDmode
)
16822 && maybe_get_arm_condition_code (op1
) == ARM_CS
16823 && XEXP (op1
, 1) == const0_rtx
)
16828 /* RSBS <Rd>,<Rn>,#0
16829 Not handled here: see NEG below. */
16830 /* SUBS <Rd>,<Rn>,#<imm3>
16832 Not handled here: see PLUS above. */
16833 /* SUBS <Rd>,<Rn>,<Rm> */
16834 if (low_register_operand (op0
, SImode
)
16835 && low_register_operand (op1
, SImode
))
16840 /* MULS <Rdm>,<Rn>,<Rdm>
16841 As an exception to the rule, this is only used
16842 when optimizing for size since MULS is slow on all
16843 known implementations. We do not even want to use
16844 MULS in cold code, if optimizing for speed, so we
16845 test the global flag here. */
16846 if (!optimize_size
)
16848 /* else fall through. */
16852 /* ANDS <Rdn>,<Rm> */
16853 if (rtx_equal_p (dst
, op0
)
16854 && low_register_operand (op1
, SImode
))
16856 else if (rtx_equal_p (dst
, op1
)
16857 && low_register_operand (op0
, SImode
))
16858 action
= SWAP_CONV
;
16864 /* ASRS <Rdn>,<Rm> */
16865 /* LSRS <Rdn>,<Rm> */
16866 /* LSLS <Rdn>,<Rm> */
16867 if (rtx_equal_p (dst
, op0
)
16868 && low_register_operand (op1
, SImode
))
16870 /* ASRS <Rd>,<Rm>,#<imm5> */
16871 /* LSRS <Rd>,<Rm>,#<imm5> */
16872 /* LSLS <Rd>,<Rm>,#<imm5> */
16873 else if (low_register_operand (op0
, SImode
)
16874 && CONST_INT_P (op1
)
16875 && IN_RANGE (INTVAL (op1
), 0, 31))
16880 /* RORS <Rdn>,<Rm> */
16881 if (rtx_equal_p (dst
, op0
)
16882 && low_register_operand (op1
, SImode
))
16888 /* MVNS <Rd>,<Rm> */
16889 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
16890 if (low_register_operand (op0
, SImode
))
16895 /* MOVS <Rd>,#<imm8> */
16896 if (CONST_INT_P (src
)
16897 && IN_RANGE (INTVAL (src
), 0, 255))
16902 /* MOVS and MOV<c> with registers have different
16903 encodings, so are not relevant here. */
16911 if (action
!= SKIP
)
16913 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
16914 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
16917 if (action
== SWAP_CONV
)
16919 src
= copy_rtx (src
);
16920 XEXP (src
, 0) = op1
;
16921 XEXP (src
, 1) = op0
;
16922 pat
= gen_rtx_SET (VOIDmode
, dst
, src
);
16923 vec
= gen_rtvec (2, pat
, clobber
);
16925 else /* action == CONV */
16926 vec
= gen_rtvec (2, pat
, clobber
);
16928 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
16929 INSN_CODE (insn
) = -1;
16933 if (NONDEBUG_INSN_P (insn
))
16934 df_simulate_one_insn_backwards (bb
, insn
, &live
);
16938 CLEAR_REG_SET (&live
);
16941 /* Gcc puts the pool in the wrong place for ARM, since we can only
16942 load addresses a limited distance around the pc. We do some
16943 special munging to move the constant pool values to the correct
16944 point in the code. */
16949 HOST_WIDE_INT address
= 0;
16954 else if (TARGET_THUMB2
)
16957 /* Ensure all insns that must be split have been split at this point.
16958 Otherwise, the pool placement code below may compute incorrect
16959 insn lengths. Note that when optimizing, all insns have already
16960 been split at this point. */
16962 split_all_insns_noflow ();
16964 minipool_fix_head
= minipool_fix_tail
= NULL
;
16966 /* The first insn must always be a note, or the code below won't
16967 scan it properly. */
16968 insn
= get_insns ();
16969 gcc_assert (NOTE_P (insn
));
16972 /* Scan all the insns and record the operands that will need fixing. */
16973 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
16975 if (BARRIER_P (insn
))
16976 push_minipool_barrier (insn
, address
);
16977 else if (INSN_P (insn
))
16981 note_invalid_constants (insn
, address
, true);
16982 address
+= get_attr_length (insn
);
16984 /* If the insn is a vector jump, add the size of the table
16985 and skip the table. */
16986 if (tablejump_p (insn
, NULL
, &table
))
16988 address
+= get_jump_table_size (table
);
16992 else if (LABEL_P (insn
))
16993 /* Add the worst-case padding due to alignment. We don't add
16994 the _current_ padding because the minipool insertions
16995 themselves might change it. */
16996 address
+= get_label_padding (insn
);
16999 fix
= minipool_fix_head
;
17001 /* Now scan the fixups and perform the required changes. */
17006 Mfix
* last_added_fix
;
17007 Mfix
* last_barrier
= NULL
;
17010 /* Skip any further barriers before the next fix. */
17011 while (fix
&& BARRIER_P (fix
->insn
))
17014 /* No more fixes. */
17018 last_added_fix
= NULL
;
17020 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17022 if (BARRIER_P (ftmp
->insn
))
17024 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17027 last_barrier
= ftmp
;
17029 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17032 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17035 /* If we found a barrier, drop back to that; any fixes that we
17036 could have reached but come after the barrier will now go in
17037 the next mini-pool. */
17038 if (last_barrier
!= NULL
)
17040 /* Reduce the refcount for those fixes that won't go into this
17042 for (fdel
= last_barrier
->next
;
17043 fdel
&& fdel
!= ftmp
;
17046 fdel
->minipool
->refcount
--;
17047 fdel
->minipool
= NULL
;
17050 ftmp
= last_barrier
;
17054 /* ftmp is first fix that we can't fit into this pool and
17055 there no natural barriers that we could use. Insert a
17056 new barrier in the code somewhere between the previous
17057 fix and this one, and arrange to jump around it. */
17058 HOST_WIDE_INT max_address
;
17060 /* The last item on the list of fixes must be a barrier, so
17061 we can never run off the end of the list of fixes without
17062 last_barrier being set. */
17065 max_address
= minipool_vector_head
->max_address
;
17066 /* Check that there isn't another fix that is in range that
17067 we couldn't fit into this pool because the pool was
17068 already too large: we need to put the pool before such an
17069 instruction. The pool itself may come just after the
17070 fix because create_fix_barrier also allows space for a
17071 jump instruction. */
17072 if (ftmp
->address
< max_address
)
17073 max_address
= ftmp
->address
+ 1;
17075 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17078 assign_minipool_offsets (last_barrier
);
17082 if (!BARRIER_P (ftmp
->insn
)
17083 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17090 /* Scan over the fixes we have identified for this pool, fixing them
17091 up and adding the constants to the pool itself. */
17092 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17093 this_fix
= this_fix
->next
)
17094 if (!BARRIER_P (this_fix
->insn
))
17097 = plus_constant (Pmode
,
17098 gen_rtx_LABEL_REF (VOIDmode
,
17099 minipool_vector_label
),
17100 this_fix
->minipool
->offset
);
17101 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17104 dump_minipool (last_barrier
->insn
);
17108 /* From now on we must synthesize any constants that we can't handle
17109 directly. This can happen if the RTL gets split during final
17110 instruction generation. */
17111 after_arm_reorg
= 1;
17113 /* Free the minipool memory. */
17114 obstack_free (&minipool_obstack
, minipool_startobj
);
17117 /* Routines to output assembly language. */
17119 /* If the rtx is the correct value then return the string of the number.
17120 In this way we can ensure that valid double constants are generated even
17121 when cross compiling. */
17123 fp_immediate_constant (rtx x
)
17127 if (!fp_consts_inited
)
17130 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
17132 gcc_assert (REAL_VALUES_EQUAL (r
, value_fp0
));
17136 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17137 static const char *
17138 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17140 if (!fp_consts_inited
)
17143 gcc_assert (REAL_VALUES_EQUAL (*r
, value_fp0
));
17147 /* OPERANDS[0] is the entire list of insns that constitute pop,
17148 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17149 is in the list, UPDATE is true iff the list contains explicit
17150 update of base register. */
17152 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17158 const char *conditional
;
17159 int num_saves
= XVECLEN (operands
[0], 0);
17160 unsigned int regno
;
17161 unsigned int regno_base
= REGNO (operands
[1]);
17164 offset
+= update
? 1 : 0;
17165 offset
+= return_pc
? 1 : 0;
17167 /* Is the base register in the list? */
17168 for (i
= offset
; i
< num_saves
; i
++)
17170 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17171 /* If SP is in the list, then the base register must be SP. */
17172 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17173 /* If base register is in the list, there must be no explicit update. */
17174 if (regno
== regno_base
)
17175 gcc_assert (!update
);
17178 conditional
= reverse
? "%?%D0" : "%?%d0";
17179 if ((regno_base
== SP_REGNUM
) && TARGET_UNIFIED_ASM
)
17181 /* Output pop (not stmfd) because it has a shorter encoding. */
17182 gcc_assert (update
);
17183 sprintf (pattern
, "pop%s\t{", conditional
);
17187 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17188 It's just a convention, their semantics are identical. */
17189 if (regno_base
== SP_REGNUM
)
17190 sprintf (pattern
, "ldm%sfd\t", conditional
);
17191 else if (TARGET_UNIFIED_ASM
)
17192 sprintf (pattern
, "ldmia%s\t", conditional
);
17194 sprintf (pattern
, "ldm%sia\t", conditional
);
17196 strcat (pattern
, reg_names
[regno_base
]);
17198 strcat (pattern
, "!, {");
17200 strcat (pattern
, ", {");
17203 /* Output the first destination register. */
17205 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17207 /* Output the rest of the destination registers. */
17208 for (i
= offset
+ 1; i
< num_saves
; i
++)
17210 strcat (pattern
, ", ");
17212 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17215 strcat (pattern
, "}");
17217 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc
)
17218 strcat (pattern
, "^");
17220 output_asm_insn (pattern
, &cond
);
17224 /* Output the assembly for a store multiple. */
17227 vfp_output_fstmd (rtx
* operands
)
17234 strcpy (pattern
, "fstmfdd%?\t%m0!, {%P1");
17235 p
= strlen (pattern
);
17237 gcc_assert (REG_P (operands
[1]));
17239 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17240 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17242 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17244 strcpy (&pattern
[p
], "}");
17246 output_asm_insn (pattern
, operands
);
17251 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17252 number of bytes pushed. */
17255 vfp_emit_fstmd (int base_reg
, int count
)
17262 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17263 register pairs are stored by a store multiple insn. We avoid this
17264 by pushing an extra pair. */
17265 if (count
== 2 && !arm_arch6
)
17267 if (base_reg
== LAST_VFP_REGNUM
- 3)
17272 /* FSTMD may not store more than 16 doubleword registers at once. Split
17273 larger stores into multiple parts (up to a maximum of two, in
17278 /* NOTE: base_reg is an internal register number, so each D register
17280 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17281 saved
+= vfp_emit_fstmd (base_reg
, 16);
17285 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17286 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17288 reg
= gen_rtx_REG (DFmode
, base_reg
);
17291 XVECEXP (par
, 0, 0)
17292 = gen_rtx_SET (VOIDmode
,
17295 gen_rtx_PRE_MODIFY (Pmode
,
17298 (Pmode
, stack_pointer_rtx
,
17301 gen_rtx_UNSPEC (BLKmode
,
17302 gen_rtvec (1, reg
),
17303 UNSPEC_PUSH_MULT
));
17305 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
17306 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17307 RTX_FRAME_RELATED_P (tmp
) = 1;
17308 XVECEXP (dwarf
, 0, 0) = tmp
;
17310 tmp
= gen_rtx_SET (VOIDmode
,
17311 gen_frame_mem (DFmode
, stack_pointer_rtx
),
17313 RTX_FRAME_RELATED_P (tmp
) = 1;
17314 XVECEXP (dwarf
, 0, 1) = tmp
;
17316 for (i
= 1; i
< count
; i
++)
17318 reg
= gen_rtx_REG (DFmode
, base_reg
);
17320 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17322 tmp
= gen_rtx_SET (VOIDmode
,
17323 gen_frame_mem (DFmode
,
17324 plus_constant (Pmode
,
17328 RTX_FRAME_RELATED_P (tmp
) = 1;
17329 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17332 par
= emit_insn (par
);
17333 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17334 RTX_FRAME_RELATED_P (par
) = 1;
17339 /* Emit a call instruction with pattern PAT. ADDR is the address of
17340 the call target. */
17343 arm_emit_call_insn (rtx pat
, rtx addr
)
17347 insn
= emit_call_insn (pat
);
17349 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17350 If the call might use such an entry, add a use of the PIC register
17351 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17352 if (TARGET_VXWORKS_RTP
17354 && GET_CODE (addr
) == SYMBOL_REF
17355 && (SYMBOL_REF_DECL (addr
)
17356 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
17357 : !SYMBOL_REF_LOCAL_P (addr
)))
17359 require_pic_register ();
17360 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
17364 /* Output a 'call' insn. */
17366 output_call (rtx
*operands
)
17368 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17370 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17371 if (REGNO (operands
[0]) == LR_REGNUM
)
17373 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17374 output_asm_insn ("mov%?\t%0, %|lr", operands
);
17377 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17379 if (TARGET_INTERWORK
|| arm_arch4t
)
17380 output_asm_insn ("bx%?\t%0", operands
);
17382 output_asm_insn ("mov%?\t%|pc, %0", operands
);
17387 /* Output a 'call' insn that is a reference in memory. This is
17388 disabled for ARMv5 and we prefer a blx instead because otherwise
17389 there's a significant performance overhead. */
17391 output_call_mem (rtx
*operands
)
17393 gcc_assert (!arm_arch5
);
17394 if (TARGET_INTERWORK
)
17396 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17397 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17398 output_asm_insn ("bx%?\t%|ip", operands
);
17400 else if (regno_use_in (LR_REGNUM
, operands
[0]))
17402 /* LR is used in the memory address. We load the address in the
17403 first instruction. It's safe to use IP as the target of the
17404 load since the call will kill it anyway. */
17405 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17406 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17408 output_asm_insn ("bx%?\t%|ip", operands
);
17410 output_asm_insn ("mov%?\t%|pc, %|ip", operands
);
17414 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17415 output_asm_insn ("ldr%?\t%|pc, %0", operands
);
17422 /* Output a move from arm registers to arm registers of a long double
17423 OPERANDS[0] is the destination.
17424 OPERANDS[1] is the source. */
17426 output_mov_long_double_arm_from_arm (rtx
*operands
)
17428 /* We have to be careful here because the two might overlap. */
17429 int dest_start
= REGNO (operands
[0]);
17430 int src_start
= REGNO (operands
[1]);
17434 if (dest_start
< src_start
)
17436 for (i
= 0; i
< 3; i
++)
17438 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17439 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17440 output_asm_insn ("mov%?\t%0, %1", ops
);
17445 for (i
= 2; i
>= 0; i
--)
17447 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17448 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17449 output_asm_insn ("mov%?\t%0, %1", ops
);
17457 arm_emit_movpair (rtx dest
, rtx src
)
17459 /* If the src is an immediate, simplify it. */
17460 if (CONST_INT_P (src
))
17462 HOST_WIDE_INT val
= INTVAL (src
);
17463 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
17464 if ((val
>> 16) & 0x0000ffff)
17465 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
17467 GEN_INT ((val
>> 16) & 0x0000ffff));
17470 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
17471 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
/* NOTE(review): this chunk is a lossily-extracted fragment of GCC's ARM
   backend; statements are split across lines and many original source
   lines (braces, case labels, the `if (emit)` guards and `*count'
   bookkeeping) are missing entirely.  The text below is left
   byte-for-byte as found; only comments have been added.

   output_move_double -- output a DImode/DFmode move between a register
   pair and memory (REG<-MEM or MEM<-REG, per the asserts below).  If
   EMIT is false only a length estimate is wanted, presumably returned
   through *COUNT -- confirm against upstream arm.c.  Emits ldrd/strd,
   ldm/stm or ldr/str pairs depending on the addressing mode and on
   TARGET_LDRD / fix_cm3_ldrd workarounds.  */
17474 /* Output a move between double words. It must be REG<-MEM
17477 output_move_double (rtx
*operands
, bool emit
, int *count
)
17479 enum rtx_code code0
= GET_CODE (operands
[0]);
17480 enum rtx_code code1
= GET_CODE (operands
[1]);
17485 /* The only case when this might happen is when
17486 you are looking at the length of a DImode instruction
17487 that has an invalid constant in it. */
17488 if (code0
== REG
&& code1
!= MEM
)
17490 gcc_assert (!emit
);
/* NOTE(review): load half -- REG pair <- MEM, dispatched on the
   address's rtx code (REG, PRE/POST_INC/DEC, PRE/POST_MODIFY,
   LABEL_REF/CONST, PLUS).  */
17497 unsigned int reg0
= REGNO (operands
[0]);
17499 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17501 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17503 switch (GET_CODE (XEXP (operands
[1], 0)))
17510 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17511 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
17513 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
17518 gcc_assert (TARGET_LDRD
);
17520 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
17527 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
17529 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
17537 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
17539 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
17544 gcc_assert (TARGET_LDRD
);
17546 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
17551 /* Autoicrement addressing modes should never have overlapping
17552 base and destination registers, and overlapping index registers
17553 are already prohibited, so this doesn't need to worry about
17555 otherops
[0] = operands
[0];
17556 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
17557 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
17559 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
17561 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
17563 /* Registers overlap so split out the increment. */
17566 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
17567 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
17574 /* Use a single insn if we can.
17575 FIXME: IWMMXT allows offsets larger than ldrd can
17576 handle, fix these up with a pair of ldr. */
17578 || !CONST_INT_P (otherops
[2])
17579 || (INTVAL (otherops
[2]) > -256
17580 && INTVAL (otherops
[2]) < 256))
17583 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
17589 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
17590 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17600 /* Use a single insn if we can.
17601 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17602 fix these up with a pair of ldr. */
17604 || !CONST_INT_P (otherops
[2])
17605 || (INTVAL (otherops
[2]) > -256
17606 && INTVAL (otherops
[2]) < 256))
17609 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
17615 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17616 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
17626 /* We might be able to use ldrd %0, %1 here. However the range is
17627 different to ldr/adr, and it is broken on some ARMv7-M
17628 implementations. */
17629 /* Use the second register of the pair to avoid problematic
17631 otherops
[1] = operands
[1];
17633 output_asm_insn ("adr%?\t%0, %1", otherops
);
17634 operands
[1] = otherops
[0];
17638 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
17640 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
17647 /* ??? This needs checking for thumb2. */
17649 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
17650 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
17652 otherops
[0] = operands
[0];
17653 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
17654 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
17656 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
17658 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
17660 switch ((int) INTVAL (otherops
[2]))
17664 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
17670 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
17676 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
17680 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
17681 operands
[1] = otherops
[0];
17683 && (REG_P (otherops
[2])
17685 || (CONST_INT_P (otherops
[2])
17686 && INTVAL (otherops
[2]) > -256
17687 && INTVAL (otherops
[2]) < 256)))
17689 if (reg_overlap_mentioned_p (operands
[0],
17693 /* Swap base and index registers over to
17694 avoid a conflict. */
17696 otherops
[1] = otherops
[2];
17699 /* If both registers conflict, it will usually
17700 have been fixed by a splitter. */
17701 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
17702 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
17706 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17707 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
17714 otherops
[0] = operands
[0];
17716 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
17721 if (CONST_INT_P (otherops
[2]))
17725 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
17726 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
17728 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17734 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17740 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
17747 return "ldr%(d%)\t%0, [%1]";
17749 return "ldm%(ia%)\t%1, %M0";
17753 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
17754 /* Take care of overlapping base/data reg. */
17755 if (reg_mentioned_p (operands
[0], operands
[1]))
17759 output_asm_insn ("ldr%?\t%0, %1", otherops
);
17760 output_asm_insn ("ldr%?\t%0, %1", operands
);
17770 output_asm_insn ("ldr%?\t%0, %1", operands
);
17771 output_asm_insn ("ldr%?\t%0, %1", otherops
);
/* NOTE(review): store half -- MEM <- REG pair (str/strd/stm forms),
   mirroring the load-half dispatch above.  */
17781 /* Constraints should ensure this. */
17782 gcc_assert (code0
== MEM
&& code1
== REG
);
17783 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
17784 || (TARGET_ARM
&& TARGET_LDRD
));
17786 switch (GET_CODE (XEXP (operands
[0], 0)))
17792 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
17794 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
17799 gcc_assert (TARGET_LDRD
);
17801 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
17808 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
17810 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
17818 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
17820 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
17825 gcc_assert (TARGET_LDRD
);
17827 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
17832 otherops
[0] = operands
[1];
17833 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
17834 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
17836 /* IWMMXT allows offsets larger than ldrd can handle,
17837 fix these up with a pair of ldr. */
17839 && CONST_INT_P (otherops
[2])
17840 && (INTVAL(otherops
[2]) <= -256
17841 || INTVAL(otherops
[2]) >= 256))
17843 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
17847 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
17848 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
17857 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
17858 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
17864 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
17867 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
17872 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
17877 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
17878 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
17880 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
17884 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
17891 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
17898 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
17903 && (REG_P (otherops
[2])
17905 || (CONST_INT_P (otherops
[2])
17906 && INTVAL (otherops
[2]) > -256
17907 && INTVAL (otherops
[2]) < 256)))
17909 otherops
[0] = operands
[1];
17910 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
17912 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
17918 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
17919 otherops
[1] = operands
[1];
17922 output_asm_insn ("str%?\t%1, %0", operands
);
17923 output_asm_insn ("str%?\t%H1, %0", otherops
);
17933 /* Output a move, load or store for quad-word vectors in ARM registers. Only
17934 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
17937 output_move_quad (rtx
*operands
)
17939 if (REG_P (operands
[0]))
17941 /* Load, or reg->reg move. */
17943 if (MEM_P (operands
[1]))
17945 switch (GET_CODE (XEXP (operands
[1], 0)))
17948 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
17953 output_asm_insn ("adr%?\t%0, %1", operands
);
17954 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
17958 gcc_unreachable ();
17966 gcc_assert (REG_P (operands
[1]));
17968 dest
= REGNO (operands
[0]);
17969 src
= REGNO (operands
[1]);
17971 /* This seems pretty dumb, but hopefully GCC won't try to do it
17974 for (i
= 0; i
< 4; i
++)
17976 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
17977 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
17978 output_asm_insn ("mov%?\t%0, %1", ops
);
17981 for (i
= 3; i
>= 0; i
--)
17983 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
17984 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
17985 output_asm_insn ("mov%?\t%0, %1", ops
);
17991 gcc_assert (MEM_P (operands
[0]));
17992 gcc_assert (REG_P (operands
[1]));
17993 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
17995 switch (GET_CODE (XEXP (operands
[0], 0)))
17998 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18002 gcc_unreachable ();
18009 /* Output a VFP load or store instruction. */
18012 output_move_vfp (rtx
*operands
)
18014 rtx reg
, mem
, addr
, ops
[2];
18015 int load
= REG_P (operands
[0]);
18016 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18017 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18020 enum machine_mode mode
;
18022 reg
= operands
[!load
];
18023 mem
= operands
[load
];
18025 mode
= GET_MODE (reg
);
18027 gcc_assert (REG_P (reg
));
18028 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18029 gcc_assert (mode
== SFmode
18033 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18034 gcc_assert (MEM_P (mem
));
18036 addr
= XEXP (mem
, 0);
18038 switch (GET_CODE (addr
))
18041 templ
= "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18042 ops
[0] = XEXP (addr
, 0);
18047 templ
= "f%smia%c%%?\t%%0!, {%%%s1}%s";
18048 ops
[0] = XEXP (addr
, 0);
18053 templ
= "f%s%c%%?\t%%%s0, %%1%s";
18059 sprintf (buff
, templ
,
18060 load
? "ld" : "st",
18063 integer_p
? "\t%@ int" : "");
18064 output_asm_insn (buff
, ops
);
18069 /* Output a Neon double-word or quad-word load or store, or a load
18070 or store for larger structure modes.
18072 WARNING: The ordering of elements is weird in big-endian mode,
18073 because the EABI requires that vectors stored in memory appear
18074 as though they were stored by a VSTM, as required by the EABI.
18075 GCC RTL defines element ordering based on in-memory order.
18076 This can be different from the architectural ordering of elements
18077 within a NEON register. The intrinsics defined in arm_neon.h use the
18078 NEON register element ordering, not the GCC RTL element ordering.
18080 For example, the in-memory ordering of a big-endian a quadword
18081 vector with 16-bit elements when stored from register pair {d0,d1}
18082 will be (lowest address first, d0[N] is NEON register element N):
18084 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18086 When necessary, quadword registers (dN, dN+1) are moved to ARM
18087 registers from rN in the order:
18089 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18091 So that STM/LDM can be used on vectors in ARM registers, and the
18092 same memory layout will result as if VSTM/VLDM were used.
18094 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18095 possible, which allows use of appropriate alignment tags.
18096 Note that the choice of "64" is independent of the actual vector
18097 element size; this size simply ensures that the behavior is
18098 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18100 Due to limitations of those instructions, use of VST1.64/VLD1.64
18101 is not possible if:
18102 - the address contains PRE_DEC, or
18103 - the mode refers to more than 4 double-word registers
18105 In those cases, it would be possible to replace VSTM/VLDM by a
18106 sequence of instructions; this is not currently implemented since
18107 this is not certain to actually improve performance. */
18110 output_move_neon (rtx
*operands
)
18112 rtx reg
, mem
, addr
, ops
[2];
18113 int regno
, nregs
, load
= REG_P (operands
[0]);
18116 enum machine_mode mode
;
18118 reg
= operands
[!load
];
18119 mem
= operands
[load
];
18121 mode
= GET_MODE (reg
);
18123 gcc_assert (REG_P (reg
));
18124 regno
= REGNO (reg
);
18125 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18126 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18127 || NEON_REGNO_OK_FOR_QUAD (regno
));
18128 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18129 || VALID_NEON_QREG_MODE (mode
)
18130 || VALID_NEON_STRUCT_MODE (mode
));
18131 gcc_assert (MEM_P (mem
));
18133 addr
= XEXP (mem
, 0);
18135 /* Strip off const from addresses like (const (plus (...))). */
18136 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18137 addr
= XEXP (addr
, 0);
18139 switch (GET_CODE (addr
))
18142 /* We have to use vldm / vstm for too-large modes. */
18145 templ
= "v%smia%%?\t%%0!, %%h1";
18146 ops
[0] = XEXP (addr
, 0);
18150 templ
= "v%s1.64\t%%h1, %%A0";
18157 /* We have to use vldm / vstm in this case, since there is no
18158 pre-decrement form of the vld1 / vst1 instructions. */
18159 templ
= "v%smdb%%?\t%%0!, %%h1";
18160 ops
[0] = XEXP (addr
, 0);
18165 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18166 gcc_unreachable ();
18173 for (i
= 0; i
< nregs
; i
++)
18175 /* We're only using DImode here because it's a convenient size. */
18176 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18177 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18178 if (reg_overlap_mentioned_p (ops
[0], mem
))
18180 gcc_assert (overlap
== -1);
18185 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18186 output_asm_insn (buff
, ops
);
18191 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18192 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18193 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18194 output_asm_insn (buff
, ops
);
18201 /* We have to use vldm / vstm for too-large modes. */
18203 templ
= "v%smia%%?\t%%m0, %%h1";
18205 templ
= "v%s1.64\t%%h1, %%A0";
18211 sprintf (buff
, templ
, load
? "ld" : "st");
18212 output_asm_insn (buff
, ops
);
18217 /* Compute and return the length of neon_mov<mode>, where <mode> is
18218 one of VSTRUCT modes: EI, OI, CI or XI. */
18220 arm_attr_length_move_neon (rtx insn
)
18222 rtx reg
, mem
, addr
;
18224 enum machine_mode mode
;
18226 extract_insn_cached (insn
);
18228 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18230 mode
= GET_MODE (recog_data
.operand
[0]);
18241 gcc_unreachable ();
18245 load
= REG_P (recog_data
.operand
[0]);
18246 reg
= recog_data
.operand
[!load
];
18247 mem
= recog_data
.operand
[load
];
18249 gcc_assert (MEM_P (mem
));
18251 mode
= GET_MODE (reg
);
18252 addr
= XEXP (mem
, 0);
18254 /* Strip off const from addresses like (const (plus (...))). */
18255 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18256 addr
= XEXP (addr
, 0);
18258 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18260 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
18267 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18271 arm_address_offset_is_imm (rtx insn
)
18275 extract_insn_cached (insn
);
18277 if (REG_P (recog_data
.operand
[0]))
18280 mem
= recog_data
.operand
[0];
18282 gcc_assert (MEM_P (mem
));
18284 addr
= XEXP (mem
, 0);
18287 || (GET_CODE (addr
) == PLUS
18288 && REG_P (XEXP (addr
, 0))
18289 && CONST_INT_P (XEXP (addr
, 1))))
18295 /* Output an ADD r, s, #n where n may be too big for one instruction.
18296 If adding zero to one register, output nothing. */
18298 output_add_immediate (rtx
*operands
)
18300 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18302 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18305 output_multi_immediate (operands
,
18306 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18309 output_multi_immediate (operands
,
18310 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18317 /* Output a multiple immediate operation.
18318 OPERANDS is the vector of operands referred to in the output patterns.
18319 INSTR1 is the output pattern to use for the first constant.
18320 INSTR2 is the output pattern to use for subsequent constants.
18321 IMMED_OP is the index of the constant slot in OPERANDS.
18322 N is the constant value. */
18323 static const char *
18324 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18325 int immed_op
, HOST_WIDE_INT n
)
18327 #if HOST_BITS_PER_WIDE_INT > 32
18333 /* Quick and easy output. */
18334 operands
[immed_op
] = const0_rtx
;
18335 output_asm_insn (instr1
, operands
);
18340 const char * instr
= instr1
;
18342 /* Note that n is never zero here (which would give no output). */
18343 for (i
= 0; i
< 32; i
+= 2)
18347 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18348 output_asm_insn (instr
, operands
);
18358 /* Return the name of a shifter operation. */
18359 static const char *
18360 arm_shift_nmem(enum rtx_code code
)
18365 return ARM_LSL_NAME
;
18381 /* Return the appropriate ARM instruction for the operation code.
18382 The returned result should not be overwritten. OP is the rtx of the
18383 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18386 arithmetic_instr (rtx op
, int shift_first_arg
)
18388 switch (GET_CODE (op
))
18394 return shift_first_arg
? "rsb" : "sub";
18409 return arm_shift_nmem(GET_CODE(op
));
18412 gcc_unreachable ();
18416 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18417 for the operation code. The returned result should not be overwritten.
18418 OP is the rtx code of the shift.
18419 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18421 static const char *
18422 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18425 enum rtx_code code
= GET_CODE (op
);
18430 if (!CONST_INT_P (XEXP (op
, 1)))
18432 output_operand_lossage ("invalid shift operand");
18437 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18445 mnem
= arm_shift_nmem(code
);
18446 if (CONST_INT_P (XEXP (op
, 1)))
18448 *amountp
= INTVAL (XEXP (op
, 1));
18450 else if (REG_P (XEXP (op
, 1)))
18457 output_operand_lossage ("invalid shift operand");
18463 /* We never have to worry about the amount being other than a
18464 power of 2, since this case can never be reloaded from a reg. */
18465 if (!CONST_INT_P (XEXP (op
, 1)))
18467 output_operand_lossage ("invalid shift operand");
18471 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18473 /* Amount must be a power of two. */
18474 if (*amountp
& (*amountp
- 1))
18476 output_operand_lossage ("invalid shift operand");
18480 *amountp
= int_log2 (*amountp
);
18481 return ARM_LSL_NAME
;
18484 output_operand_lossage ("invalid shift operand");
18488 /* This is not 100% correct, but follows from the desire to merge
18489 multiplication by a power of 2 with the recognizer for a
18490 shift. >=32 is not a valid shift for "lsl", so we must try and
18491 output a shift that produces the correct arithmetical result.
18492 Using lsr #32 is identical except for the fact that the carry bit
18493 is not set correctly if we set the flags; but we never use the
18494 carry bit from such an operation, so we can ignore that. */
18495 if (code
== ROTATERT
)
18496 /* Rotate is just modulo 32. */
18498 else if (*amountp
!= (*amountp
& 31))
18500 if (code
== ASHIFT
)
18505 /* Shifts of 0 are no-ops. */
18512 /* Obtain the shift from the POWER of two. */
18514 static HOST_WIDE_INT
18515 int_log2 (HOST_WIDE_INT power
)
18517 HOST_WIDE_INT shift
= 0;
18519 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
18521 gcc_assert (shift
<= 31);
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      /* Start a new .ascii directive when the current one gets long.  */
      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
18576 /* Compute the register save mask for registers 0 through 12
18577 inclusive. This code is used by arm_compute_save_reg_mask. */
18579 static unsigned long
18580 arm_compute_save_reg0_reg12_mask (void)
18582 unsigned long func_type
= arm_current_func_type ();
18583 unsigned long save_reg_mask
= 0;
18586 if (IS_INTERRUPT (func_type
))
18588 unsigned int max_reg
;
18589 /* Interrupt functions must not corrupt any registers,
18590 even call clobbered ones. If this is a leaf function
18591 we can just examine the registers used by the RTL, but
18592 otherwise we have to assume that whatever function is
18593 called might clobber anything, and so we have to save
18594 all the call-clobbered registers as well. */
18595 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
18596 /* FIQ handlers have registers r8 - r12 banked, so
18597 we only need to check r0 - r7, Normal ISRs only
18598 bank r14 and r15, so we must check up to r12.
18599 r13 is the stack pointer which is always preserved,
18600 so we do not need to consider it here. */
18605 for (reg
= 0; reg
<= max_reg
; reg
++)
18606 if (df_regs_ever_live_p (reg
)
18607 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
18608 save_reg_mask
|= (1 << reg
);
18610 /* Also save the pic base register if necessary. */
18612 && !TARGET_SINGLE_PIC_BASE
18613 && arm_pic_register
!= INVALID_REGNUM
18614 && crtl
->uses_pic_offset_table
)
18615 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18617 else if (IS_VOLATILE(func_type
))
18619 /* For noreturn functions we historically omitted register saves
18620 altogether. However this really messes up debugging. As a
18621 compromise save just the frame pointers. Combined with the link
18622 register saved elsewhere this should be sufficient to get
18624 if (frame_pointer_needed
)
18625 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
18626 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
18627 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
18628 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
18629 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
18633 /* In the normal case we only need to save those registers
18634 which are call saved and which are used by this function. */
18635 for (reg
= 0; reg
<= 11; reg
++)
18636 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
18637 save_reg_mask
|= (1 << reg
);
18639 /* Handle the frame pointer as a special case. */
18640 if (frame_pointer_needed
)
18641 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
18643 /* If we aren't loading the PIC register,
18644 don't stack it even though it may be live. */
18646 && !TARGET_SINGLE_PIC_BASE
18647 && arm_pic_register
!= INVALID_REGNUM
18648 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
18649 || crtl
->uses_pic_offset_table
))
18650 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18652 /* The prologue will copy SP into R0, so save it. */
18653 if (IS_STACKALIGN (func_type
))
18654 save_reg_mask
|= 1;
18657 /* Save registers so the exception handler can modify them. */
18658 if (crtl
->calls_eh_return
)
18664 reg
= EH_RETURN_DATA_REGNO (i
);
18665 if (reg
== INVALID_REGNUM
)
18667 save_reg_mask
|= 1 << reg
;
18671 return save_reg_mask
;
18674 /* Return true if r3 is live at the start of the function. */
18677 arm_r3_live_at_start_p (void)
18679 /* Just look at cfg info, which is still close enough to correct at this
18680 point. This gives false positives for broken functions that might use
18681 uninitialized data that happens to be allocated in r3, but who cares? */
18682 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
18686 /* Compute the number of bytes used to store the static chain register on the
18687 stack, above the stack frame. We need to know this accurately to get the
18688 alignment of the rest of the stack frame correct. */
18691 arm_compute_static_chain_stack_bytes (void)
18693 /* See the defining assertion in arm_expand_prologue. */
18694 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
18695 && IS_NESTED (arm_current_func_type ())
18696 && arm_r3_live_at_start_p ()
18697 && crtl
->args
.pretend_args_size
== 0)
/* NOTE(review): this block was damaged in text extraction -- tokens are
   split across lines, several statements/braces are missing (the embedded
   original line numbering jumps, e.g. 18715->18718), and stray line numbers
   are fused into the text.  Text kept byte-identical below; restore from a
   pristine gcc/config/arm/arm.c before attempting to compile.
   Purpose (from the surviving comments): build the bit mask of core
   registers that must be saved in the prologue of the current function;
   arm_get_frame_offsets may add further registers on top of this.  */
18703 /* Compute a bit mask of which registers need to be
18704 saved on the stack for the current function.
18705 This is used by arm_get_frame_offsets, which may add extra registers. */
18707 static unsigned long
18708 arm_compute_save_reg_mask (void)
18710 unsigned int save_reg_mask
= 0;
18711 unsigned long func_type
= arm_current_func_type ();
18714 if (IS_NAKED (func_type
))
18715 /* This should never really happen. */
18718 /* If we are creating a stack frame, then we must save the frame pointer,
18719 IP (which will hold the old stack pointer), LR and the PC. */
18720 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
18722 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
18725 | (1 << PC_REGNUM
);
18727 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
18729 /* Decide if we need to save the link register.
18730 Interrupt routines have their own banked link register,
18731 so they never need to save it.
18732 Otherwise if we do not use the link register we do not need to save
18733 it. If we are pushing other registers onto the stack however, we
18734 can save an instruction in the epilogue by pushing the link register
18735 now and then popping it back into the PC. This incurs extra memory
18736 accesses though, so we only do it when optimizing for size, and only
18737 if we know that we will not need a fancy return sequence. */
18738 if (df_regs_ever_live_p (LR_REGNUM
)
18741 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
18742 && !crtl
->calls_eh_return
))
18743 save_reg_mask
|= 1 << LR_REGNUM
;
18745 if (cfun
->machine
->lr_save_eliminated
)
18746 save_reg_mask
&= ~ (1 << LR_REGNUM
);
/* NOTE(review): iWMMXt alignment fix-up -- pushes one extra ARM register
   when the total push count would leave SP only 4-byte aligned.  */
18748 if (TARGET_REALLY_IWMMXT
18749 && ((bit_count (save_reg_mask
)
18750 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
18751 arm_compute_static_chain_stack_bytes())
18754 /* The total number of registers that are going to be pushed
18755 onto the stack is odd. We need to ensure that the stack
18756 is 64-bit aligned before we start to save iWMMXt registers,
18757 and also before we start to create locals. (A local variable
18758 might be a double or long long which we will load/store using
18759 an iWMMXt instruction). Therefore we need to push another
18760 ARM register, so that the stack will be 64-bit aligned. We
18761 try to avoid using the arg registers (r0 -r3) as they might be
18762 used to pass values in a tail call. */
18763 for (reg
= 4; reg
<= 12; reg
++)
18764 if ((save_reg_mask
& (1 << reg
)) == 0)
18768 save_reg_mask
|= (1 << reg
);
18771 cfun
->machine
->sibcall_blocked
= 1;
18772 save_reg_mask
|= (1 << 3);
18776 /* We may need to push an additional register for use initializing the
18777 PIC base register. */
18778 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
18779 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
18781 reg
= thumb_find_work_register (1 << 4);
18782 if (!call_used_regs
[reg
])
18783 save_reg_mask
|= (1 << reg
);
18786 return save_reg_mask
;
/* NOTE(review): extraction damage -- split tokens and dropped lines
   (e.g. the loop body after 18800 and the final return are missing).
   Kept byte-identical; restore from pristine arm.c before compiling.
   Purpose: Thumb-1 variant of the save-register mask computation,
   including PIC base, interworking slot, LR, and low-work-register
   considerations visible in the surviving comments below.  */
18790 /* Compute a bit mask of which registers need to be
18791 saved on the stack for the current function. */
18792 static unsigned long
18793 thumb1_compute_save_reg_mask (void)
18795 unsigned long mask
;
18799 for (reg
= 0; reg
< 12; reg
++)
18800 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
18804 && !TARGET_SINGLE_PIC_BASE
18805 && arm_pic_register
!= INVALID_REGNUM
18806 && crtl
->uses_pic_offset_table
)
18807 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18809 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
18810 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
18811 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
18813 /* LR will also be pushed if any lo regs are pushed. */
18814 if (mask
& 0xff || thumb_force_lr_save ())
18815 mask
|= (1 << LR_REGNUM
);
18817 /* Make sure we have a low work register if we need one.
18818 We will need one if we are going to push a high register,
18819 but we are not currently intending to push a low register. */
18820 if ((mask
& 0xff) == 0
18821 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
18823 /* Use thumb_find_work_register to choose which register
18824 we will use. If the register is live then we will
18825 have to push it. Use LAST_LO_REGNUM as our fallback
18826 choice for the register to select. */
18827 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
18828 /* Make sure the register returned by thumb_find_work_register is
18829 not part of the return value. */
18830 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
18831 reg
= LAST_LO_REGNUM
;
18833 if (! call_used_regs
[reg
])
18837 /* The 504 below is 8 bytes less than 512 because there are two possible
18838 alignment words. We can't tell here if they will be present or not so we
18839 have to play it safe and assume that they are. */
18840 if ((CALLER_INTERWORKING_SLOT_SIZE
+
18841 ROUND_UP_WORD (get_frame_size ()) +
18842 crtl
->outgoing_args_size
) >= 504)
18844 /* This is the same as the code in thumb1_expand_prologue() which
18845 determines which register to use for stack decrement. */
18846 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
18847 if (mask
& (1 << reg
))
18850 if (reg
> LAST_LO_REGNUM
)
18852 /* Make sure we have a register available for stack decrement. */
18853 mask
|= 1 << LAST_LO_REGNUM
;
/* NOTE(review): extraction damage -- the return type, the declarations of
   `count'/`saved', parts of the register-pair loop and the final return are
   missing, and tokens are split across lines.  Kept byte-identical.
   Purpose: compute the number of stack bytes needed to save the live,
   call-saved VFP D-registers (8 bytes each); the duplicated
   "count == 2 && !arm_arch6" test below is the ARM10 VFPr1 errata
   workaround mentioned in the surviving comment.  */
18861 /* Return the number of bytes required to save VFP registers. */
18863 arm_get_vfp_saved_size (void)
18865 unsigned int regno
;
18870 /* Space for saved VFP registers. */
18871 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
18874 for (regno
= FIRST_VFP_REGNUM
;
18875 regno
< LAST_VFP_REGNUM
;
18878 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
18879 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
18883 /* Workaround ARM10 VFPr1 bug. */
18884 if (count
== 2 && !arm_arch6
)
18886 saved
+= count
* 8;
18895 if (count
== 2 && !arm_arch6
)
18897 saved
+= count
* 8;
/* NOTE(review): extraction damage throughout this function -- split tokens,
   dropped statements (returns, else arms, buffer declaration `char instr[]',
   several branch bodies) and stray original line numbers fused into the
   text.  Kept byte-identical; restore from pristine arm.c.
   Purpose (from the surviving comments and strings): emit the textual
   epilogue/return sequence for an ARM function -- pop of saved registers
   via ldr/ldm/pop, then the final return via bx/mov pc/subs pc depending
   on function type (normal, interworked, interrupt, exception) and
   architecture level.  */
18904 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
18905 everything bar the final return instruction. If simple_return is true,
18906 then do not output epilogue, because it has already been emitted in RTL. */
18908 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
18909 bool simple_return
)
18911 char conditional
[10];
18914 unsigned long live_regs_mask
;
18915 unsigned long func_type
;
18916 arm_stack_offsets
*offsets
;
18918 func_type
= arm_current_func_type ();
18920 if (IS_NAKED (func_type
))
18923 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
18925 /* If this function was declared non-returning, and we have
18926 found a tail call, then we have to trust that the called
18927 function won't return. */
18932 /* Otherwise, trap an attempted return by aborting. */
18934 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
18936 assemble_external_libcall (ops
[1]);
18937 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
18943 gcc_assert (!cfun
->calls_alloca
|| really_return
);
18945 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
18947 cfun
->machine
->return_used_this_function
= 1;
18949 offsets
= arm_get_frame_offsets ();
18950 live_regs_mask
= offsets
->saved_regs_mask
;
18952 if (!simple_return
&& live_regs_mask
)
18954 const char * return_reg
;
18956 /* If we do not have any special requirements for function exit
18957 (e.g. interworking) then we can load the return address
18958 directly into the PC. Otherwise we must load it into LR. */
18960 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
18961 return_reg
= reg_names
[PC_REGNUM
];
18963 return_reg
= reg_names
[LR_REGNUM
];
18965 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
18967 /* There are three possible reasons for the IP register
18968 being saved. 1) a stack frame was created, in which case
18969 IP contains the old stack pointer, or 2) an ISR routine
18970 corrupted it, or 3) it was saved to align the stack on
18971 iWMMXt. In case 1, restore IP into SP, otherwise just
18973 if (frame_pointer_needed
)
18975 live_regs_mask
&= ~ (1 << IP_REGNUM
);
18976 live_regs_mask
|= (1 << SP_REGNUM
);
18979 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
18982 /* On some ARM architectures it is faster to use LDR rather than
18983 LDM to load a single register. On other architectures, the
18984 cost is the same. In 26 bit mode, or for exception handlers,
18985 we have to use LDM to load the PC so that the CPSR is also
18987 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
18988 if (live_regs_mask
== (1U << reg
))
18991 if (reg
<= LAST_ARM_REGNUM
18992 && (reg
!= LR_REGNUM
18994 || ! IS_INTERRUPT (func_type
)))
18996 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
18997 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19004 /* Generate the load multiple instruction to restore the
19005 registers. Note we can get here, even if
19006 frame_pointer_needed is true, but only if sp already
19007 points to the base of the saved core registers. */
19008 if (live_regs_mask
& (1 << SP_REGNUM
))
19010 unsigned HOST_WIDE_INT stack_adjust
;
19012 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19013 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19015 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19016 if (TARGET_UNIFIED_ASM
)
19017 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19019 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
19022 /* If we can't use ldmib (SA110 bug),
19023 then try to pop r3 instead. */
19025 live_regs_mask
|= 1 << 3;
19027 if (TARGET_UNIFIED_ASM
)
19028 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19030 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
19034 if (TARGET_UNIFIED_ASM
)
19035 sprintf (instr
, "pop%s\t{", conditional
);
19037 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
19039 p
= instr
+ strlen (instr
);
19041 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19042 if (live_regs_mask
& (1 << reg
))
19044 int l
= strlen (reg_names
[reg
]);
19050 memcpy (p
, ", ", 2);
19054 memcpy (p
, "%|", 2);
19055 memcpy (p
+ 2, reg_names
[reg
], l
);
19059 if (live_regs_mask
& (1 << LR_REGNUM
))
19061 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19062 /* If returning from an interrupt, restore the CPSR. */
19063 if (IS_INTERRUPT (func_type
))
19070 output_asm_insn (instr
, & operand
);
19072 /* See if we need to generate an extra instruction to
19073 perform the actual function return. */
19075 && func_type
!= ARM_FT_INTERWORKED
19076 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19078 /* The return has already been handled
19079 by loading the LR into the PC. */
19086 switch ((int) ARM_FUNC_TYPE (func_type
))
19090 /* ??? This is wrong for unified assembly syntax. */
19091 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19094 case ARM_FT_INTERWORKED
:
19095 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19098 case ARM_FT_EXCEPTION
:
19099 /* ??? This is wrong for unified assembly syntax. */
19100 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19104 /* Use bx if it's available. */
19105 if (arm_arch5
|| arm_arch4t
)
19106 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19108 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19112 output_asm_insn (instr
, & operand
);
19118 /* Write the function name into the code section, directly preceding
19119 the function prologue.
19121 Code will be output similar to this:
19123 .ascii "arm_poke_function_name", 0
19126 .word 0xff000000 + (t1 - t0)
19127 arm_poke_function_name
19129 stmfd sp!, {fp, ip, lr, pc}
19132 When performing a stack backtrace, code can inspect the value
19133 of 'pc' stored at 'fp' + 0. If the trace function then looks
19134 at location pc - 12 and the top 8 bits are set, then we know
19135 that there is a function name embedded immediately preceding this
19136 location and has length ((pc[-3]) & 0xff000000).
19138 We assume that pc is declared as a pointer to an unsigned long.
19140 It is of no benefit to output the function name if we are assembling
19141 a leaf function. These function types will not contain a stack
19142 backtrace structure, therefore it is not possible to determine the
19145 arm_poke_function_name (FILE *stream
, const char *name
)
19147 unsigned long alignlength
;
19148 unsigned long length
;
19151 length
= strlen (name
) + 1;
19152 alignlength
= ROUND_UP_WORD (length
);
19154 ASM_OUTPUT_ASCII (stream
, name
, length
);
19155 ASM_OUTPUT_ALIGN (stream
, 2);
19156 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19157 assemble_aligned_integer (UNITS_PER_WORD
, x
);
/* NOTE(review): extraction damage -- the return type, braces, several
   switch cases (ISR/FIQ labels) and `break' statements are missing, and
   tokens are split across lines.  Kept byte-identical.
   Purpose: emit human-readable assembler comments describing the current
   function (type, args/pretend/frame sizes, frame-pointer use, LR-save
   elimination, EH-return) ahead of its prologue.  */
19160 /* Place some comments into the assembler stream
19161 describing the current function. */
19163 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
19165 unsigned long func_type
;
19167 /* ??? Do we want to print some of the below anyway? */
19171 /* Sanity check. */
19172 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19174 func_type
= arm_current_func_type ();
19176 switch ((int) ARM_FUNC_TYPE (func_type
))
19179 case ARM_FT_NORMAL
:
19181 case ARM_FT_INTERWORKED
:
19182 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19185 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19188 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19190 case ARM_FT_EXCEPTION
:
19191 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19195 if (IS_NAKED (func_type
))
19196 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19198 if (IS_VOLATILE (func_type
))
19199 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19201 if (IS_NESTED (func_type
))
19202 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19203 if (IS_STACKALIGN (func_type
))
19204 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19206 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19208 crtl
->args
.pretend_args_size
, frame_size
);
19210 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19211 frame_pointer_needed
,
19212 cfun
->machine
->uses_anonymous_args
);
19214 if (cfun
->machine
->lr_save_eliminated
)
19215 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19217 if (crtl
->calls_eh_return
)
19218 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
/* NOTE(review): extraction damage -- the return type, the leading
   `if (TARGET_THUMB1 ...)'-style condition that pairs with the surviving
   `else /* TARGET_32BIT */', braces and the regno declaration are missing.
   Kept byte-identical.
   Purpose: per-function epilogue bookkeeping -- emit v4t call-via-reg
   trampolines, sanity-check frame offsets on 32-bit targets, and reset
   ARM per-function state.  */
19223 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
19224 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
19226 arm_stack_offsets
*offsets
;
19232 /* Emit any call-via-reg trampolines that are needed for v4t support
19233 of call_reg and call_value_reg type insns. */
19234 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19236 rtx label
= cfun
->machine
->call_via
[regno
];
19240 switch_to_section (function_section (current_function_decl
));
19241 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19242 CODE_LABEL_NUMBER (label
));
19243 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19247 /* ??? Probably not safe to set this here, since it assumes that a
19248 function will be emitted as assembly immediately after we generate
19249 RTL for it. This does not happen for inline functions. */
19250 cfun
->machine
->return_used_this_function
= 0;
19252 else /* TARGET_32BIT */
19254 /* We need to take into account any stack-frame rounding. */
19255 offsets
= arm_get_frame_offsets ();
19257 gcc_assert (!use_return_insn (FALSE
, NULL
)
19258 || (cfun
->machine
->return_used_this_function
!= 0)
19259 || offsets
->saved_regs
== offsets
->outgoing_args
19260 || frame_pointer_needed
);
19262 /* Reset the ARM-specific per-function variables. */
19263 after_arm_reorg
= 0;
/* NOTE(review): extraction damage -- split tokens, missing declarations
   (num_regs, regno, i, tmp), missing offsets inside several
   plus_constant calls and dropped loop-control lines.  Kept
   byte-identical; restore from pristine arm.c.
   Purpose: Thumb-2 prologue helper that pushes the registers in
   SAVED_REGS_MASK using STR/STRD pairs instead of a PUSH multiple, with a
   REG_FRAME_RELATED_EXPR DWARF sequence describing the single stack
   decrement plus each store.  */
19267 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19268 STR and STRD. If an even number of registers are being pushed, one
19269 or more STRD patterns are created for each register pair. If an
19270 odd number of registers are pushed, emit an initial STR followed by
19271 as many STRD instructions as are needed. This works best when the
19272 stack is initially 64-bit aligned (the normal case), since it
19273 ensures that each STRD is also 64-bit aligned. */
19275 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19280 rtx par
= NULL_RTX
;
19281 rtx dwarf
= NULL_RTX
;
19285 num_regs
= bit_count (saved_regs_mask
);
19287 /* Must be at least one register to save, and can't save SP or PC. */
19288 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19289 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19290 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19292 /* Create sequence for DWARF info. All the frame-related data for
19293 debugging is held in this wrapper. */
19294 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19296 /* Describe the stack adjustment. */
19297 tmp
= gen_rtx_SET (VOIDmode
,
19299 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19300 RTX_FRAME_RELATED_P (tmp
) = 1;
19301 XVECEXP (dwarf
, 0, 0) = tmp
;
19303 /* Find the first register. */
19304 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19309 /* If there's an odd number of registers to push. Start off by
19310 pushing a single register. This ensures that subsequent strd
19311 operations are dword aligned (assuming that SP was originally
19312 64-bit aligned). */
19313 if ((num_regs
& 1) != 0)
19315 rtx reg
, mem
, insn
;
19317 reg
= gen_rtx_REG (SImode
, regno
);
19319 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19320 stack_pointer_rtx
));
19322 mem
= gen_frame_mem (Pmode
,
19324 (Pmode
, stack_pointer_rtx
,
19325 plus_constant (Pmode
, stack_pointer_rtx
,
19328 tmp
= gen_rtx_SET (VOIDmode
, mem
, reg
);
19329 RTX_FRAME_RELATED_P (tmp
) = 1;
19330 insn
= emit_insn (tmp
);
19331 RTX_FRAME_RELATED_P (insn
) = 1;
19332 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19333 tmp
= gen_rtx_SET (VOIDmode
, gen_frame_mem (Pmode
, stack_pointer_rtx
),
19335 RTX_FRAME_RELATED_P (tmp
) = 1;
19338 XVECEXP (dwarf
, 0, i
) = tmp
;
/* NOTE(review): main pairing loop -- each iteration finds two set bits in
   saved_regs_mask and emits one STRD (3-element PARALLEL for the first,
   writeback store; 2-element thereafter).  */
19342 while (i
< num_regs
)
19343 if (saved_regs_mask
& (1 << regno
))
19345 rtx reg1
, reg2
, mem1
, mem2
;
19346 rtx tmp0
, tmp1
, tmp2
;
19349 /* Find the register to pair with this one. */
19350 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19354 reg1
= gen_rtx_REG (SImode
, regno
);
19355 reg2
= gen_rtx_REG (SImode
, regno2
);
19362 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19365 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19367 -4 * (num_regs
- 1)));
19368 tmp0
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
19369 plus_constant (Pmode
, stack_pointer_rtx
,
19371 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
19372 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
19373 RTX_FRAME_RELATED_P (tmp0
) = 1;
19374 RTX_FRAME_RELATED_P (tmp1
) = 1;
19375 RTX_FRAME_RELATED_P (tmp2
) = 1;
19376 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19377 XVECEXP (par
, 0, 0) = tmp0
;
19378 XVECEXP (par
, 0, 1) = tmp1
;
19379 XVECEXP (par
, 0, 2) = tmp2
;
19380 insn
= emit_insn (par
);
19381 RTX_FRAME_RELATED_P (insn
) = 1;
19382 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19386 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19389 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19392 tmp1
= gen_rtx_SET (VOIDmode
, mem1
, reg1
);
19393 tmp2
= gen_rtx_SET (VOIDmode
, mem2
, reg2
);
19394 RTX_FRAME_RELATED_P (tmp1
) = 1;
19395 RTX_FRAME_RELATED_P (tmp2
) = 1;
19396 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19397 XVECEXP (par
, 0, 0) = tmp1
;
19398 XVECEXP (par
, 0, 1) = tmp2
;
19402 /* Create unwind information. This is an approximation. */
19403 tmp1
= gen_rtx_SET (VOIDmode
,
19404 gen_frame_mem (Pmode
,
19405 plus_constant (Pmode
,
19409 tmp2
= gen_rtx_SET (VOIDmode
,
19410 gen_frame_mem (Pmode
,
19411 plus_constant (Pmode
,
19416 RTX_FRAME_RELATED_P (tmp1
) = 1;
19417 RTX_FRAME_RELATED_P (tmp2
) = 1;
19418 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
19419 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
19421 regno
= regno2
+ 1;
/* NOTE(review): extraction damage -- split tokens, missing declarations
   (num_regs, offset, tmp, mem), missing offset expressions inside several
   plus_constant calls, and dropped increments of j/offset.  Kept
   byte-identical; restore from pristine arm.c.
   Purpose: ARM-mode prologue helper; the first store allocates the whole
   stack area via PRE_MODIFY writeback, later stores use offset
   addressing, pairing consecutive registers into DImode STRD where
   possible, with an explicit DWARF SEQUENCE attached to the first insn.  */
19429 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19430 whenever possible, otherwise it emits single-word stores. The first store
19431 also allocates stack space for all saved registers, using writeback with
19432 post-addressing mode. All other stores use offset addressing. If no STRD
19433 can be emitted, this function emits a sequence of single-word stores,
19434 and not an STM as before, because single-word stores provide more freedom
19435 scheduling and can be turned into an STM by peephole optimizations. */
19437 arm_emit_strd_push (unsigned long saved_regs_mask
)
19440 int i
, j
, dwarf_index
= 0;
19442 rtx dwarf
= NULL_RTX
;
19443 rtx insn
= NULL_RTX
;
19446 /* TODO: A more efficient code can be emitted by changing the
19447 layout, e.g., first push all pairs that can use STRD to keep the
19448 stack aligned, and then push all other registers. */
19449 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19450 if (saved_regs_mask
& (1 << i
))
19453 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19454 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19455 gcc_assert (num_regs
> 0);
19457 /* Create sequence for DWARF info. */
19458 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19460 /* For dwarf info, we generate explicit stack update. */
19461 tmp
= gen_rtx_SET (VOIDmode
,
19463 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19464 RTX_FRAME_RELATED_P (tmp
) = 1;
19465 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19467 /* Save registers. */
19468 offset
= - 4 * num_regs
;
19470 while (j
<= LAST_ARM_REGNUM
)
19471 if (saved_regs_mask
& (1 << j
))
19474 && (saved_regs_mask
& (1 << (j
+ 1))))
19476 /* Current register and previous register form register pair for
19477 which STRD can be generated. */
19480 /* Allocate stack space for all saved registers. */
19481 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19482 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19483 mem
= gen_frame_mem (DImode
, tmp
);
19486 else if (offset
> 0)
19487 mem
= gen_frame_mem (DImode
,
19488 plus_constant (Pmode
,
19492 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
19494 tmp
= gen_rtx_SET (DImode
, mem
, gen_rtx_REG (DImode
, j
));
19495 RTX_FRAME_RELATED_P (tmp
) = 1;
19496 tmp
= emit_insn (tmp
);
19498 /* Record the first store insn. */
19499 if (dwarf_index
== 1)
19502 /* Generate dwarf info. */
19503 mem
= gen_frame_mem (SImode
,
19504 plus_constant (Pmode
,
19507 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19508 RTX_FRAME_RELATED_P (tmp
) = 1;
19509 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19511 mem
= gen_frame_mem (SImode
,
19512 plus_constant (Pmode
,
19515 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
+ 1));
19516 RTX_FRAME_RELATED_P (tmp
) = 1;
19517 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19524 /* Emit a single word store. */
19527 /* Allocate stack space for all saved registers. */
19528 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19529 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19530 mem
= gen_frame_mem (SImode
, tmp
);
19533 else if (offset
> 0)
19534 mem
= gen_frame_mem (SImode
,
19535 plus_constant (Pmode
,
19539 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
19541 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19542 RTX_FRAME_RELATED_P (tmp
) = 1;
19543 tmp
= emit_insn (tmp
);
19545 /* Record the first store insn. */
19546 if (dwarf_index
== 1)
19549 /* Generate dwarf info. */
19550 mem
= gen_frame_mem (SImode
,
19551 plus_constant(Pmode
,
19554 tmp
= gen_rtx_SET (SImode
, mem
, gen_rtx_REG (SImode
, j
));
19555 RTX_FRAME_RELATED_P (tmp
) = 1;
19556 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19565 /* Attach dwarf info to the first insn we generate. */
19566 gcc_assert (insn
!= NULL_RTX
);
19567 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19568 RTX_FRAME_RELATED_P (insn
) = 1;
/* NOTE(review): extraction damage -- split tokens, missing declarations
   (par, dwarf, reg, tmp, num_regs, i, j), several dropped argument lines
   inside gen_rtx_* calls and a dropped `return par;'-style tail.  Kept
   byte-identical; restore from pristine arm.c.
   Purpose: emit the push_multi pattern (PARALLEL of one UNSPEC_PUSH_MULT
   store plus USEs) for MASK, and attach a REG_FRAME_RELATED_EXPR note
   spelling out the single SP decrement and each individual register
   store, as the long surviving comment below explains.  */
19571 /* Generate and emit an insn that we will recognize as a push_multi.
19572 Unfortunately, since this insn does not reflect very well the actual
19573 semantics of the operation, we need to annotate the insn for the benefit
19574 of DWARF2 frame unwind information. */
19576 emit_multi_reg_push (unsigned long mask
)
19579 int num_dwarf_regs
;
19583 int dwarf_par_index
;
19586 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19587 if (mask
& (1 << i
))
19590 gcc_assert (num_regs
&& num_regs
<= 16);
19592 /* We don't record the PC in the dwarf frame information. */
19593 num_dwarf_regs
= num_regs
;
19594 if (mask
& (1 << PC_REGNUM
))
19597 /* For the body of the insn we are going to generate an UNSPEC in
19598 parallel with several USEs. This allows the insn to be recognized
19599 by the push_multi pattern in the arm.md file.
19601 The body of the insn looks something like this:
19604 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19605 (const_int:SI <num>)))
19606 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19612 For the frame note however, we try to be more explicit and actually
19613 show each register being stored into the stack frame, plus a (single)
19614 decrement of the stack pointer. We do it this way in order to be
19615 friendly to the stack unwinding code, which only wants to see a single
19616 stack decrement per instruction. The RTL we generate for the note looks
19617 something like this:
19620 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19621 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19622 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19623 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19627 FIXME:: In an ideal world the PRE_MODIFY would not exist and
19628 instead we'd have a parallel expression detailing all
19629 the stores to the various memory addresses so that debug
19630 information is more up-to-date. Remember however while writing
19631 this to take care of the constraints with the push instruction.
19633 Note also that this has to be taken care of for the VFP registers.
19635 For more see PR43399. */
19637 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
19638 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
19639 dwarf_par_index
= 1;
19641 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19643 if (mask
& (1 << i
))
19645 reg
= gen_rtx_REG (SImode
, i
);
19647 XVECEXP (par
, 0, 0)
19648 = gen_rtx_SET (VOIDmode
,
19651 gen_rtx_PRE_MODIFY (Pmode
,
19654 (Pmode
, stack_pointer_rtx
,
19657 gen_rtx_UNSPEC (BLKmode
,
19658 gen_rtvec (1, reg
),
19659 UNSPEC_PUSH_MULT
));
19661 if (i
!= PC_REGNUM
)
19663 tmp
= gen_rtx_SET (VOIDmode
,
19664 gen_frame_mem (SImode
, stack_pointer_rtx
),
19666 RTX_FRAME_RELATED_P (tmp
) = 1;
19667 XVECEXP (dwarf
, 0, dwarf_par_index
) = tmp
;
19675 for (j
= 1, i
++; j
< num_regs
; i
++)
19677 if (mask
& (1 << i
))
19679 reg
= gen_rtx_REG (SImode
, i
);
19681 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
19683 if (i
!= PC_REGNUM
)
19686 = gen_rtx_SET (VOIDmode
,
19689 plus_constant (Pmode
, stack_pointer_rtx
,
19692 RTX_FRAME_RELATED_P (tmp
) = 1;
19693 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
19700 par
= emit_insn (par
);
19702 tmp
= gen_rtx_SET (VOIDmode
,
19704 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19705 RTX_FRAME_RELATED_P (tmp
) = 1;
19706 XVECEXP (dwarf
, 0, 0) = tmp
;
19708 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
19713 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19714 SIZE is the offset to be adjusted.
19715 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19717 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
19721 RTX_FRAME_RELATED_P (insn
) = 1;
19722 dwarf
= gen_rtx_SET (VOIDmode
, dest
, plus_constant (Pmode
, src
, size
));
19723 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
/* NOTE(review): extraction damage -- split tokens, missing declarations
   (par, tmp, reg, i, j, num_regs, emit_update, offset_adj, return_in_pc),
   the return-in-PC RETURN element setup around 19762, and the condition
   selecting emit_jump_insn vs emit_insn are partially missing.  Kept
   byte-identical; restore from pristine arm.c.
   Purpose: emit a pop_multi PARALLEL restoring SAVED_REGS_MASK (possibly
   including PC for the return), maintaining REG_CFA_RESTORE notes for
   every non-PC register and a REG_CFA_ADJUST_CFA note for the SP bump.  */
19726 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19727 SAVED_REGS_MASK shows which registers need to be restored.
19729 Unfortunately, since this insn does not reflect very well the actual
19730 semantics of the operation, we need to annotate the insn for the benefit
19731 of DWARF2 frame unwind information. */
19733 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
19738 rtx dwarf
= NULL_RTX
;
19744 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
19745 offset_adj
= return_in_pc
? 1 : 0;
19746 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19747 if (saved_regs_mask
& (1 << i
))
19750 gcc_assert (num_regs
&& num_regs
<= 16);
19752 /* If SP is in reglist, then we don't emit SP update insn. */
19753 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
19755 /* The parallel needs to hold num_regs SETs
19756 and one SET for the stack update. */
19757 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
19762 XVECEXP (par
, 0, 0) = tmp
;
19767 /* Increment the stack pointer, based on there being
19768 num_regs 4-byte registers to restore. */
19769 tmp
= gen_rtx_SET (VOIDmode
,
19771 plus_constant (Pmode
,
19774 RTX_FRAME_RELATED_P (tmp
) = 1;
19775 XVECEXP (par
, 0, offset_adj
) = tmp
;
19778 /* Now restore every reg, which may include PC. */
19779 for (j
= 0, i
= 0; j
< num_regs
; i
++)
19780 if (saved_regs_mask
& (1 << i
))
19782 reg
= gen_rtx_REG (SImode
, i
);
19783 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
19785 /* Emit single load with writeback. */
19786 tmp
= gen_frame_mem (SImode
,
19787 gen_rtx_POST_INC (Pmode
,
19788 stack_pointer_rtx
));
19789 tmp
= emit_insn (gen_rtx_SET (VOIDmode
, reg
, tmp
));
19790 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19794 tmp
= gen_rtx_SET (VOIDmode
,
19798 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
19799 RTX_FRAME_RELATED_P (tmp
) = 1;
19800 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
19802 /* We need to maintain a sequence for DWARF info too. As dwarf info
19803 should not have PC, skip PC. */
19804 if (i
!= PC_REGNUM
)
19805 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19811 par
= emit_jump_insn (par
);
19813 par
= emit_insn (par
);
19815 REG_NOTES (par
) = dwarf
;
19817 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
19818 stack_pointer_rtx
, stack_pointer_rtx
);
/* NOTE(review): extraction damage -- split tokens and missing lines: the
   declarations (par, tmp, reg, i, j), the ARM10 VFPr1 workaround body,
   the `first_reg == 15' handling, the >16-register condition and `return'
   after the recursive calls, and the `j++' in the loop header.  Kept
   byte-identical; restore from pristine arm.c.
   Purpose: emit a pop_multi of NUM_REGS consecutive VFP D-registers
   starting at FIRST_REG (recursing once when more than 16 are needed),
   with REG_CFA_RESTORE notes and a REG_CFA_ADJUST_CFA note of
   8 * num_regs bytes on BASE_REG.  */
19821 /* Generate and emit an insn pattern that we will recognize as a pop_multi
19822 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
19824 Unfortunately, since this insn does not reflect very well the actual
19825 semantics of the operation, we need to annotate the insn for the benefit
19826 of DWARF2 frame unwind information. */
19828 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
19832 rtx dwarf
= NULL_RTX
;
19835 gcc_assert (num_regs
&& num_regs
<= 32);
19837 /* Workaround ARM10 VFPr1 bug. */
19838 if (num_regs
== 2 && !arm_arch6
)
19840 if (first_reg
== 15)
19846 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
19847 there could be up to 32 D-registers to restore.
19848 If there are more than 16 D-registers, make two recursive calls,
19849 each of which emits one pop_multi instruction. */
19852 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
19853 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
19857 /* The parallel needs to hold num_regs SETs
19858 and one SET for the stack update. */
19859 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19861 /* Increment the stack pointer, based on there being
19862 num_regs 8-byte registers to restore. */
19863 tmp
= gen_rtx_SET (VOIDmode
,
19865 plus_constant (Pmode
, base_reg
, 8 * num_regs
));
19866 RTX_FRAME_RELATED_P (tmp
) = 1;
19867 XVECEXP (par
, 0, 0) = tmp
;
19869 /* Now show every reg that will be restored, using a SET for each. */
19870 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
19872 reg
= gen_rtx_REG (DFmode
, i
);
19874 tmp
= gen_rtx_SET (VOIDmode
,
19878 plus_constant (Pmode
, base_reg
, 8 * j
)));
19879 RTX_FRAME_RELATED_P (tmp
) = 1;
19880 XVECEXP (par
, 0, j
+ 1) = tmp
;
19882 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19887 par
= emit_insn (par
);
19888 REG_NOTES (par
) = dwarf
;
19890 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
19891 base_reg
, base_reg
);
19894 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
19895 number of registers are being popped, multiple LDRD patterns are created for
19896 all register pairs. If odd number of registers are popped, last register is
19897 loaded by using LDR pattern. */
19899 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
19903 rtx par
= NULL_RTX
;
19904 rtx dwarf
= NULL_RTX
;
19905 rtx tmp
, reg
, tmp1
;
19908 return_in_pc
= (saved_regs_mask
& (1 << PC_REGNUM
)) ? true : false;
19909 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19910 if (saved_regs_mask
& (1 << i
))
19913 gcc_assert (num_regs
&& num_regs
<= 16);
19915 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
19916 to be popped. So, if num_regs is even, now it will become odd,
19917 and we can generate pop with PC. If num_regs is odd, it will be
19918 even now, and ldr with return can be generated for PC. */
19922 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19924 /* Var j iterates over all the registers to gather all the registers in
19925 saved_regs_mask. Var i gives index of saved registers in stack frame.
19926 A PARALLEL RTX of register-pair is created here, so that pattern for
19927 LDRD can be matched. As PC is always last register to be popped, and
19928 we have already decremented num_regs if PC, we don't have to worry
19929 about PC in this loop. */
19930 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
19931 if (saved_regs_mask
& (1 << j
))
19933 /* Create RTX for memory load. */
19934 reg
= gen_rtx_REG (SImode
, j
);
19935 tmp
= gen_rtx_SET (SImode
,
19937 gen_frame_mem (SImode
,
19938 plus_constant (Pmode
,
19939 stack_pointer_rtx
, 4 * i
)));
19940 RTX_FRAME_RELATED_P (tmp
) = 1;
19944 /* When saved-register index (i) is even, the RTX to be emitted is
19945 yet to be created. Hence create it first. The LDRD pattern we
19946 are generating is :
19947 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
19948 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
19949 where target registers need not be consecutive. */
19950 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19954 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
19955 added as 0th element and if i is odd, reg_i is added as 1st element
19956 of LDRD pattern shown above. */
19957 XVECEXP (par
, 0, (i
% 2)) = tmp
;
19958 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19962 /* When saved-register index (i) is odd, RTXs for both the registers
19963 to be loaded are generated in above given LDRD pattern, and the
19964 pattern can be emitted now. */
19965 par
= emit_insn (par
);
19966 REG_NOTES (par
) = dwarf
;
19967 RTX_FRAME_RELATED_P (par
) = 1;
19973 /* If the number of registers pushed is odd AND return_in_pc is false OR
19974 number of registers are even AND return_in_pc is true, last register is
19975 popped using LDR. It can be PC as well. Hence, adjust the stack first and
19976 then LDR with post increment. */
19978 /* Increment the stack pointer, based on there being
19979 num_regs 4-byte registers to restore. */
19980 tmp
= gen_rtx_SET (VOIDmode
,
19982 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
19983 RTX_FRAME_RELATED_P (tmp
) = 1;
19984 tmp
= emit_insn (tmp
);
19987 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
19988 stack_pointer_rtx
, stack_pointer_rtx
);
19993 if (((num_regs
% 2) == 1 && !return_in_pc
)
19994 || ((num_regs
% 2) == 0 && return_in_pc
))
19996 /* Scan for the single register to be popped. Skip until the saved
19997 register is found. */
19998 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20000 /* Gen LDR with post increment here. */
20001 tmp1
= gen_rtx_MEM (SImode
,
20002 gen_rtx_POST_INC (SImode
,
20003 stack_pointer_rtx
));
20004 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20006 reg
= gen_rtx_REG (SImode
, j
);
20007 tmp
= gen_rtx_SET (SImode
, reg
, tmp1
);
20008 RTX_FRAME_RELATED_P (tmp
) = 1;
20009 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20013 /* If return_in_pc, j must be PC_REGNUM. */
20014 gcc_assert (j
== PC_REGNUM
);
20015 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20016 XVECEXP (par
, 0, 0) = ret_rtx
;
20017 XVECEXP (par
, 0, 1) = tmp
;
20018 par
= emit_jump_insn (par
);
20022 par
= emit_insn (tmp
);
20023 REG_NOTES (par
) = dwarf
;
20024 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20025 stack_pointer_rtx
, stack_pointer_rtx
);
20029 else if ((num_regs
% 2) == 1 && return_in_pc
)
20031 /* There are 2 registers to be popped. So, generate the pattern
20032 pop_multiple_with_stack_update_and_return to pop in PC. */
20033 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20039 /* LDRD in ARM mode needs consecutive registers as operands. This function
20040 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20041 offset addressing and then generates one separate stack update. This provides
20042 more scheduling freedom, compared to writeback on every load. However,
20043 if the function returns using load into PC directly
20044 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20045 before the last load. TODO: Add a peephole optimization to recognize
20046 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20047 peephole optimization to merge the load at stack-offset zero
20048 with the stack update instruction using load with writeback
20049 in post-index addressing mode. */
20051 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20055 rtx par
= NULL_RTX
;
20056 rtx dwarf
= NULL_RTX
;
20059 /* Restore saved registers. */
20060 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20062 while (j
<= LAST_ARM_REGNUM
)
20063 if (saved_regs_mask
& (1 << j
))
20066 && (saved_regs_mask
& (1 << (j
+ 1)))
20067 && (j
+ 1) != PC_REGNUM
)
20069 /* Current register and next register form register pair for which
20070 LDRD can be generated. PC is always the last register popped, and
20071 we handle it separately. */
20073 mem
= gen_frame_mem (DImode
,
20074 plus_constant (Pmode
,
20078 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20080 tmp
= gen_rtx_SET (DImode
, gen_rtx_REG (DImode
, j
), mem
);
20081 tmp
= emit_insn (tmp
);
20082 RTX_FRAME_RELATED_P (tmp
) = 1;
20084 /* Generate dwarf info. */
20086 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20087 gen_rtx_REG (SImode
, j
),
20089 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20090 gen_rtx_REG (SImode
, j
+ 1),
20093 REG_NOTES (tmp
) = dwarf
;
20098 else if (j
!= PC_REGNUM
)
20100 /* Emit a single word load. */
20102 mem
= gen_frame_mem (SImode
,
20103 plus_constant (Pmode
,
20107 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20109 tmp
= gen_rtx_SET (SImode
, gen_rtx_REG (SImode
, j
), mem
);
20110 tmp
= emit_insn (tmp
);
20111 RTX_FRAME_RELATED_P (tmp
) = 1;
20113 /* Generate dwarf info. */
20114 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20115 gen_rtx_REG (SImode
, j
),
20121 else /* j == PC_REGNUM */
20127 /* Update the stack. */
20130 tmp
= gen_rtx_SET (Pmode
,
20132 plus_constant (Pmode
,
20135 tmp
= emit_insn (tmp
);
20136 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20137 stack_pointer_rtx
, stack_pointer_rtx
);
20141 if (saved_regs_mask
& (1 << PC_REGNUM
))
20143 /* Only PC is to be popped. */
20144 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20145 XVECEXP (par
, 0, 0) = ret_rtx
;
20146 tmp
= gen_rtx_SET (SImode
,
20147 gen_rtx_REG (SImode
, PC_REGNUM
),
20148 gen_frame_mem (SImode
,
20149 gen_rtx_POST_INC (SImode
,
20150 stack_pointer_rtx
)));
20151 RTX_FRAME_RELATED_P (tmp
) = 1;
20152 XVECEXP (par
, 0, 1) = tmp
;
20153 par
= emit_jump_insn (par
);
20155 /* Generate dwarf info. */
20156 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20157 gen_rtx_REG (SImode
, PC_REGNUM
),
20159 REG_NOTES (par
) = dwarf
;
20160 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20161 stack_pointer_rtx
, stack_pointer_rtx
);
20165 /* Calculate the size of the return value that is passed in registers. */
20167 arm_size_return_regs (void)
20169 enum machine_mode mode
;
20171 if (crtl
->return_rtx
!= 0)
20172 mode
= GET_MODE (crtl
->return_rtx
);
20174 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20176 return GET_MODE_SIZE (mode
);
20179 /* Return true if the current function needs to save/restore LR. */
20181 thumb_force_lr_save (void)
20183 return !cfun
->machine
->lr_save_eliminated
20184 && (!leaf_function_p ()
20185 || thumb_far_jump_used_p ()
20186 || df_regs_ever_live_p (LR_REGNUM
));
20189 /* We do not know if r3 will be available because
20190 we do have an indirect tailcall happening in this
20191 particular case. */
20193 is_indirect_tailcall_p (rtx call
)
20195 rtx pat
= PATTERN (call
);
20197 /* Indirect tail call. */
20198 pat
= XVECEXP (pat
, 0, 0);
20199 if (GET_CODE (pat
) == SET
)
20200 pat
= SET_SRC (pat
);
20202 pat
= XEXP (XEXP (pat
, 0), 0);
20203 return REG_P (pat
);
20206 /* Return true if r3 is used by any of the tail call insns in the
20207 current function. */
20209 any_sibcall_could_use_r3 (void)
20214 if (!crtl
->tail_call_emit
)
20216 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20217 if (e
->flags
& EDGE_SIBCALL
)
20219 rtx call
= BB_END (e
->src
);
20220 if (!CALL_P (call
))
20221 call
= prev_nonnote_nondebug_insn (call
);
20222 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20223 if (find_regno_fusage (call
, USE
, 3)
20224 || is_indirect_tailcall_p (call
))
20231 /* Compute the distance from register FROM to register TO.
20232 These can be the arg pointer (26), the soft frame pointer (25),
20233 the stack pointer (13) or the hard frame pointer (11).
20234 In thumb mode r7 is used as the soft frame pointer, if needed.
20235 Typical stack layout looks like this:
20237 old stack pointer -> | |
20240 | | saved arguments for
20241 | | vararg functions
20244 hard FP & arg pointer -> | | \
20252 soft frame pointer -> | | /
20257 locals base pointer -> | | /
20262 current stack pointer -> | | /
20265 For a given function some or all of these stack components
20266 may not be needed, giving rise to the possibility of
20267 eliminating some of the registers.
20269 The values returned by this function must reflect the behavior
20270 of arm_expand_prologue() and arm_compute_save_reg_mask().
20272 The sign of the number returned reflects the direction of stack
20273 growth, so the values are positive for all eliminations except
20274 from the soft frame pointer to the hard frame pointer.
20276 SFP may point just inside the local variables block to ensure correct
20280 /* Calculate stack offsets. These are used to calculate register elimination
20281 offsets and in prologue/epilogue code. Also calculates which registers
20282 should be saved. */
20284 static arm_stack_offsets
*
20285 arm_get_frame_offsets (void)
20287 struct arm_stack_offsets
*offsets
;
20288 unsigned long func_type
;
20292 HOST_WIDE_INT frame_size
;
20295 offsets
= &cfun
->machine
->stack_offsets
;
20297 /* We need to know if we are a leaf function. Unfortunately, it
20298 is possible to be called after start_sequence has been called,
20299 which causes get_insns to return the insns for the sequence,
20300 not the function, which will cause leaf_function_p to return
20301 the incorrect result.
20303 to know about leaf functions once reload has completed, and the
20304 frame size cannot be changed after that time, so we can safely
20305 use the cached value. */
20307 if (reload_completed
)
20310 /* Initially this is the size of the local variables. It will translated
20311 into an offset once we have determined the size of preceding data. */
20312 frame_size
= ROUND_UP_WORD (get_frame_size ());
20314 leaf
= leaf_function_p ();
20316 /* Space for variadic functions. */
20317 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20319 /* In Thumb mode this is incorrect, but never used. */
20320 offsets
->frame
= offsets
->saved_args
+ (frame_pointer_needed
? 4 : 0) +
20321 arm_compute_static_chain_stack_bytes();
20325 unsigned int regno
;
20327 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
20328 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20329 saved
= core_saved
;
20331 /* We know that SP will be doubleword aligned on entry, and we must
20332 preserve that condition at any subroutine call. We also require the
20333 soft frame pointer to be doubleword aligned. */
20335 if (TARGET_REALLY_IWMMXT
)
20337 /* Check for the call-saved iWMMXt registers. */
20338 for (regno
= FIRST_IWMMXT_REGNUM
;
20339 regno
<= LAST_IWMMXT_REGNUM
;
20341 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20345 func_type
= arm_current_func_type ();
20346 /* Space for saved VFP registers. */
20347 if (! IS_VOLATILE (func_type
)
20348 && TARGET_HARD_FLOAT
&& TARGET_VFP
)
20349 saved
+= arm_get_vfp_saved_size ();
20351 else /* TARGET_THUMB1 */
20353 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
20354 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20355 saved
= core_saved
;
20356 if (TARGET_BACKTRACE
)
20360 /* Saved registers include the stack frame. */
20361 offsets
->saved_regs
= offsets
->saved_args
+ saved
+
20362 arm_compute_static_chain_stack_bytes();
20363 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20364 /* A leaf function does not need any stack alignment if it has nothing
20366 if (leaf
&& frame_size
== 0
20367 /* However if it calls alloca(), we have a dynamically allocated
20368 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20369 && ! cfun
->calls_alloca
)
20371 offsets
->outgoing_args
= offsets
->soft_frame
;
20372 offsets
->locals_base
= offsets
->soft_frame
;
20376 /* Ensure SFP has the correct alignment. */
20377 if (ARM_DOUBLEWORD_ALIGN
20378 && (offsets
->soft_frame
& 7))
20380 offsets
->soft_frame
+= 4;
20381 /* Try to align stack by pushing an extra reg. Don't bother doing this
20382 when there is a stack frame as the alignment will be rolled into
20383 the normal stack adjustment. */
20384 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20388 /* If it is safe to use r3, then do so. This sometimes
20389 generates better code on Thumb-2 by avoiding the need to
20390 use 32-bit push/pop instructions. */
20391 if (! any_sibcall_could_use_r3 ()
20392 && arm_size_return_regs () <= 12
20393 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20395 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20400 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
20402 /* Avoid fixed registers; they may be changed at
20403 arbitrary times so it's unsafe to restore them
20404 during the epilogue. */
20406 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
20415 offsets
->saved_regs
+= 4;
20416 offsets
->saved_regs_mask
|= (1 << reg
);
20421 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
20422 offsets
->outgoing_args
= (offsets
->locals_base
20423 + crtl
->outgoing_args_size
);
20425 if (ARM_DOUBLEWORD_ALIGN
)
20427 /* Ensure SP remains doubleword aligned. */
20428 if (offsets
->outgoing_args
& 7)
20429 offsets
->outgoing_args
+= 4;
20430 gcc_assert (!(offsets
->outgoing_args
& 7));
20437 /* Calculate the relative offsets for the different stack pointers. Positive
20438 offsets are in the direction of stack growth. */
20441 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20443 arm_stack_offsets
*offsets
;
20445 offsets
= arm_get_frame_offsets ();
20447 /* OK, now we have enough information to compute the distances.
20448 There must be an entry in these switch tables for each pair
20449 of registers in ELIMINABLE_REGS, even if some of the entries
20450 seem to be redundant or useless. */
20453 case ARG_POINTER_REGNUM
:
20456 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20459 case FRAME_POINTER_REGNUM
:
20460 /* This is the reverse of the soft frame pointer
20461 to hard frame pointer elimination below. */
20462 return offsets
->soft_frame
- offsets
->saved_args
;
20464 case ARM_HARD_FRAME_POINTER_REGNUM
:
20465 /* This is only non-zero in the case where the static chain register
20466 is stored above the frame. */
20467 return offsets
->frame
- offsets
->saved_args
- 4;
20469 case STACK_POINTER_REGNUM
:
20470 /* If nothing has been pushed on the stack at all
20471 then this will return -4. This *is* correct! */
20472 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
20475 gcc_unreachable ();
20477 gcc_unreachable ();
20479 case FRAME_POINTER_REGNUM
:
20482 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20485 case ARM_HARD_FRAME_POINTER_REGNUM
:
20486 /* The hard frame pointer points to the top entry in the
20487 stack frame. The soft frame pointer to the bottom entry
20488 in the stack frame. If there is no stack frame at all,
20489 then they are identical. */
20491 return offsets
->frame
- offsets
->soft_frame
;
20493 case STACK_POINTER_REGNUM
:
20494 return offsets
->outgoing_args
- offsets
->soft_frame
;
20497 gcc_unreachable ();
20499 gcc_unreachable ();
20502 /* You cannot eliminate from the stack pointer.
20503 In theory you could eliminate from the hard frame
20504 pointer to the stack pointer, but this will never
20505 happen, since if a stack frame is not needed the
20506 hard frame pointer will never be used. */
20507 gcc_unreachable ();
20511 /* Given FROM and TO register numbers, say whether this elimination is
20512 allowed. Frame pointer elimination is automatically handled.
20514 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20515 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20516 pointer, we must eliminate FRAME_POINTER_REGNUM into
20517 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20518 ARG_POINTER_REGNUM. */
20521 arm_can_eliminate (const int from
, const int to
)
20523 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
20524 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
20525 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
20526 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
20530 /* Emit RTL to save coprocessor registers on function entry. Returns the
20531 number of bytes pushed. */
20534 arm_save_coproc_regs(void)
20536 int saved_size
= 0;
20538 unsigned start_reg
;
20541 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
20542 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
20544 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20545 insn
= gen_rtx_MEM (V2SImode
, insn
);
20546 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
20547 RTX_FRAME_RELATED_P (insn
) = 1;
20551 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
20553 start_reg
= FIRST_VFP_REGNUM
;
20555 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
20557 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
20558 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
20560 if (start_reg
!= reg
)
20561 saved_size
+= vfp_emit_fstmd (start_reg
,
20562 (reg
- start_reg
) / 2);
20563 start_reg
= reg
+ 2;
20566 if (start_reg
!= reg
)
20567 saved_size
+= vfp_emit_fstmd (start_reg
,
20568 (reg
- start_reg
) / 2);
20574 /* Set the Thumb frame pointer from the stack pointer. */
20577 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
20579 HOST_WIDE_INT amount
;
20582 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
20584 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20585 stack_pointer_rtx
, GEN_INT (amount
)));
20588 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
20589 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20590 expects the first two operands to be the same. */
20593 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20595 hard_frame_pointer_rtx
));
20599 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20600 hard_frame_pointer_rtx
,
20601 stack_pointer_rtx
));
20603 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
20604 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
20605 RTX_FRAME_RELATED_P (dwarf
) = 1;
20606 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20609 RTX_FRAME_RELATED_P (insn
) = 1;
20612 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20615 arm_expand_prologue (void)
20620 unsigned long live_regs_mask
;
20621 unsigned long func_type
;
20623 int saved_pretend_args
= 0;
20624 int saved_regs
= 0;
20625 unsigned HOST_WIDE_INT args_to_push
;
20626 arm_stack_offsets
*offsets
;
20628 func_type
= arm_current_func_type ();
20630 /* Naked functions don't have prologues. */
20631 if (IS_NAKED (func_type
))
20634 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20635 args_to_push
= crtl
->args
.pretend_args_size
;
20637 /* Compute which register we will have to save onto the stack. */
20638 offsets
= arm_get_frame_offsets ();
20639 live_regs_mask
= offsets
->saved_regs_mask
;
20641 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
20643 if (IS_STACKALIGN (func_type
))
20647 /* Handle a word-aligned stack pointer. We generate the following:
20652 <save and restore r0 in normal prologue/epilogue>
20656 The unwinder doesn't need to know about the stack realignment.
20657 Just tell it we saved SP in r0. */
20658 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
20660 r0
= gen_rtx_REG (SImode
, 0);
20661 r1
= gen_rtx_REG (SImode
, 1);
20663 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
20664 RTX_FRAME_RELATED_P (insn
) = 1;
20665 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
20667 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
20669 /* ??? The CFA changes here, which may cause GDB to conclude that it
20670 has entered a different function. That said, the unwind info is
20671 correct, individually, before and after this instruction because
20672 we've described the save of SP, which will override the default
20673 handling of SP as restoring from the CFA. */
20674 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
20677 /* For APCS frames, if IP register is clobbered
20678 when creating frame, save that register in a special
20680 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
20682 if (IS_INTERRUPT (func_type
))
20684 /* Interrupt functions must not corrupt any registers.
20685 Creating a frame pointer however, corrupts the IP
20686 register, so we must push it first. */
20687 emit_multi_reg_push (1 << IP_REGNUM
);
20689 /* Do not set RTX_FRAME_RELATED_P on this insn.
20690 The dwarf stack unwinding code only wants to see one
20691 stack decrement per function, and this is not it. If
20692 this instruction is labeled as being part of the frame
20693 creation sequence then dwarf2out_frame_debug_expr will
20694 die when it encounters the assignment of IP to FP
20695 later on, since the use of SP here establishes SP as
20696 the CFA register and not IP.
20698 Anyway this instruction is not really part of the stack
20699 frame creation although it is part of the prologue. */
20701 else if (IS_NESTED (func_type
))
20703 /* The static chain register is the same as the IP register
20704 used as a scratch register during stack frame creation.
20705 To get around this need to find somewhere to store IP
20706 whilst the frame is being created. We try the following
20709 1. The last argument register r3.
20710 2. A slot on the stack above the frame. (This only
20711 works if the function is not a varargs function).
20712 3. Register r3 again, after pushing the argument registers
20715 Note - we only need to tell the dwarf2 backend about the SP
20716 adjustment in the second variant; the static chain register
20717 doesn't need to be unwound, as it doesn't contain a value
20718 inherited from the caller. */
20720 if (!arm_r3_live_at_start_p ())
20721 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
20722 else if (args_to_push
== 0)
20726 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
20729 insn
= gen_rtx_PRE_DEC (SImode
, stack_pointer_rtx
);
20730 insn
= emit_set_insn (gen_frame_mem (SImode
, insn
), ip_rtx
);
20733 /* Just tell the dwarf backend that we adjusted SP. */
20734 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
20735 plus_constant (Pmode
, stack_pointer_rtx
,
20737 RTX_FRAME_RELATED_P (insn
) = 1;
20738 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20742 /* Store the args on the stack. */
20743 if (cfun
->machine
->uses_anonymous_args
)
20744 insn
= emit_multi_reg_push
20745 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
20748 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20749 GEN_INT (- args_to_push
)));
20751 RTX_FRAME_RELATED_P (insn
) = 1;
20753 saved_pretend_args
= 1;
20754 fp_offset
= args_to_push
;
20757 /* Now reuse r3 to preserve IP. */
20758 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
20762 insn
= emit_set_insn (ip_rtx
,
20763 plus_constant (Pmode
, stack_pointer_rtx
,
20765 RTX_FRAME_RELATED_P (insn
) = 1;
20770 /* Push the argument registers, or reserve space for them. */
20771 if (cfun
->machine
->uses_anonymous_args
)
20772 insn
= emit_multi_reg_push
20773 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
20776 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20777 GEN_INT (- args_to_push
)));
20778 RTX_FRAME_RELATED_P (insn
) = 1;
20781 /* If this is an interrupt service routine, and the link register
20782 is going to be pushed, and we're not generating extra
20783 push of IP (needed when frame is needed and frame layout if apcs),
20784 subtracting four from LR now will mean that the function return
20785 can be done with a single instruction. */
20786 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
20787 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
20788 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
20791 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
20793 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
20796 if (live_regs_mask
)
20798 saved_regs
+= bit_count (live_regs_mask
) * 4;
20799 if (optimize_size
&& !frame_pointer_needed
20800 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
20802 /* If no coprocessor registers are being pushed and we don't have
20803 to worry about a frame pointer then push extra registers to
20804 create the stack frame. This is done is a way that does not
20805 alter the frame layout, so is independent of the epilogue. */
20809 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
20811 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
20812 if (frame
&& n
* 4 >= frame
)
20815 live_regs_mask
|= (1 << n
) - 1;
20816 saved_regs
+= frame
;
20821 && current_tune
->prefer_ldrd_strd
20822 && !optimize_function_for_size_p (cfun
))
20826 thumb2_emit_strd_push (live_regs_mask
);
20828 else if (TARGET_ARM
20829 && !TARGET_APCS_FRAME
20830 && !IS_INTERRUPT (func_type
))
20832 arm_emit_strd_push (live_regs_mask
);
20836 insn
= emit_multi_reg_push (live_regs_mask
);
20837 RTX_FRAME_RELATED_P (insn
) = 1;
20842 insn
= emit_multi_reg_push (live_regs_mask
);
20843 RTX_FRAME_RELATED_P (insn
) = 1;
20847 if (! IS_VOLATILE (func_type
))
20848 saved_regs
+= arm_save_coproc_regs ();
20850 if (frame_pointer_needed
&& TARGET_ARM
)
20852 /* Create the new frame pointer. */
20853 if (TARGET_APCS_FRAME
)
20855 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
20856 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
20857 RTX_FRAME_RELATED_P (insn
) = 1;
20859 if (IS_NESTED (func_type
))
20861 /* Recover the static chain register. */
20862 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
20863 insn
= gen_rtx_REG (SImode
, 3);
20864 else /* if (crtl->args.pretend_args_size == 0) */
20866 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
20867 insn
= gen_frame_mem (SImode
, insn
);
20869 emit_set_insn (ip_rtx
, insn
);
20870 /* Add a USE to stop propagate_one_insn() from barfing. */
20871 emit_insn (gen_force_register_use (ip_rtx
));
20876 insn
= GEN_INT (saved_regs
- 4);
20877 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20878 stack_pointer_rtx
, insn
));
20879 RTX_FRAME_RELATED_P (insn
) = 1;
20883 if (flag_stack_usage_info
)
20884 current_function_static_stack_size
20885 = offsets
->outgoing_args
- offsets
->saved_args
;
20887 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
20889 /* This add can produce multiple insns for a large constant, so we
20890 need to get tricky. */
20891 rtx last
= get_last_insn ();
20893 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
20894 - offsets
->outgoing_args
);
20896 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20900 last
= last
? NEXT_INSN (last
) : get_insns ();
20901 RTX_FRAME_RELATED_P (last
) = 1;
20903 while (last
!= insn
);
20905 /* If the frame pointer is needed, emit a special barrier that
20906 will prevent the scheduler from moving stores to the frame
20907 before the stack adjustment. */
20908 if (frame_pointer_needed
)
20909 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
20910 hard_frame_pointer_rtx
));
20914 if (frame_pointer_needed
&& TARGET_THUMB2
)
20915 thumb_set_frame_pointer (offsets
);
20917 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
20919 unsigned long mask
;
20921 mask
= live_regs_mask
;
20922 mask
&= THUMB2_WORK_REGS
;
20923 if (!IS_NESTED (func_type
))
20924 mask
|= (1 << IP_REGNUM
);
20925 arm_load_pic_register (mask
);
20928 /* If we are profiling, make sure no instructions are scheduled before
20929 the call to mcount. Similarly if the user has requested no
20930 scheduling in the prolog. Similarly if we want non-call exceptions
20931 using the EABI unwinder, to prevent faulting instructions from being
20932 swapped with a stack adjustment. */
20933 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
20934 || (arm_except_unwind_info (&global_options
) == UI_TARGET
20935 && cfun
->can_throw_non_call_exceptions
))
20936 emit_insn (gen_blockage ());
20938 /* If the link register is being kept alive, with the return address in it,
20939 then make sure that it does not get reused by the ce2 pass. */
20940 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
20941 cfun
->machine
->lr_save_eliminated
= 1;
20944 /* Print condition code to STREAM. Helper function for arm_print_operand. */
20946 arm_print_condition (FILE *stream
)
20948 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
20950 /* Branch conversion is not implemented for Thumb-2. */
20953 output_operand_lossage ("predicated Thumb instruction");
20956 if (current_insn_predicate
!= NULL
)
20958 output_operand_lossage
20959 ("predicated instruction in conditional sequence");
20963 fputs (arm_condition_codes
[arm_current_cc
], stream
);
20965 else if (current_insn_predicate
)
20967 enum arm_cond_code code
;
20971 output_operand_lossage ("predicated Thumb instruction");
20975 code
= get_arm_condition_code (current_insn_predicate
);
20976 fputs (arm_condition_codes
[code
], stream
);
20981 /* If CODE is 'd', then the X is a condition operand and the instruction
20982 should only be executed if the condition is true.
20983 if CODE is 'D', then the X is a condition operand and the instruction
20984 should only be executed if the condition is false: however, if the mode
20985 of the comparison is CCFPEmode, then always execute the instruction -- we
20986 do this because in these circumstances !GE does not necessarily imply LT;
20987 in these cases the instruction pattern will take care to make sure that
20988 an instruction containing %d will follow, thereby undoing the effects of
20989 doing this instruction unconditionally.
20990 If CODE is 'N' then X is a floating point operand that must be negated
20992 If CODE is 'B' then output a bitwise inverted value of X (a const int).
20993 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
20995 arm_print_operand (FILE *stream
, rtx x
, int code
)
21000 fputs (ASM_COMMENT_START
, stream
);
21004 fputs (user_label_prefix
, stream
);
21008 fputs (REGISTER_PREFIX
, stream
);
21012 arm_print_condition (stream
);
21016 /* Nothing in unified syntax, otherwise the current condition code. */
21017 if (!TARGET_UNIFIED_ASM
)
21018 arm_print_condition (stream
);
21022 /* The current condition code in unified syntax, otherwise nothing. */
21023 if (TARGET_UNIFIED_ASM
)
21024 arm_print_condition (stream
);
21028 /* The current condition code for a condition code setting instruction.
21029 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21030 if (TARGET_UNIFIED_ASM
)
21032 fputc('s', stream
);
21033 arm_print_condition (stream
);
21037 arm_print_condition (stream
);
21038 fputc('s', stream
);
21043 /* If the instruction is conditionally executed then print
21044 the current condition code, otherwise print 's'. */
21045 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
21046 if (current_insn_predicate
)
21047 arm_print_condition (stream
);
21049 fputc('s', stream
);
21052 /* %# is a "break" sequence. It doesn't output anything, but is used to
21053 separate e.g. operand numbers from following text, if that text consists
21054 of further digits which we don't want to be part of the operand
21062 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
21063 r
= real_value_negate (&r
);
21064 fprintf (stream
, "%s", fp_const_from_val (&r
));
21068 /* An integer or symbol address without a preceding # sign. */
21070 switch (GET_CODE (x
))
21073 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21077 output_addr_const (stream
, x
);
21081 if (GET_CODE (XEXP (x
, 0)) == PLUS
21082 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21084 output_addr_const (stream
, x
);
21087 /* Fall through. */
21090 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21094 /* An integer that we want to print in HEX. */
21096 switch (GET_CODE (x
))
21099 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21103 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21108 if (CONST_INT_P (x
))
21111 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21112 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21116 putc ('~', stream
);
21117 output_addr_const (stream
, x
);
21122 /* The low 16 bits of an immediate constant. */
21123 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
21127 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
21131 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
21139 shift
= shift_op (x
, &val
);
21143 fprintf (stream
, ", %s ", shift
);
21145 arm_print_operand (stream
, XEXP (x
, 1), 0);
21147 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21152 /* An explanation of the 'Q', 'R' and 'H' register operands:
21154 In a pair of registers containing a DI or DF value the 'Q'
21155 operand returns the register number of the register containing
21156 the least significant part of the value. The 'R' operand returns
21157 the register number of the register containing the most
21158 significant part of the value.
21160 The 'H' operand returns the higher of the two register numbers.
21161 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21162 same as the 'Q' operand, since the most significant part of the
21163 value is held in the lower number register. The reverse is true
21164 on systems where WORDS_BIG_ENDIAN is false.
21166 The purpose of these operands is to distinguish between cases
21167 where the endian-ness of the values is important (for example
21168 when they are added together), and cases where the endian-ness
21169 is irrelevant, but the order of register operations is important.
21170 For example when loading a value from memory into a register
21171 pair, the endian-ness does not matter. Provided that the value
21172 from the lower memory address is put into the lower numbered
21173 register, and the value from the higher address is put into the
21174 higher numbered register, the load will work regardless of whether
21175 the value being loaded is big-wordian or little-wordian. The
21176 order of the two register loads can matter however, if the address
21177 of the memory location is actually held in one of the registers
21178 being overwritten by the load.
21180 The 'Q' and 'R' constraints are also available for 64-bit
21183 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21185 rtx part
= gen_lowpart (SImode
, x
);
21186 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21190 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21192 output_operand_lossage ("invalid operand for code '%c'", code
);
21196 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
21200 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21202 enum machine_mode mode
= GET_MODE (x
);
21205 if (mode
== VOIDmode
)
21207 part
= gen_highpart_mode (SImode
, mode
, x
);
21208 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21212 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21214 output_operand_lossage ("invalid operand for code '%c'", code
);
21218 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
21222 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21224 output_operand_lossage ("invalid operand for code '%c'", code
);
21228 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
21232 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21234 output_operand_lossage ("invalid operand for code '%c'", code
);
21238 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
21242 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21244 output_operand_lossage ("invalid operand for code '%c'", code
);
21248 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
21252 asm_fprintf (stream
, "%r",
21253 REG_P (XEXP (x
, 0))
21254 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
21258 asm_fprintf (stream
, "{%r-%r}",
21260 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
21263 /* Like 'M', but writing doubleword vector registers, for use by Neon
21267 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
21268 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
21270 asm_fprintf (stream
, "{d%d}", regno
);
21272 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
21277 /* CONST_TRUE_RTX means always -- that's the default. */
21278 if (x
== const_true_rtx
)
21281 if (!COMPARISON_P (x
))
21283 output_operand_lossage ("invalid operand for code '%c'", code
);
21287 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
21292 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21293 want to do that. */
21294 if (x
== const_true_rtx
)
21296 output_operand_lossage ("instruction never executed");
21299 if (!COMPARISON_P (x
))
21301 output_operand_lossage ("invalid operand for code '%c'", code
);
21305 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
21306 (get_arm_condition_code (x
))],
21316 /* Former Maverick support, removed after GCC-4.7. */
21317 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
21322 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
21323 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
21324 /* Bad value for wCG register number. */
21326 output_operand_lossage ("invalid operand for code '%c'", code
);
21331 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
21334 /* Print an iWMMXt control register name. */
21336 if (!CONST_INT_P (x
)
21338 || INTVAL (x
) >= 16)
21339 /* Bad value for wC register number. */
21341 output_operand_lossage ("invalid operand for code '%c'", code
);
21347 static const char * wc_reg_names
[16] =
21349 "wCID", "wCon", "wCSSF", "wCASF",
21350 "wC4", "wC5", "wC6", "wC7",
21351 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21352 "wC12", "wC13", "wC14", "wC15"
21355 fputs (wc_reg_names
[INTVAL (x
)], stream
);
21359 /* Print the high single-precision register of a VFP double-precision
21363 int mode
= GET_MODE (x
);
21366 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
21368 output_operand_lossage ("invalid operand for code '%c'", code
);
21373 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
21375 output_operand_lossage ("invalid operand for code '%c'", code
);
21379 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
21383 /* Print a VFP/Neon double precision or quad precision register name. */
21387 int mode
= GET_MODE (x
);
21388 int is_quad
= (code
== 'q');
21391 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
21393 output_operand_lossage ("invalid operand for code '%c'", code
);
21398 || !IS_VFP_REGNUM (REGNO (x
)))
21400 output_operand_lossage ("invalid operand for code '%c'", code
);
21405 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
21406 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
21408 output_operand_lossage ("invalid operand for code '%c'", code
);
21412 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
21413 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
21417 /* These two codes print the low/high doubleword register of a Neon quad
21418 register, respectively. For pair-structure types, can also print
21419 low/high quadword registers. */
21423 int mode
= GET_MODE (x
);
21426 if ((GET_MODE_SIZE (mode
) != 16
21427 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
21429 output_operand_lossage ("invalid operand for code '%c'", code
);
21434 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
21436 output_operand_lossage ("invalid operand for code '%c'", code
);
21440 if (GET_MODE_SIZE (mode
) == 16)
21441 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
21442 + (code
== 'f' ? 1 : 0));
21444 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
21445 + (code
== 'f' ? 1 : 0));
21449 /* Print a VFPv3 floating-point constant, represented as an integer
21453 int index
= vfp3_const_double_index (x
);
21454 gcc_assert (index
!= -1);
21455 fprintf (stream
, "%d", index
);
21459 /* Print bits representing opcode features for Neon.
21461 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21462 and polynomials as unsigned.
21464 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21466 Bit 2 is 1 for rounding functions, 0 otherwise. */
21468 /* Identify the type as 's', 'u', 'p' or 'f'. */
21471 HOST_WIDE_INT bits
= INTVAL (x
);
21472 fputc ("uspf"[bits
& 3], stream
);
21476 /* Likewise, but signed and unsigned integers are both 'i'. */
21479 HOST_WIDE_INT bits
= INTVAL (x
);
21480 fputc ("iipf"[bits
& 3], stream
);
21484 /* As for 'T', but emit 'u' instead of 'p'. */
21487 HOST_WIDE_INT bits
= INTVAL (x
);
21488 fputc ("usuf"[bits
& 3], stream
);
21492 /* Bit 2: rounding (vs none). */
21495 HOST_WIDE_INT bits
= INTVAL (x
);
21496 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
21500 /* Memory operand for vld1/vst1 instruction. */
21504 bool postinc
= FALSE
;
21505 unsigned align
, memsize
, align_bits
;
21507 gcc_assert (MEM_P (x
));
21508 addr
= XEXP (x
, 0);
21509 if (GET_CODE (addr
) == POST_INC
)
21512 addr
= XEXP (addr
, 0);
21514 asm_fprintf (stream
, "[%r", REGNO (addr
));
21516 /* We know the alignment of this access, so we can emit a hint in the
21517 instruction (for some alignments) as an aid to the memory subsystem
21519 align
= MEM_ALIGN (x
) >> 3;
21520 memsize
= MEM_SIZE (x
);
21522 /* Only certain alignment specifiers are supported by the hardware. */
21523 if (memsize
== 32 && (align
% 32) == 0)
21525 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
21527 else if (memsize
>= 8 && (align
% 8) == 0)
21532 if (align_bits
!= 0)
21533 asm_fprintf (stream
, ":%d", align_bits
);
21535 asm_fprintf (stream
, "]");
21538 fputs("!", stream
);
21546 gcc_assert (MEM_P (x
));
21547 addr
= XEXP (x
, 0);
21548 gcc_assert (REG_P (addr
));
21549 asm_fprintf (stream
, "[%r]", REGNO (addr
));
21553 /* Translate an S register number into a D register number and element index. */
21556 int mode
= GET_MODE (x
);
21559 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
21561 output_operand_lossage ("invalid operand for code '%c'", code
);
21566 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
21568 output_operand_lossage ("invalid operand for code '%c'", code
);
21572 regno
= regno
- FIRST_VFP_REGNUM
;
21573 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
21578 gcc_assert (CONST_DOUBLE_P (x
));
21579 fprintf (stream
, "#%d", vfp3_const_double_for_fract_bits (x
));
21582 /* Register specifier for vld1.16/vst1.16. Translate the S register
21583 number into a D register number and element index. */
21586 int mode
= GET_MODE (x
);
21589 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
21591 output_operand_lossage ("invalid operand for code '%c'", code
);
21596 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
21598 output_operand_lossage ("invalid operand for code '%c'", code
);
21602 regno
= regno
- FIRST_VFP_REGNUM
;
21603 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
21610 output_operand_lossage ("missing operand");
21614 switch (GET_CODE (x
))
21617 asm_fprintf (stream
, "%r", REGNO (x
));
21621 output_memory_reference_mode
= GET_MODE (x
);
21622 output_address (XEXP (x
, 0));
21629 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
21630 sizeof (fpstr
), 0, 1);
21631 fprintf (stream
, "#%s", fpstr
);
21634 fprintf (stream
, "#%s", fp_immediate_constant (x
));
21638 gcc_assert (GET_CODE (x
) != NEG
);
21639 fputc ('#', stream
);
21640 if (GET_CODE (x
) == HIGH
)
21642 fputs (":lower16:", stream
);
21646 output_addr_const (stream
, x
);
21652 /* Target hook for printing a memory address. */
21654 arm_print_operand_address (FILE *stream
, rtx x
)
21658 int is_minus
= GET_CODE (x
) == MINUS
;
21661 asm_fprintf (stream
, "[%r]", REGNO (x
));
21662 else if (GET_CODE (x
) == PLUS
|| is_minus
)
21664 rtx base
= XEXP (x
, 0);
21665 rtx index
= XEXP (x
, 1);
21666 HOST_WIDE_INT offset
= 0;
21668 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
21670 /* Ensure that BASE is a register. */
21671 /* (one of them must be). */
21672 /* Also ensure the SP is not used as in index register. */
21677 switch (GET_CODE (index
))
21680 offset
= INTVAL (index
);
21683 asm_fprintf (stream
, "[%r, #%wd]",
21684 REGNO (base
), offset
);
21688 asm_fprintf (stream
, "[%r, %s%r]",
21689 REGNO (base
), is_minus
? "-" : "",
21699 asm_fprintf (stream
, "[%r, %s%r",
21700 REGNO (base
), is_minus
? "-" : "",
21701 REGNO (XEXP (index
, 0)));
21702 arm_print_operand (stream
, index
, 'S');
21703 fputs ("]", stream
);
21708 gcc_unreachable ();
21711 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
21712 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
21714 extern enum machine_mode output_memory_reference_mode
;
21716 gcc_assert (REG_P (XEXP (x
, 0)));
21718 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
21719 asm_fprintf (stream
, "[%r, #%s%d]!",
21720 REGNO (XEXP (x
, 0)),
21721 GET_CODE (x
) == PRE_DEC
? "-" : "",
21722 GET_MODE_SIZE (output_memory_reference_mode
));
21724 asm_fprintf (stream
, "[%r], #%s%d",
21725 REGNO (XEXP (x
, 0)),
21726 GET_CODE (x
) == POST_DEC
? "-" : "",
21727 GET_MODE_SIZE (output_memory_reference_mode
));
21729 else if (GET_CODE (x
) == PRE_MODIFY
)
21731 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
21732 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
21733 asm_fprintf (stream
, "#%wd]!",
21734 INTVAL (XEXP (XEXP (x
, 1), 1)));
21736 asm_fprintf (stream
, "%r]!",
21737 REGNO (XEXP (XEXP (x
, 1), 1)));
21739 else if (GET_CODE (x
) == POST_MODIFY
)
21741 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
21742 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
21743 asm_fprintf (stream
, "#%wd",
21744 INTVAL (XEXP (XEXP (x
, 1), 1)));
21746 asm_fprintf (stream
, "%r",
21747 REGNO (XEXP (XEXP (x
, 1), 1)));
21749 else output_addr_const (stream
, x
);
21754 asm_fprintf (stream
, "[%r]", REGNO (x
));
21755 else if (GET_CODE (x
) == POST_INC
)
21756 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
21757 else if (GET_CODE (x
) == PLUS
)
21759 gcc_assert (REG_P (XEXP (x
, 0)));
21760 if (CONST_INT_P (XEXP (x
, 1)))
21761 asm_fprintf (stream
, "[%r, #%wd]",
21762 REGNO (XEXP (x
, 0)),
21763 INTVAL (XEXP (x
, 1)));
21765 asm_fprintf (stream
, "[%r, %r]",
21766 REGNO (XEXP (x
, 0)),
21767 REGNO (XEXP (x
, 1)));
21770 output_addr_const (stream
, x
);
21774 /* Target hook for indicating whether a punctuation character for
21775 TARGET_PRINT_OPERAND is valid. */
21777 arm_print_operand_punct_valid_p (unsigned char code
)
21779 return (code
== '@' || code
== '|' || code
== '.'
21780 || code
== '(' || code
== ')' || code
== '#'
21781 || (TARGET_32BIT
&& (code
== '?'))
21782 || (TARGET_THUMB2
&& (code
== '!'))
21783 || (TARGET_THUMB
&& (code
== '_')));
21786 /* Target hook for assembling integer objects. The ARM version needs to
21787 handle word-sized values specially. */
21789 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
21791 enum machine_mode mode
;
21793 if (size
== UNITS_PER_WORD
&& aligned_p
)
21795 fputs ("\t.word\t", asm_out_file
);
21796 output_addr_const (asm_out_file
, x
);
21798 /* Mark symbols as position independent. We only do this in the
21799 .text segment, not in the .data segment. */
21800 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
21801 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
21803 /* See legitimize_pic_address for an explanation of the
21804 TARGET_VXWORKS_RTP check. */
21805 if (!arm_pic_data_is_text_relative
21806 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
21807 fputs ("(GOT)", asm_out_file
);
21809 fputs ("(GOTOFF)", asm_out_file
);
21811 fputc ('\n', asm_out_file
);
21815 mode
= GET_MODE (x
);
21817 if (arm_vector_mode_supported_p (mode
))
21821 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
21823 units
= CONST_VECTOR_NUNITS (x
);
21824 size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
21826 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
21827 for (i
= 0; i
< units
; i
++)
21829 rtx elt
= CONST_VECTOR_ELT (x
, i
);
21831 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
21834 for (i
= 0; i
< units
; i
++)
21836 rtx elt
= CONST_VECTOR_ELT (x
, i
);
21837 REAL_VALUE_TYPE rval
;
21839 REAL_VALUE_FROM_CONST_DOUBLE (rval
, elt
);
21842 (rval
, GET_MODE_INNER (mode
),
21843 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
21849 return default_assemble_integer (x
, size
, aligned_p
);
21853 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
21857 if (!TARGET_AAPCS_BASED
)
21860 default_named_section_asm_out_constructor
21861 : default_named_section_asm_out_destructor
) (symbol
, priority
);
21865 /* Put these in the .init_array section, using a special relocation. */
21866 if (priority
!= DEFAULT_INIT_PRIORITY
)
21869 sprintf (buf
, "%s.%.5u",
21870 is_ctor
? ".init_array" : ".fini_array",
21872 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
21879 switch_to_section (s
);
21880 assemble_align (POINTER_SIZE
);
21881 fputs ("\t.word\t", asm_out_file
);
21882 output_addr_const (asm_out_file
, symbol
);
21883 fputs ("(target1)\n", asm_out_file
);
21886 /* Add a function to the list of static constructors. */
21889 arm_elf_asm_constructor (rtx symbol
, int priority
)
21891 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
21894 /* Add a function to the list of static destructors. */
21897 arm_elf_asm_destructor (rtx symbol
, int priority
)
21899 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
21902 /* A finite state machine takes care of noticing whether or not instructions
21903 can be conditionally executed, and thus decrease execution time and code
21904 size by deleting branch instructions. The fsm is controlled by
21905 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
21907 /* The state of the fsm controlling condition codes are:
21908 0: normal, do nothing special
21909 1: make ASM_OUTPUT_OPCODE not output this instruction
21910 2: make ASM_OUTPUT_OPCODE not output this instruction
21911 3: make instructions conditional
21912 4: make instructions conditional
21914 State transitions (state->state by whom under condition):
21915 0 -> 1 final_prescan_insn if the `target' is a label
21916 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
21917 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
21918 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
21919 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
21920 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
21921 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
21922 (the target insn is arm_target_insn).
21924 If the jump clobbers the conditions then we use states 2 and 4.
21926 A similar thing can be done with conditional return insns.
21928 XXX In case the `target' is an unconditional branch, this conditionalising
21929 of the instructions always reduces code size, but not always execution
21930 time. But then, I want to reduce the code size to somewhere near what
21931 /bin/cc produces. */
21933 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
21934 instructions. When a COND_EXEC instruction is seen the subsequent
21935 instructions are scanned so that multiple conditional instructions can be
21936 combined into a single IT block. arm_condexec_count and arm_condexec_mask
21937 specify the length and true/false mask for the IT block. These will be
21938 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
21940 /* Returns the index of the ARM condition code string in
21941 `arm_condition_codes', or ARM_NV if the comparison is invalid.
21942 COMPARISON should be an rtx like `(eq (...) (...))'. */
21945 maybe_get_arm_condition_code (rtx comparison
)
21947 enum machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
21948 enum arm_cond_code code
;
21949 enum rtx_code comp_code
= GET_CODE (comparison
);
21951 if (GET_MODE_CLASS (mode
) != MODE_CC
)
21952 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
21953 XEXP (comparison
, 1));
21957 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
21958 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
21959 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
21960 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
21961 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
21962 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
21963 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
21964 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
21965 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
21966 case CC_DLTUmode
: code
= ARM_CC
;
21969 if (comp_code
== EQ
)
21970 return ARM_INVERSE_CONDITION_CODE (code
);
21971 if (comp_code
== NE
)
21978 case NE
: return ARM_NE
;
21979 case EQ
: return ARM_EQ
;
21980 case GE
: return ARM_PL
;
21981 case LT
: return ARM_MI
;
21982 default: return ARM_NV
;
21988 case NE
: return ARM_NE
;
21989 case EQ
: return ARM_EQ
;
21990 default: return ARM_NV
;
21996 case NE
: return ARM_MI
;
21997 case EQ
: return ARM_PL
;
21998 default: return ARM_NV
;
22003 /* We can handle all cases except UNEQ and LTGT. */
22006 case GE
: return ARM_GE
;
22007 case GT
: return ARM_GT
;
22008 case LE
: return ARM_LS
;
22009 case LT
: return ARM_MI
;
22010 case NE
: return ARM_NE
;
22011 case EQ
: return ARM_EQ
;
22012 case ORDERED
: return ARM_VC
;
22013 case UNORDERED
: return ARM_VS
;
22014 case UNLT
: return ARM_LT
;
22015 case UNLE
: return ARM_LE
;
22016 case UNGT
: return ARM_HI
;
22017 case UNGE
: return ARM_PL
;
22018 /* UNEQ and LTGT do not have a representation. */
22019 case UNEQ
: /* Fall through. */
22020 case LTGT
: /* Fall through. */
22021 default: return ARM_NV
;
22027 case NE
: return ARM_NE
;
22028 case EQ
: return ARM_EQ
;
22029 case GE
: return ARM_LE
;
22030 case GT
: return ARM_LT
;
22031 case LE
: return ARM_GE
;
22032 case LT
: return ARM_GT
;
22033 case GEU
: return ARM_LS
;
22034 case GTU
: return ARM_CC
;
22035 case LEU
: return ARM_CS
;
22036 case LTU
: return ARM_HI
;
22037 default: return ARM_NV
;
22043 case LTU
: return ARM_CS
;
22044 case GEU
: return ARM_CC
;
22045 default: return ARM_NV
;
22051 case NE
: return ARM_NE
;
22052 case EQ
: return ARM_EQ
;
22053 case GEU
: return ARM_CS
;
22054 case GTU
: return ARM_HI
;
22055 case LEU
: return ARM_LS
;
22056 case LTU
: return ARM_CC
;
22057 default: return ARM_NV
;
22063 case GE
: return ARM_GE
;
22064 case LT
: return ARM_LT
;
22065 case GEU
: return ARM_CS
;
22066 case LTU
: return ARM_CC
;
22067 default: return ARM_NV
;
22073 case NE
: return ARM_NE
;
22074 case EQ
: return ARM_EQ
;
22075 case GE
: return ARM_GE
;
22076 case GT
: return ARM_GT
;
22077 case LE
: return ARM_LE
;
22078 case LT
: return ARM_LT
;
22079 case GEU
: return ARM_CS
;
22080 case GTU
: return ARM_HI
;
22081 case LEU
: return ARM_LS
;
22082 case LTU
: return ARM_CC
;
22083 default: return ARM_NV
;
22086 default: gcc_unreachable ();
22090 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22091 static enum arm_cond_code
22092 get_arm_condition_code (rtx comparison
)
22094 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
22095 gcc_assert (code
!= ARM_NV
);
22099 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22102 thumb2_final_prescan_insn (rtx insn
)
22104 rtx first_insn
= insn
;
22105 rtx body
= PATTERN (insn
);
22107 enum arm_cond_code code
;
22112 /* Maximum number of conditionally executed instructions in a block
22113 is minimum of the two max values: maximum allowed in an IT block
22114 and maximum that is beneficial according to the cost model and tune. */
22115 max
= (max_insns_skipped
< MAX_INSN_PER_IT_BLOCK
) ?
22116 max_insns_skipped
: MAX_INSN_PER_IT_BLOCK
;
22118 /* Remove the previous insn from the count of insns to be output. */
22119 if (arm_condexec_count
)
22120 arm_condexec_count
--;
22122 /* Nothing to do if we are already inside a conditional block. */
22123 if (arm_condexec_count
)
22126 if (GET_CODE (body
) != COND_EXEC
)
22129 /* Conditional jumps are implemented directly. */
22133 predicate
= COND_EXEC_TEST (body
);
22134 arm_current_cc
= get_arm_condition_code (predicate
);
22136 n
= get_attr_ce_count (insn
);
22137 arm_condexec_count
= 1;
22138 arm_condexec_mask
= (1 << n
) - 1;
22139 arm_condexec_masklen
= n
;
22140 /* See if subsequent instructions can be combined into the same block. */
22143 insn
= next_nonnote_insn (insn
);
22145 /* Jumping into the middle of an IT block is illegal, so a label or
22146 barrier terminates the block. */
22147 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
22150 body
= PATTERN (insn
);
22151 /* USE and CLOBBER aren't really insns, so just skip them. */
22152 if (GET_CODE (body
) == USE
22153 || GET_CODE (body
) == CLOBBER
)
22156 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22157 if (GET_CODE (body
) != COND_EXEC
)
22159 /* Maximum number of conditionally executed instructions in a block. */
22160 n
= get_attr_ce_count (insn
);
22161 if (arm_condexec_masklen
+ n
> max
)
22164 predicate
= COND_EXEC_TEST (body
);
22165 code
= get_arm_condition_code (predicate
);
22166 mask
= (1 << n
) - 1;
22167 if (arm_current_cc
== code
)
22168 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
22169 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
22172 arm_condexec_count
++;
22173 arm_condexec_masklen
+= n
;
22175 /* A jump must be the last instruction in a conditional block. */
22179 /* Restore recog_data (getting the attributes of other insns can
22180 destroy this array, but final.c assumes that it remains intact
22181 across this call). */
22182 extract_constrain_insn_cached (first_insn
);
22186 arm_final_prescan_insn (rtx insn
)
22188 /* BODY will hold the body of INSN. */
22189 rtx body
= PATTERN (insn
);
22191 /* This will be 1 if trying to repeat the trick, and things need to be
22192 reversed if it appears to fail. */
22195 /* If we start with a return insn, we only succeed if we find another one. */
22196 int seeking_return
= 0;
22197 enum rtx_code return_code
= UNKNOWN
;
22199 /* START_INSN will hold the insn from where we start looking. This is the
22200 first insn after the following code_label if REVERSE is true. */
22201 rtx start_insn
= insn
;
22203 /* If in state 4, check if the target branch is reached, in order to
22204 change back to state 0. */
22205 if (arm_ccfsm_state
== 4)
22207 if (insn
== arm_target_insn
)
22209 arm_target_insn
= NULL
;
22210 arm_ccfsm_state
= 0;
22215 /* If in state 3, it is possible to repeat the trick, if this insn is an
22216 unconditional branch to a label, and immediately following this branch
22217 is the previous target label which is only used once, and the label this
22218 branch jumps to is not too far off. */
22219 if (arm_ccfsm_state
== 3)
22221 if (simplejump_p (insn
))
22223 start_insn
= next_nonnote_insn (start_insn
);
22224 if (BARRIER_P (start_insn
))
22226 /* XXX Isn't this always a barrier? */
22227 start_insn
= next_nonnote_insn (start_insn
);
22229 if (LABEL_P (start_insn
)
22230 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22231 && LABEL_NUSES (start_insn
) == 1)
22236 else if (ANY_RETURN_P (body
))
22238 start_insn
= next_nonnote_insn (start_insn
);
22239 if (BARRIER_P (start_insn
))
22240 start_insn
= next_nonnote_insn (start_insn
);
22241 if (LABEL_P (start_insn
)
22242 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22243 && LABEL_NUSES (start_insn
) == 1)
22246 seeking_return
= 1;
22247 return_code
= GET_CODE (body
);
22256 gcc_assert (!arm_ccfsm_state
|| reverse
);
22257 if (!JUMP_P (insn
))
22260 /* This jump might be paralleled with a clobber of the condition codes
22261 the jump should always come first */
22262 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
22263 body
= XVECEXP (body
, 0, 0);
22266 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
22267 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
22270 int fail
= FALSE
, succeed
= FALSE
;
22271 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22272 int then_not_else
= TRUE
;
22273 rtx this_insn
= start_insn
, label
= 0;
22275 /* Register the insn jumped to. */
22278 if (!seeking_return
)
22279 label
= XEXP (SET_SRC (body
), 0);
22281 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
22282 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
22283 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
22285 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
22286 then_not_else
= FALSE
;
22288 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
22290 seeking_return
= 1;
22291 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
22293 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
22295 seeking_return
= 1;
22296 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
22297 then_not_else
= FALSE
;
22300 gcc_unreachable ();
22302 /* See how many insns this branch skips, and what kind of insns. If all
22303 insns are okay, and the label or unconditional branch to the same
22304 label is not too far away, succeed. */
22305 for (insns_skipped
= 0;
22306 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
22310 this_insn
= next_nonnote_insn (this_insn
);
22314 switch (GET_CODE (this_insn
))
22317 /* Succeed if it is the target label, otherwise fail since
22318 control falls in from somewhere else. */
22319 if (this_insn
== label
)
22321 arm_ccfsm_state
= 1;
22329 /* Succeed if the following insn is the target label.
22331 If return insns are used then the last insn in a function
22332 will be a barrier. */
22333 this_insn
= next_nonnote_insn (this_insn
);
22334 if (this_insn
&& this_insn
== label
)
22336 arm_ccfsm_state
= 1;
22344 /* The AAPCS says that conditional calls should not be
22345 used since they make interworking inefficient (the
22346 linker can't transform BL<cond> into BLX). That's
22347 only a problem if the machine has BLX. */
22354 /* Succeed if the following insn is the target label, or
22355 if the following two insns are a barrier and the
22357 this_insn
= next_nonnote_insn (this_insn
);
22358 if (this_insn
&& BARRIER_P (this_insn
))
22359 this_insn
= next_nonnote_insn (this_insn
);
22361 if (this_insn
&& this_insn
== label
22362 && insns_skipped
< max_insns_skipped
)
22364 arm_ccfsm_state
= 1;
22372 /* If this is an unconditional branch to the same label, succeed.
22373 If it is to another label, do nothing. If it is conditional,
22375 /* XXX Probably, the tests for SET and the PC are
22378 scanbody
= PATTERN (this_insn
);
22379 if (GET_CODE (scanbody
) == SET
22380 && GET_CODE (SET_DEST (scanbody
)) == PC
)
22382 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
22383 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
22385 arm_ccfsm_state
= 2;
22388 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
22391 /* Fail if a conditional return is undesirable (e.g. on a
22392 StrongARM), but still allow this if optimizing for size. */
22393 else if (GET_CODE (scanbody
) == return_code
22394 && !use_return_insn (TRUE
, NULL
)
22397 else if (GET_CODE (scanbody
) == return_code
)
22399 arm_ccfsm_state
= 2;
22402 else if (GET_CODE (scanbody
) == PARALLEL
)
22404 switch (get_attr_conds (this_insn
))
22414 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
22419 /* Instructions using or affecting the condition codes make it
22421 scanbody
= PATTERN (this_insn
);
22422 if (!(GET_CODE (scanbody
) == SET
22423 || GET_CODE (scanbody
) == PARALLEL
)
22424 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
22434 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
22435 arm_target_label
= CODE_LABEL_NUMBER (label
);
22438 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
22440 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
22442 this_insn
= next_nonnote_insn (this_insn
);
22443 gcc_assert (!this_insn
22444 || (!BARRIER_P (this_insn
)
22445 && !LABEL_P (this_insn
)));
22449 /* Oh, dear! we ran off the end.. give up. */
22450 extract_constrain_insn_cached (insn
);
22451 arm_ccfsm_state
= 0;
22452 arm_target_insn
= NULL
;
22455 arm_target_insn
= this_insn
;
22458 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22461 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
22463 if (reverse
|| then_not_else
)
22464 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
22467 /* Restore recog_data (getting the attributes of other insns can
22468 destroy this array, but final.c assumes that it remains intact
22469 across this call. */
22470 extract_constrain_insn_cached (insn
);
22474 /* Output IT instructions. */
22476 thumb2_asm_output_opcode (FILE * stream
)
22481 if (arm_condexec_mask
)
22483 for (n
= 0; n
< arm_condexec_masklen
; n
++)
22484 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
22486 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
22487 arm_condition_codes
[arm_current_cc
]);
22488 arm_condexec_mask
= 0;
22492 /* Returns true if REGNO is a valid register
22493 for holding a quantity of type MODE. */
22495 arm_hard_regno_mode_ok (unsigned int regno
, enum machine_mode mode
)
22497 if (GET_MODE_CLASS (mode
) == MODE_CC
)
22498 return (regno
== CC_REGNUM
22499 || (TARGET_HARD_FLOAT
&& TARGET_VFP
22500 && regno
== VFPCC_REGNUM
));
22503 /* For the Thumb we only allow values bigger than SImode in
22504 registers 0 - 6, so that there is always a second low
22505 register available to hold the upper part of the value.
22506 We probably we ought to ensure that the register is the
22507 start of an even numbered register pair. */
22508 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
22510 if (TARGET_HARD_FLOAT
&& TARGET_VFP
22511 && IS_VFP_REGNUM (regno
))
22513 if (mode
== SFmode
|| mode
== SImode
)
22514 return VFP_REGNO_OK_FOR_SINGLE (regno
);
22516 if (mode
== DFmode
)
22517 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
22519 /* VFP registers can hold HFmode values, but there is no point in
22520 putting them there unless we have hardware conversion insns. */
22521 if (mode
== HFmode
)
22522 return TARGET_FP16
&& VFP_REGNO_OK_FOR_SINGLE (regno
);
22525 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
22526 || (VALID_NEON_QREG_MODE (mode
)
22527 && NEON_REGNO_OK_FOR_QUAD (regno
))
22528 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
22529 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
22530 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
22531 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
22532 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
22537 if (TARGET_REALLY_IWMMXT
)
22539 if (IS_IWMMXT_GR_REGNUM (regno
))
22540 return mode
== SImode
;
22542 if (IS_IWMMXT_REGNUM (regno
))
22543 return VALID_IWMMXT_REG_MODE (mode
);
22546 /* We allow almost any value to be stored in the general registers.
22547 Restrict doubleword quantities to even register pairs so that we can
22548 use ldrd. Do not allow very large Neon structure opaque modes in
22549 general registers; they would use too many. */
22550 if (regno
<= LAST_ARM_REGNUM
)
22551 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0)
22552 && ARM_NUM_REGS (mode
) <= 4;
22554 if (regno
== FRAME_POINTER_REGNUM
22555 || regno
== ARG_POINTER_REGNUM
)
22556 /* We only allow integers in the fake hard registers. */
22557 return GET_MODE_CLASS (mode
) == MODE_INT
;
22562 /* Implement MODES_TIEABLE_P. */
22565 arm_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
22567 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
22570 /* We specifically want to allow elements of "structure" modes to
22571 be tieable to the structure. This more general condition allows
22572 other rarer situations too. */
22574 && (VALID_NEON_DREG_MODE (mode1
)
22575 || VALID_NEON_QREG_MODE (mode1
)
22576 || VALID_NEON_STRUCT_MODE (mode1
))
22577 && (VALID_NEON_DREG_MODE (mode2
)
22578 || VALID_NEON_QREG_MODE (mode2
)
22579 || VALID_NEON_STRUCT_MODE (mode2
)))
22585 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22586 not used in arm mode. */
22589 arm_regno_class (int regno
)
22593 if (regno
== STACK_POINTER_REGNUM
)
22595 if (regno
== CC_REGNUM
)
22602 if (TARGET_THUMB2
&& regno
< 8)
22605 if ( regno
<= LAST_ARM_REGNUM
22606 || regno
== FRAME_POINTER_REGNUM
22607 || regno
== ARG_POINTER_REGNUM
)
22608 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
22610 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
22611 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
22613 if (IS_VFP_REGNUM (regno
))
22615 if (regno
<= D7_VFP_REGNUM
)
22616 return VFP_D0_D7_REGS
;
22617 else if (regno
<= LAST_LO_VFP_REGNUM
)
22618 return VFP_LO_REGS
;
22620 return VFP_HI_REGS
;
22623 if (IS_IWMMXT_REGNUM (regno
))
22624 return IWMMXT_REGS
;
22626 if (IS_IWMMXT_GR_REGNUM (regno
))
22627 return IWMMXT_GR_REGS
;
22632 /* Handle a special case when computing the offset
22633 of an argument from the frame pointer. */
22635 arm_debugger_arg_offset (int value
, rtx addr
)
22639 /* We are only interested if dbxout_parms() failed to compute the offset. */
22643 /* We can only cope with the case where the address is held in a register. */
22647 /* If we are using the frame pointer to point at the argument, then
22648 an offset of 0 is correct. */
22649 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
22652 /* If we are using the stack pointer to point at the
22653 argument, then an offset of 0 is correct. */
22654 /* ??? Check this is consistent with thumb2 frame layout. */
22655 if ((TARGET_THUMB
|| !frame_pointer_needed
)
22656 && REGNO (addr
) == SP_REGNUM
)
22659 /* Oh dear. The argument is pointed to by a register rather
22660 than being held in a register, or being stored at a known
22661 offset from the frame pointer. Since GDB only understands
22662 those two kinds of argument we must translate the address
22663 held in the register into an offset from the frame pointer.
22664 We do this by searching through the insns for the function
22665 looking to see where this register gets its value. If the
22666 register is initialized from the frame pointer plus an offset
22667 then we are in luck and we can continue, otherwise we give up.
22669 This code is exercised by producing debugging information
22670 for a function with arguments like this:
22672 double func (double a, double b, int c, double d) {return d;}
22674 Without this code the stab for parameter 'd' will be set to
22675 an offset of 0 from the frame pointer, rather than 8. */
22677 /* The if() statement says:
22679 If the insn is a normal instruction
22680 and if the insn is setting the value in a register
22681 and if the register being set is the register holding the address of the argument
22682 and if the address is computing by an addition
22683 that involves adding to a register
22684 which is the frame pointer
22689 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
22691 if ( NONJUMP_INSN_P (insn
)
22692 && GET_CODE (PATTERN (insn
)) == SET
22693 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
22694 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
22695 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
22696 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
22697 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
22700 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
22709 warning (0, "unable to compute real location of stacked parameter");
22710 value
= 8; /* XXX magic hack */
22731 T_MAX
/* Size of enum. Keep last. */
22732 } neon_builtin_type_mode
;
22734 #define TYPE_MODE_BIT(X) (1 << (X))
22736 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
22737 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
22738 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
22739 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
22740 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
22741 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
22743 #define v8qi_UP T_V8QI
22744 #define v4hi_UP T_V4HI
22745 #define v4hf_UP T_V4HF
22746 #define v2si_UP T_V2SI
22747 #define v2sf_UP T_V2SF
22749 #define v16qi_UP T_V16QI
22750 #define v8hi_UP T_V8HI
22751 #define v4si_UP T_V4SI
22752 #define v4sf_UP T_V4SF
22753 #define v2di_UP T_V2DI
22758 #define UP(X) X##_UP
22794 NEON_LOADSTRUCTLANE
,
22796 NEON_STORESTRUCTLANE
,
22805 const neon_itype itype
;
22806 const neon_builtin_type_mode mode
;
22807 const enum insn_code code
;
22808 unsigned int fcode
;
22809 } neon_builtin_datum
;
22811 #define CF(N,X) CODE_FOR_neon_##N##X
22813 #define VAR1(T, N, A) \
22814 {#N, NEON_##T, UP (A), CF (N, A), 0}
22815 #define VAR2(T, N, A, B) \
22817 {#N, NEON_##T, UP (B), CF (N, B), 0}
22818 #define VAR3(T, N, A, B, C) \
22819 VAR2 (T, N, A, B), \
22820 {#N, NEON_##T, UP (C), CF (N, C), 0}
22821 #define VAR4(T, N, A, B, C, D) \
22822 VAR3 (T, N, A, B, C), \
22823 {#N, NEON_##T, UP (D), CF (N, D), 0}
22824 #define VAR5(T, N, A, B, C, D, E) \
22825 VAR4 (T, N, A, B, C, D), \
22826 {#N, NEON_##T, UP (E), CF (N, E), 0}
22827 #define VAR6(T, N, A, B, C, D, E, F) \
22828 VAR5 (T, N, A, B, C, D, E), \
22829 {#N, NEON_##T, UP (F), CF (N, F), 0}
22830 #define VAR7(T, N, A, B, C, D, E, F, G) \
22831 VAR6 (T, N, A, B, C, D, E, F), \
22832 {#N, NEON_##T, UP (G), CF (N, G), 0}
22833 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22834 VAR7 (T, N, A, B, C, D, E, F, G), \
22835 {#N, NEON_##T, UP (H), CF (N, H), 0}
22836 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22837 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22838 {#N, NEON_##T, UP (I), CF (N, I), 0}
22839 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22840 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22841 {#N, NEON_##T, UP (J), CF (N, J), 0}
22843 /* The NEON builtin data can be found in arm_neon_builtins.def.
22844 The mode entries in the following table correspond to the "key" type of the
22845 instruction variant, i.e. equivalent to that which would be specified after
22846 the assembler mnemonic, which usually refers to the last vector operand.
22847 (Signed/unsigned/polynomial types are not differentiated between though, and
22848 are all mapped onto the same mode for a given element size.) The modes
22849 listed per instruction should be the same as those defined for that
22850 instruction's pattern in neon.md. */
22852 static neon_builtin_datum neon_builtin_data
[] =
22854 #include "arm_neon_builtins.def"
22869 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
22870 #define VAR1(T, N, A) \
22872 #define VAR2(T, N, A, B) \
22875 #define VAR3(T, N, A, B, C) \
22876 VAR2 (T, N, A, B), \
22878 #define VAR4(T, N, A, B, C, D) \
22879 VAR3 (T, N, A, B, C), \
22881 #define VAR5(T, N, A, B, C, D, E) \
22882 VAR4 (T, N, A, B, C, D), \
22884 #define VAR6(T, N, A, B, C, D, E, F) \
22885 VAR5 (T, N, A, B, C, D, E), \
22887 #define VAR7(T, N, A, B, C, D, E, F, G) \
22888 VAR6 (T, N, A, B, C, D, E, F), \
22890 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22891 VAR7 (T, N, A, B, C, D, E, F, G), \
22893 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22894 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22896 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22897 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22901 ARM_BUILTIN_GETWCGR0
,
22902 ARM_BUILTIN_GETWCGR1
,
22903 ARM_BUILTIN_GETWCGR2
,
22904 ARM_BUILTIN_GETWCGR3
,
22906 ARM_BUILTIN_SETWCGR0
,
22907 ARM_BUILTIN_SETWCGR1
,
22908 ARM_BUILTIN_SETWCGR2
,
22909 ARM_BUILTIN_SETWCGR3
,
22913 ARM_BUILTIN_WAVG2BR
,
22914 ARM_BUILTIN_WAVG2HR
,
22915 ARM_BUILTIN_WAVG2B
,
22916 ARM_BUILTIN_WAVG2H
,
22923 ARM_BUILTIN_WMACSZ
,
22925 ARM_BUILTIN_WMACUZ
,
22928 ARM_BUILTIN_WSADBZ
,
22930 ARM_BUILTIN_WSADHZ
,
22932 ARM_BUILTIN_WALIGNI
,
22933 ARM_BUILTIN_WALIGNR0
,
22934 ARM_BUILTIN_WALIGNR1
,
22935 ARM_BUILTIN_WALIGNR2
,
22936 ARM_BUILTIN_WALIGNR3
,
22939 ARM_BUILTIN_TMIAPH
,
22940 ARM_BUILTIN_TMIABB
,
22941 ARM_BUILTIN_TMIABT
,
22942 ARM_BUILTIN_TMIATB
,
22943 ARM_BUILTIN_TMIATT
,
22945 ARM_BUILTIN_TMOVMSKB
,
22946 ARM_BUILTIN_TMOVMSKH
,
22947 ARM_BUILTIN_TMOVMSKW
,
22949 ARM_BUILTIN_TBCSTB
,
22950 ARM_BUILTIN_TBCSTH
,
22951 ARM_BUILTIN_TBCSTW
,
22953 ARM_BUILTIN_WMADDS
,
22954 ARM_BUILTIN_WMADDU
,
22956 ARM_BUILTIN_WPACKHSS
,
22957 ARM_BUILTIN_WPACKWSS
,
22958 ARM_BUILTIN_WPACKDSS
,
22959 ARM_BUILTIN_WPACKHUS
,
22960 ARM_BUILTIN_WPACKWUS
,
22961 ARM_BUILTIN_WPACKDUS
,
22966 ARM_BUILTIN_WADDSSB
,
22967 ARM_BUILTIN_WADDSSH
,
22968 ARM_BUILTIN_WADDSSW
,
22969 ARM_BUILTIN_WADDUSB
,
22970 ARM_BUILTIN_WADDUSH
,
22971 ARM_BUILTIN_WADDUSW
,
22975 ARM_BUILTIN_WSUBSSB
,
22976 ARM_BUILTIN_WSUBSSH
,
22977 ARM_BUILTIN_WSUBSSW
,
22978 ARM_BUILTIN_WSUBUSB
,
22979 ARM_BUILTIN_WSUBUSH
,
22980 ARM_BUILTIN_WSUBUSW
,
22987 ARM_BUILTIN_WCMPEQB
,
22988 ARM_BUILTIN_WCMPEQH
,
22989 ARM_BUILTIN_WCMPEQW
,
22990 ARM_BUILTIN_WCMPGTUB
,
22991 ARM_BUILTIN_WCMPGTUH
,
22992 ARM_BUILTIN_WCMPGTUW
,
22993 ARM_BUILTIN_WCMPGTSB
,
22994 ARM_BUILTIN_WCMPGTSH
,
22995 ARM_BUILTIN_WCMPGTSW
,
22997 ARM_BUILTIN_TEXTRMSB
,
22998 ARM_BUILTIN_TEXTRMSH
,
22999 ARM_BUILTIN_TEXTRMSW
,
23000 ARM_BUILTIN_TEXTRMUB
,
23001 ARM_BUILTIN_TEXTRMUH
,
23002 ARM_BUILTIN_TEXTRMUW
,
23003 ARM_BUILTIN_TINSRB
,
23004 ARM_BUILTIN_TINSRH
,
23005 ARM_BUILTIN_TINSRW
,
23007 ARM_BUILTIN_WMAXSW
,
23008 ARM_BUILTIN_WMAXSH
,
23009 ARM_BUILTIN_WMAXSB
,
23010 ARM_BUILTIN_WMAXUW
,
23011 ARM_BUILTIN_WMAXUH
,
23012 ARM_BUILTIN_WMAXUB
,
23013 ARM_BUILTIN_WMINSW
,
23014 ARM_BUILTIN_WMINSH
,
23015 ARM_BUILTIN_WMINSB
,
23016 ARM_BUILTIN_WMINUW
,
23017 ARM_BUILTIN_WMINUH
,
23018 ARM_BUILTIN_WMINUB
,
23020 ARM_BUILTIN_WMULUM
,
23021 ARM_BUILTIN_WMULSM
,
23022 ARM_BUILTIN_WMULUL
,
23024 ARM_BUILTIN_PSADBH
,
23025 ARM_BUILTIN_WSHUFH
,
23039 ARM_BUILTIN_WSLLHI
,
23040 ARM_BUILTIN_WSLLWI
,
23041 ARM_BUILTIN_WSLLDI
,
23042 ARM_BUILTIN_WSRAHI
,
23043 ARM_BUILTIN_WSRAWI
,
23044 ARM_BUILTIN_WSRADI
,
23045 ARM_BUILTIN_WSRLHI
,
23046 ARM_BUILTIN_WSRLWI
,
23047 ARM_BUILTIN_WSRLDI
,
23048 ARM_BUILTIN_WRORHI
,
23049 ARM_BUILTIN_WRORWI
,
23050 ARM_BUILTIN_WRORDI
,
23052 ARM_BUILTIN_WUNPCKIHB
,
23053 ARM_BUILTIN_WUNPCKIHH
,
23054 ARM_BUILTIN_WUNPCKIHW
,
23055 ARM_BUILTIN_WUNPCKILB
,
23056 ARM_BUILTIN_WUNPCKILH
,
23057 ARM_BUILTIN_WUNPCKILW
,
23059 ARM_BUILTIN_WUNPCKEHSB
,
23060 ARM_BUILTIN_WUNPCKEHSH
,
23061 ARM_BUILTIN_WUNPCKEHSW
,
23062 ARM_BUILTIN_WUNPCKEHUB
,
23063 ARM_BUILTIN_WUNPCKEHUH
,
23064 ARM_BUILTIN_WUNPCKEHUW
,
23065 ARM_BUILTIN_WUNPCKELSB
,
23066 ARM_BUILTIN_WUNPCKELSH
,
23067 ARM_BUILTIN_WUNPCKELSW
,
23068 ARM_BUILTIN_WUNPCKELUB
,
23069 ARM_BUILTIN_WUNPCKELUH
,
23070 ARM_BUILTIN_WUNPCKELUW
,
23076 ARM_BUILTIN_WADDSUBHX
,
23077 ARM_BUILTIN_WSUBADDHX
,
23079 ARM_BUILTIN_WABSDIFFB
,
23080 ARM_BUILTIN_WABSDIFFH
,
23081 ARM_BUILTIN_WABSDIFFW
,
23083 ARM_BUILTIN_WADDCH
,
23084 ARM_BUILTIN_WADDCW
,
23087 ARM_BUILTIN_WAVG4R
,
23089 ARM_BUILTIN_WMADDSX
,
23090 ARM_BUILTIN_WMADDUX
,
23092 ARM_BUILTIN_WMADDSN
,
23093 ARM_BUILTIN_WMADDUN
,
23095 ARM_BUILTIN_WMULWSM
,
23096 ARM_BUILTIN_WMULWUM
,
23098 ARM_BUILTIN_WMULWSMR
,
23099 ARM_BUILTIN_WMULWUMR
,
23101 ARM_BUILTIN_WMULWL
,
23103 ARM_BUILTIN_WMULSMR
,
23104 ARM_BUILTIN_WMULUMR
,
23106 ARM_BUILTIN_WQMULM
,
23107 ARM_BUILTIN_WQMULMR
,
23109 ARM_BUILTIN_WQMULWM
,
23110 ARM_BUILTIN_WQMULWMR
,
23112 ARM_BUILTIN_WADDBHUSM
,
23113 ARM_BUILTIN_WADDBHUSL
,
23115 ARM_BUILTIN_WQMIABB
,
23116 ARM_BUILTIN_WQMIABT
,
23117 ARM_BUILTIN_WQMIATB
,
23118 ARM_BUILTIN_WQMIATT
,
23120 ARM_BUILTIN_WQMIABBN
,
23121 ARM_BUILTIN_WQMIABTN
,
23122 ARM_BUILTIN_WQMIATBN
,
23123 ARM_BUILTIN_WQMIATTN
,
23125 ARM_BUILTIN_WMIABB
,
23126 ARM_BUILTIN_WMIABT
,
23127 ARM_BUILTIN_WMIATB
,
23128 ARM_BUILTIN_WMIATT
,
23130 ARM_BUILTIN_WMIABBN
,
23131 ARM_BUILTIN_WMIABTN
,
23132 ARM_BUILTIN_WMIATBN
,
23133 ARM_BUILTIN_WMIATTN
,
23135 ARM_BUILTIN_WMIAWBB
,
23136 ARM_BUILTIN_WMIAWBT
,
23137 ARM_BUILTIN_WMIAWTB
,
23138 ARM_BUILTIN_WMIAWTT
,
23140 ARM_BUILTIN_WMIAWBBN
,
23141 ARM_BUILTIN_WMIAWBTN
,
23142 ARM_BUILTIN_WMIAWTBN
,
23143 ARM_BUILTIN_WMIAWTTN
,
23145 ARM_BUILTIN_WMERGE
,
23147 ARM_BUILTIN_CRC32B
,
23148 ARM_BUILTIN_CRC32H
,
23149 ARM_BUILTIN_CRC32W
,
23150 ARM_BUILTIN_CRC32CB
,
23151 ARM_BUILTIN_CRC32CH
,
23152 ARM_BUILTIN_CRC32CW
,
23158 #define CRYPTO1(L, U, M1, M2) \
23159 ARM_BUILTIN_CRYPTO_##U,
23160 #define CRYPTO2(L, U, M1, M2, M3) \
23161 ARM_BUILTIN_CRYPTO_##U,
23162 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23163 ARM_BUILTIN_CRYPTO_##U,
23165 #include "crypto.def"
23171 #include "arm_neon_builtins.def"
23176 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23190 static GTY(()) tree arm_builtin_decls
[ARM_BUILTIN_MAX
];
23192 #define NUM_DREG_TYPES 5
23193 #define NUM_QREG_TYPES 6
23196 arm_init_neon_builtins (void)
23198 unsigned int i
, fcode
;
23201 tree neon_intQI_type_node
;
23202 tree neon_intHI_type_node
;
23203 tree neon_floatHF_type_node
;
23204 tree neon_polyQI_type_node
;
23205 tree neon_polyHI_type_node
;
23206 tree neon_intSI_type_node
;
23207 tree neon_intDI_type_node
;
23208 tree neon_intUTI_type_node
;
23209 tree neon_float_type_node
;
23211 tree intQI_pointer_node
;
23212 tree intHI_pointer_node
;
23213 tree intSI_pointer_node
;
23214 tree intDI_pointer_node
;
23215 tree float_pointer_node
;
23217 tree const_intQI_node
;
23218 tree const_intHI_node
;
23219 tree const_intSI_node
;
23220 tree const_intDI_node
;
23221 tree const_float_node
;
23223 tree const_intQI_pointer_node
;
23224 tree const_intHI_pointer_node
;
23225 tree const_intSI_pointer_node
;
23226 tree const_intDI_pointer_node
;
23227 tree const_float_pointer_node
;
23229 tree V8QI_type_node
;
23230 tree V4HI_type_node
;
23231 tree V4HF_type_node
;
23232 tree V2SI_type_node
;
23233 tree V2SF_type_node
;
23234 tree V16QI_type_node
;
23235 tree V8HI_type_node
;
23236 tree V4SI_type_node
;
23237 tree V4SF_type_node
;
23238 tree V2DI_type_node
;
23240 tree intUQI_type_node
;
23241 tree intUHI_type_node
;
23242 tree intUSI_type_node
;
23243 tree intUDI_type_node
;
23245 tree intEI_type_node
;
23246 tree intOI_type_node
;
23247 tree intCI_type_node
;
23248 tree intXI_type_node
;
23250 tree V8QI_pointer_node
;
23251 tree V4HI_pointer_node
;
23252 tree V2SI_pointer_node
;
23253 tree V2SF_pointer_node
;
23254 tree V16QI_pointer_node
;
23255 tree V8HI_pointer_node
;
23256 tree V4SI_pointer_node
;
23257 tree V4SF_pointer_node
;
23258 tree V2DI_pointer_node
;
23260 tree void_ftype_pv8qi_v8qi_v8qi
;
23261 tree void_ftype_pv4hi_v4hi_v4hi
;
23262 tree void_ftype_pv2si_v2si_v2si
;
23263 tree void_ftype_pv2sf_v2sf_v2sf
;
23264 tree void_ftype_pdi_di_di
;
23265 tree void_ftype_pv16qi_v16qi_v16qi
;
23266 tree void_ftype_pv8hi_v8hi_v8hi
;
23267 tree void_ftype_pv4si_v4si_v4si
;
23268 tree void_ftype_pv4sf_v4sf_v4sf
;
23269 tree void_ftype_pv2di_v2di_v2di
;
23271 tree reinterp_ftype_dreg
[NUM_DREG_TYPES
][NUM_DREG_TYPES
];
23272 tree reinterp_ftype_qreg
[NUM_QREG_TYPES
][NUM_QREG_TYPES
];
23273 tree dreg_types
[NUM_DREG_TYPES
], qreg_types
[NUM_QREG_TYPES
];
23275 /* Create distinguished type nodes for NEON vector element types,
23276 and pointers to values of such types, so we can detect them later. */
23277 neon_intQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
23278 neon_intHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
23279 neon_polyQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
23280 neon_polyHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
23281 neon_intSI_type_node
= make_signed_type (GET_MODE_PRECISION (SImode
));
23282 neon_intDI_type_node
= make_signed_type (GET_MODE_PRECISION (DImode
));
23283 neon_float_type_node
= make_node (REAL_TYPE
);
23284 TYPE_PRECISION (neon_float_type_node
) = FLOAT_TYPE_SIZE
;
23285 layout_type (neon_float_type_node
);
23286 neon_floatHF_type_node
= make_node (REAL_TYPE
);
23287 TYPE_PRECISION (neon_floatHF_type_node
) = GET_MODE_PRECISION (HFmode
);
23288 layout_type (neon_floatHF_type_node
);
23290 /* Define typedefs which exactly correspond to the modes we are basing vector
23291 types on. If you change these names you'll need to change
23292 the table used by arm_mangle_type too. */
23293 (*lang_hooks
.types
.register_builtin_type
) (neon_intQI_type_node
,
23294 "__builtin_neon_qi");
23295 (*lang_hooks
.types
.register_builtin_type
) (neon_intHI_type_node
,
23296 "__builtin_neon_hi");
23297 (*lang_hooks
.types
.register_builtin_type
) (neon_floatHF_type_node
,
23298 "__builtin_neon_hf");
23299 (*lang_hooks
.types
.register_builtin_type
) (neon_intSI_type_node
,
23300 "__builtin_neon_si");
23301 (*lang_hooks
.types
.register_builtin_type
) (neon_float_type_node
,
23302 "__builtin_neon_sf");
23303 (*lang_hooks
.types
.register_builtin_type
) (neon_intDI_type_node
,
23304 "__builtin_neon_di");
23305 (*lang_hooks
.types
.register_builtin_type
) (neon_polyQI_type_node
,
23306 "__builtin_neon_poly8");
23307 (*lang_hooks
.types
.register_builtin_type
) (neon_polyHI_type_node
,
23308 "__builtin_neon_poly16");
23310 intQI_pointer_node
= build_pointer_type (neon_intQI_type_node
);
23311 intHI_pointer_node
= build_pointer_type (neon_intHI_type_node
);
23312 intSI_pointer_node
= build_pointer_type (neon_intSI_type_node
);
23313 intDI_pointer_node
= build_pointer_type (neon_intDI_type_node
);
23314 float_pointer_node
= build_pointer_type (neon_float_type_node
);
23316 /* Next create constant-qualified versions of the above types. */
23317 const_intQI_node
= build_qualified_type (neon_intQI_type_node
,
23319 const_intHI_node
= build_qualified_type (neon_intHI_type_node
,
23321 const_intSI_node
= build_qualified_type (neon_intSI_type_node
,
23323 const_intDI_node
= build_qualified_type (neon_intDI_type_node
,
23325 const_float_node
= build_qualified_type (neon_float_type_node
,
23328 const_intQI_pointer_node
= build_pointer_type (const_intQI_node
);
23329 const_intHI_pointer_node
= build_pointer_type (const_intHI_node
);
23330 const_intSI_pointer_node
= build_pointer_type (const_intSI_node
);
23331 const_intDI_pointer_node
= build_pointer_type (const_intDI_node
);
23332 const_float_pointer_node
= build_pointer_type (const_float_node
);
23334 /* Now create vector types based on our NEON element types. */
23335 /* 64-bit vectors. */
23337 build_vector_type_for_mode (neon_intQI_type_node
, V8QImode
);
23339 build_vector_type_for_mode (neon_intHI_type_node
, V4HImode
);
23341 build_vector_type_for_mode (neon_floatHF_type_node
, V4HFmode
);
23343 build_vector_type_for_mode (neon_intSI_type_node
, V2SImode
);
23345 build_vector_type_for_mode (neon_float_type_node
, V2SFmode
);
23346 /* 128-bit vectors. */
23348 build_vector_type_for_mode (neon_intQI_type_node
, V16QImode
);
23350 build_vector_type_for_mode (neon_intHI_type_node
, V8HImode
);
23352 build_vector_type_for_mode (neon_intSI_type_node
, V4SImode
);
23354 build_vector_type_for_mode (neon_float_type_node
, V4SFmode
);
23356 build_vector_type_for_mode (neon_intDI_type_node
, V2DImode
);
23358 /* Unsigned integer types for various mode sizes. */
23359 intUQI_type_node
= make_unsigned_type (GET_MODE_PRECISION (QImode
));
23360 intUHI_type_node
= make_unsigned_type (GET_MODE_PRECISION (HImode
));
23361 intUSI_type_node
= make_unsigned_type (GET_MODE_PRECISION (SImode
));
23362 intUDI_type_node
= make_unsigned_type (GET_MODE_PRECISION (DImode
));
23363 neon_intUTI_type_node
= make_unsigned_type (GET_MODE_PRECISION (TImode
));
23366 (*lang_hooks
.types
.register_builtin_type
) (intUQI_type_node
,
23367 "__builtin_neon_uqi");
23368 (*lang_hooks
.types
.register_builtin_type
) (intUHI_type_node
,
23369 "__builtin_neon_uhi");
23370 (*lang_hooks
.types
.register_builtin_type
) (intUSI_type_node
,
23371 "__builtin_neon_usi");
23372 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
23373 "__builtin_neon_udi");
23374 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
23375 "__builtin_neon_poly64");
23376 (*lang_hooks
.types
.register_builtin_type
) (neon_intUTI_type_node
,
23377 "__builtin_neon_poly128");
23379 /* Opaque integer types for structures of vectors. */
23380 intEI_type_node
= make_signed_type (GET_MODE_PRECISION (EImode
));
23381 intOI_type_node
= make_signed_type (GET_MODE_PRECISION (OImode
));
23382 intCI_type_node
= make_signed_type (GET_MODE_PRECISION (CImode
));
23383 intXI_type_node
= make_signed_type (GET_MODE_PRECISION (XImode
));
23385 (*lang_hooks
.types
.register_builtin_type
) (intTI_type_node
,
23386 "__builtin_neon_ti");
23387 (*lang_hooks
.types
.register_builtin_type
) (intEI_type_node
,
23388 "__builtin_neon_ei");
23389 (*lang_hooks
.types
.register_builtin_type
) (intOI_type_node
,
23390 "__builtin_neon_oi");
23391 (*lang_hooks
.types
.register_builtin_type
) (intCI_type_node
,
23392 "__builtin_neon_ci");
23393 (*lang_hooks
.types
.register_builtin_type
) (intXI_type_node
,
23394 "__builtin_neon_xi");
23396 /* Pointers to vector types. */
23397 V8QI_pointer_node
= build_pointer_type (V8QI_type_node
);
23398 V4HI_pointer_node
= build_pointer_type (V4HI_type_node
);
23399 V2SI_pointer_node
= build_pointer_type (V2SI_type_node
);
23400 V2SF_pointer_node
= build_pointer_type (V2SF_type_node
);
23401 V16QI_pointer_node
= build_pointer_type (V16QI_type_node
);
23402 V8HI_pointer_node
= build_pointer_type (V8HI_type_node
);
23403 V4SI_pointer_node
= build_pointer_type (V4SI_type_node
);
23404 V4SF_pointer_node
= build_pointer_type (V4SF_type_node
);
23405 V2DI_pointer_node
= build_pointer_type (V2DI_type_node
);
23407 /* Operations which return results as pairs. */
23408 void_ftype_pv8qi_v8qi_v8qi
=
23409 build_function_type_list (void_type_node
, V8QI_pointer_node
, V8QI_type_node
,
23410 V8QI_type_node
, NULL
);
23411 void_ftype_pv4hi_v4hi_v4hi
=
23412 build_function_type_list (void_type_node
, V4HI_pointer_node
, V4HI_type_node
,
23413 V4HI_type_node
, NULL
);
23414 void_ftype_pv2si_v2si_v2si
=
23415 build_function_type_list (void_type_node
, V2SI_pointer_node
, V2SI_type_node
,
23416 V2SI_type_node
, NULL
);
23417 void_ftype_pv2sf_v2sf_v2sf
=
23418 build_function_type_list (void_type_node
, V2SF_pointer_node
, V2SF_type_node
,
23419 V2SF_type_node
, NULL
);
23420 void_ftype_pdi_di_di
=
23421 build_function_type_list (void_type_node
, intDI_pointer_node
,
23422 neon_intDI_type_node
, neon_intDI_type_node
, NULL
);
23423 void_ftype_pv16qi_v16qi_v16qi
=
23424 build_function_type_list (void_type_node
, V16QI_pointer_node
,
23425 V16QI_type_node
, V16QI_type_node
, NULL
);
23426 void_ftype_pv8hi_v8hi_v8hi
=
23427 build_function_type_list (void_type_node
, V8HI_pointer_node
, V8HI_type_node
,
23428 V8HI_type_node
, NULL
);
23429 void_ftype_pv4si_v4si_v4si
=
23430 build_function_type_list (void_type_node
, V4SI_pointer_node
, V4SI_type_node
,
23431 V4SI_type_node
, NULL
);
23432 void_ftype_pv4sf_v4sf_v4sf
=
23433 build_function_type_list (void_type_node
, V4SF_pointer_node
, V4SF_type_node
,
23434 V4SF_type_node
, NULL
);
23435 void_ftype_pv2di_v2di_v2di
=
23436 build_function_type_list (void_type_node
, V2DI_pointer_node
, V2DI_type_node
,
23437 V2DI_type_node
, NULL
);
23439 if (TARGET_CRYPTO
&& TARGET_HARD_FLOAT
)
23441 tree V4USI_type_node
=
23442 build_vector_type_for_mode (intUSI_type_node
, V4SImode
);
23444 tree V16UQI_type_node
=
23445 build_vector_type_for_mode (intUQI_type_node
, V16QImode
);
23447 tree v16uqi_ftype_v16uqi
23448 = build_function_type_list (V16UQI_type_node
, V16UQI_type_node
, NULL_TREE
);
23450 tree v16uqi_ftype_v16uqi_v16uqi
23451 = build_function_type_list (V16UQI_type_node
, V16UQI_type_node
,
23452 V16UQI_type_node
, NULL_TREE
);
23454 tree v4usi_ftype_v4usi
23455 = build_function_type_list (V4USI_type_node
, V4USI_type_node
, NULL_TREE
);
23457 tree v4usi_ftype_v4usi_v4usi
23458 = build_function_type_list (V4USI_type_node
, V4USI_type_node
,
23459 V4USI_type_node
, NULL_TREE
);
23461 tree v4usi_ftype_v4usi_v4usi_v4usi
23462 = build_function_type_list (V4USI_type_node
, V4USI_type_node
,
23463 V4USI_type_node
, V4USI_type_node
, NULL_TREE
);
23465 tree uti_ftype_udi_udi
23466 = build_function_type_list (neon_intUTI_type_node
, intUDI_type_node
,
23467 intUDI_type_node
, NULL_TREE
);
23480 ARM_BUILTIN_CRYPTO_##U
23482 "__builtin_arm_crypto_"#L
23483 #define FT1(R, A) \
23485 #define FT2(R, A1, A2) \
23486 R##_ftype_##A1##_##A2
23487 #define FT3(R, A1, A2, A3) \
23488 R##_ftype_##A1##_##A2##_##A3
23489 #define CRYPTO1(L, U, R, A) \
23490 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23491 C (U), BUILT_IN_MD, \
23493 #define CRYPTO2(L, U, R, A1, A2) \
23494 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23495 C (U), BUILT_IN_MD, \
23498 #define CRYPTO3(L, U, R, A1, A2, A3) \
23499 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23500 C (U), BUILT_IN_MD, \
23502 #include "crypto.def"
23513 dreg_types
[0] = V8QI_type_node
;
23514 dreg_types
[1] = V4HI_type_node
;
23515 dreg_types
[2] = V2SI_type_node
;
23516 dreg_types
[3] = V2SF_type_node
;
23517 dreg_types
[4] = neon_intDI_type_node
;
23519 qreg_types
[0] = V16QI_type_node
;
23520 qreg_types
[1] = V8HI_type_node
;
23521 qreg_types
[2] = V4SI_type_node
;
23522 qreg_types
[3] = V4SF_type_node
;
23523 qreg_types
[4] = V2DI_type_node
;
23524 qreg_types
[5] = neon_intUTI_type_node
;
23526 for (i
= 0; i
< NUM_QREG_TYPES
; i
++)
23529 for (j
= 0; j
< NUM_QREG_TYPES
; j
++)
23531 if (i
< NUM_DREG_TYPES
&& j
< NUM_DREG_TYPES
)
23532 reinterp_ftype_dreg
[i
][j
]
23533 = build_function_type_list (dreg_types
[i
], dreg_types
[j
], NULL
);
23535 reinterp_ftype_qreg
[i
][j
]
23536 = build_function_type_list (qreg_types
[i
], qreg_types
[j
], NULL
);
23540 for (i
= 0, fcode
= ARM_BUILTIN_NEON_BASE
;
23541 i
< ARRAY_SIZE (neon_builtin_data
);
23544 neon_builtin_datum
*d
= &neon_builtin_data
[i
];
23546 const char* const modenames
[] = {
23547 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23548 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23553 int is_load
= 0, is_store
= 0;
23555 gcc_assert (ARRAY_SIZE (modenames
) == T_MAX
);
23562 case NEON_LOAD1LANE
:
23563 case NEON_LOADSTRUCT
:
23564 case NEON_LOADSTRUCTLANE
:
23566 /* Fall through. */
23568 case NEON_STORE1LANE
:
23569 case NEON_STORESTRUCT
:
23570 case NEON_STORESTRUCTLANE
:
23573 /* Fall through. */
23577 case NEON_LOGICBINOP
:
23578 case NEON_SHIFTINSERT
:
23585 case NEON_SHIFTIMM
:
23586 case NEON_SHIFTACC
:
23592 case NEON_LANEMULL
:
23593 case NEON_LANEMULH
:
23595 case NEON_SCALARMUL
:
23596 case NEON_SCALARMULL
:
23597 case NEON_SCALARMULH
:
23598 case NEON_SCALARMAC
:
23604 tree return_type
= void_type_node
, args
= void_list_node
;
23606 /* Build a function type directly from the insn_data for
23607 this builtin. The build_function_type() function takes
23608 care of removing duplicates for us. */
23609 for (k
= insn_data
[d
->code
].n_generator_args
- 1; k
>= 0; k
--)
23613 if (is_load
&& k
== 1)
23615 /* Neon load patterns always have the memory
23616 operand in the operand 1 position. */
23617 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
23618 == neon_struct_operand
);
23624 eltype
= const_intQI_pointer_node
;
23629 eltype
= const_intHI_pointer_node
;
23634 eltype
= const_intSI_pointer_node
;
23639 eltype
= const_float_pointer_node
;
23644 eltype
= const_intDI_pointer_node
;
23647 default: gcc_unreachable ();
23650 else if (is_store
&& k
== 0)
23652 /* Similarly, Neon store patterns use operand 0 as
23653 the memory location to store to. */
23654 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
23655 == neon_struct_operand
);
23661 eltype
= intQI_pointer_node
;
23666 eltype
= intHI_pointer_node
;
23671 eltype
= intSI_pointer_node
;
23676 eltype
= float_pointer_node
;
23681 eltype
= intDI_pointer_node
;
23684 default: gcc_unreachable ();
23689 switch (insn_data
[d
->code
].operand
[k
].mode
)
23691 case VOIDmode
: eltype
= void_type_node
; break;
23693 case QImode
: eltype
= neon_intQI_type_node
; break;
23694 case HImode
: eltype
= neon_intHI_type_node
; break;
23695 case SImode
: eltype
= neon_intSI_type_node
; break;
23696 case SFmode
: eltype
= neon_float_type_node
; break;
23697 case DImode
: eltype
= neon_intDI_type_node
; break;
23698 case TImode
: eltype
= intTI_type_node
; break;
23699 case EImode
: eltype
= intEI_type_node
; break;
23700 case OImode
: eltype
= intOI_type_node
; break;
23701 case CImode
: eltype
= intCI_type_node
; break;
23702 case XImode
: eltype
= intXI_type_node
; break;
23703 /* 64-bit vectors. */
23704 case V8QImode
: eltype
= V8QI_type_node
; break;
23705 case V4HImode
: eltype
= V4HI_type_node
; break;
23706 case V2SImode
: eltype
= V2SI_type_node
; break;
23707 case V2SFmode
: eltype
= V2SF_type_node
; break;
23708 /* 128-bit vectors. */
23709 case V16QImode
: eltype
= V16QI_type_node
; break;
23710 case V8HImode
: eltype
= V8HI_type_node
; break;
23711 case V4SImode
: eltype
= V4SI_type_node
; break;
23712 case V4SFmode
: eltype
= V4SF_type_node
; break;
23713 case V2DImode
: eltype
= V2DI_type_node
; break;
23714 default: gcc_unreachable ();
23718 if (k
== 0 && !is_store
)
23719 return_type
= eltype
;
23721 args
= tree_cons (NULL_TREE
, eltype
, args
);
23724 ftype
= build_function_type (return_type
, args
);
23728 case NEON_RESULTPAIR
:
23730 switch (insn_data
[d
->code
].operand
[1].mode
)
23732 case V8QImode
: ftype
= void_ftype_pv8qi_v8qi_v8qi
; break;
23733 case V4HImode
: ftype
= void_ftype_pv4hi_v4hi_v4hi
; break;
23734 case V2SImode
: ftype
= void_ftype_pv2si_v2si_v2si
; break;
23735 case V2SFmode
: ftype
= void_ftype_pv2sf_v2sf_v2sf
; break;
23736 case DImode
: ftype
= void_ftype_pdi_di_di
; break;
23737 case V16QImode
: ftype
= void_ftype_pv16qi_v16qi_v16qi
; break;
23738 case V8HImode
: ftype
= void_ftype_pv8hi_v8hi_v8hi
; break;
23739 case V4SImode
: ftype
= void_ftype_pv4si_v4si_v4si
; break;
23740 case V4SFmode
: ftype
= void_ftype_pv4sf_v4sf_v4sf
; break;
23741 case V2DImode
: ftype
= void_ftype_pv2di_v2di_v2di
; break;
23742 default: gcc_unreachable ();
23747 case NEON_REINTERP
:
23749 /* We iterate over NUM_DREG_TYPES doubleword types,
23750 then NUM_QREG_TYPES quadword types.
23751 V4HF is not a type used in reinterpret, so we translate
23752 d->mode to the correct index in reinterp_ftype_dreg. */
23754 = GET_MODE_SIZE (insn_data
[d
->code
].operand
[0].mode
) > 8;
23755 int rhs
= (d
->mode
- ((!qreg_p
&& (d
->mode
> T_V4HF
)) ? 1 : 0))
23757 switch (insn_data
[d
->code
].operand
[0].mode
)
23759 case V8QImode
: ftype
= reinterp_ftype_dreg
[0][rhs
]; break;
23760 case V4HImode
: ftype
= reinterp_ftype_dreg
[1][rhs
]; break;
23761 case V2SImode
: ftype
= reinterp_ftype_dreg
[2][rhs
]; break;
23762 case V2SFmode
: ftype
= reinterp_ftype_dreg
[3][rhs
]; break;
23763 case DImode
: ftype
= reinterp_ftype_dreg
[4][rhs
]; break;
23764 case V16QImode
: ftype
= reinterp_ftype_qreg
[0][rhs
]; break;
23765 case V8HImode
: ftype
= reinterp_ftype_qreg
[1][rhs
]; break;
23766 case V4SImode
: ftype
= reinterp_ftype_qreg
[2][rhs
]; break;
23767 case V4SFmode
: ftype
= reinterp_ftype_qreg
[3][rhs
]; break;
23768 case V2DImode
: ftype
= reinterp_ftype_qreg
[4][rhs
]; break;
23769 case TImode
: ftype
= reinterp_ftype_qreg
[5][rhs
]; break;
23770 default: gcc_unreachable ();
23774 case NEON_FLOAT_WIDEN
:
23776 tree eltype
= NULL_TREE
;
23777 tree return_type
= NULL_TREE
;
23779 switch (insn_data
[d
->code
].operand
[1].mode
)
23782 eltype
= V4HF_type_node
;
23783 return_type
= V4SF_type_node
;
23785 default: gcc_unreachable ();
23787 ftype
= build_function_type_list (return_type
, eltype
, NULL
);
23790 case NEON_FLOAT_NARROW
:
23792 tree eltype
= NULL_TREE
;
23793 tree return_type
= NULL_TREE
;
23795 switch (insn_data
[d
->code
].operand
[1].mode
)
23798 eltype
= V4SF_type_node
;
23799 return_type
= V4HF_type_node
;
23801 default: gcc_unreachable ();
23803 ftype
= build_function_type_list (return_type
, eltype
, NULL
);
23807 gcc_unreachable ();
23810 gcc_assert (ftype
!= NULL
);
23812 sprintf (namebuf
, "__builtin_neon_%s%s", d
->name
, modenames
[d
->mode
]);
23814 decl
= add_builtin_function (namebuf
, ftype
, fcode
, BUILT_IN_MD
, NULL
,
23816 arm_builtin_decls
[fcode
] = decl
;
23820 #undef NUM_DREG_TYPES
23821 #undef NUM_QREG_TYPES
23823 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
23826 if ((MASK) & insn_flags) \
23829 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
23830 BUILT_IN_MD, NULL, NULL_TREE); \
23831 arm_builtin_decls[CODE] = bdecl; \
/* Describes one machine-dependent builtin: which target feature gates
   it, which insn implements it, its user-visible name, and its entry
   in the arm_builtins enumeration.  Used by the bdesc_* tables below.  */
struct builtin_description
{
  /* FL_* feature flag(s) that must be present in insn_flags for this
     builtin to be registered (0 = always available once the containing
     init routine runs).  */
  const unsigned int mask;
  /* The insn pattern that expands this builtin.  */
  const enum insn_code icode;
  /* User-visible builtin name; NULL entries are expanded elsewhere and
     are skipped by the generic registration loop.  */
  const char * const name;
  /* Index into arm_builtin_decls.  */
  const enum arm_builtins code;
  /* Comparison code for mask-generating compares (UNKNOWN otherwise).  */
  const enum rtx_code comparison;
  const unsigned int flag;
};
/* Table of two-operand builtins: iWMMXt/iWMMXt2 vector arithmetic,
   CRC32, and the two-argument crypto intrinsics.  Entries with a NULL
   name (the *_BUILTIN2 variants) are registered by hand elsewhere but
   still need a slot here so their expand path can find the icode.  */
static const struct builtin_description bdesc_2arg[] =
{
#define IWMMXT_BUILTIN(code, string, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
    ARM_BUILTIN_##builtin, UNKNOWN, 0 },

#define IWMMXT2_BUILTIN(code, string, builtin) \
  { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
    ARM_BUILTIN_##builtin, UNKNOWN, 0 },

  /* Saturating and wrapping add/sub on 8x8, 4x16 and 2x32 lanes.  */
  IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
  IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
  IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
  IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
  IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
  IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
  IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
  IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
  IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
  IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
  IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
  IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
  IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
  IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
  IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
  IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
  IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
  IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
  IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
  IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
  IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
  /* Mask-generating lane compares.  */
  IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
  IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
  IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
  IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
  IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
  IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
  IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
  IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
  IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
  IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
  IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
  IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
  IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
  IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
  IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
  IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
  IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
  IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
  IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
  IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
  IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
  IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
  IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
  IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
  IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
  IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
  IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
  IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
  IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
  IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
  IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
  IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
  IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
  IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
  IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
  IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
  IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
  IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
  IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
  IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
  IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
  IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
  IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
  IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
  IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
  IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
  IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
  IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
  IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
  IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
  IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
  IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
  IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
  IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
  IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)

  /* Nameless variants: registered manually in arm_init_iwmmxt_builtins,
     so the generic loop skips them (name == NULL).  */
#define IWMMXT_BUILTIN2(code, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },

#define IWMMXT2_BUILTIN2(code, builtin) \
  { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },

  IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
  IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
  IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
  IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
  IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)

  /* ARMv8 CRC32 accumulate builtins (mask 0: gated at init time by the
     caller, not by insn_flags).  */
#define CRC32_BUILTIN(L, U) \
  {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
   UNKNOWN, 0},
   CRC32_BUILTIN (crc32b, CRC32B)
   CRC32_BUILTIN (crc32h, CRC32H)
   CRC32_BUILTIN (crc32w, CRC32W)
   CRC32_BUILTIN (crc32cb, CRC32CB)
   CRC32_BUILTIN (crc32ch, CRC32CH)
   CRC32_BUILTIN (crc32cw, CRC32CW)
#undef CRC32_BUILTIN

  /* Two-argument crypto builtins, pulled in from crypto.def: only the
     CRYPTO2 entries expand to table rows here.  */
#define CRYPTO_BUILTIN(L, U) \
  {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
   UNKNOWN, 0},
#undef CRYPTO1
#undef CRYPTO2
#undef CRYPTO3
#define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
#define CRYPTO1(L, U, R, A)
#define CRYPTO3(L, U, R, A1, A2, A3)
#include "crypto.def"
#undef CRYPTO1
#undef CRYPTO2
#undef CRYPTO3
};
/* Table of one-operand builtins: iWMMXt reductions/unpacks/broadcasts
   plus the single-argument crypto intrinsics from crypto.def.  */
static const struct builtin_description bdesc_1arg[] =
{
  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
  IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
  IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
  IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
  IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
  IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
  IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
  IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
  IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
  IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
  IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
  IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)

  /* One-argument crypto builtins: only CRYPTO1 entries expand here.  */
#define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
#define CRYPTO2(L, U, R, A1, A2)
#define CRYPTO3(L, U, R, A1, A2, A3)
#include "crypto.def"
#undef CRYPTO1
#undef CRYPTO2
#undef CRYPTO3
};
/* Table of three-operand builtins: currently only the three-argument
   crypto intrinsics from crypto.def (CRYPTO3 entries).  */
static const struct builtin_description bdesc_3arg[] =
{
#define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
#define CRYPTO1(L, U, R, A)
#define CRYPTO2(L, U, R, A1, A2)
#include "crypto.def"
#undef CRYPTO1
#undef CRYPTO2
#undef CRYPTO3
};
#undef CRYPTO_BUILTIN
/* Set up all the iWMMXt builtins.  This is not called if
   TARGET_IWMMXT is zero.  */

static void
arm_init_iwmmxt_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  /* The iWMMXt vector types, built from the scalar element types.  */
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);

  /* Function types used by the builtins below, named
     <result>_ftype_<arg1>_<arg2>...  DImode operands are represented
     as (unsigned) long long at the C level.  */
  tree v8qi_ftype_v8qi_v8qi_int
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node,
				integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v2si_ftype_v2si_int
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, integer_type_node, NULL_TREE);
  tree v2si_ftype_di_di
    = build_function_type_list (V2SI_type_node,
				long_long_integer_type_node,
				long_long_integer_type_node,
				NULL_TREE);
  tree di_ftype_di_int
    = build_function_type_list (long_long_integer_type_node,
				long_long_integer_type_node,
				integer_type_node, NULL_TREE);
  tree di_ftype_di_int_int
    = build_function_type_list (long_long_integer_type_node,
				long_long_integer_type_node,
				integer_type_node,
				integer_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node,
				V8QI_type_node, NULL_TREE);
  tree int_ftype_v4hi
    = build_function_type_list (integer_type_node,
				V4HI_type_node, NULL_TREE);
  tree int_ftype_v2si
    = build_function_type_list (integer_type_node,
				V2SI_type_node, NULL_TREE);
  tree int_ftype_v8qi_int
    = build_function_type_list (integer_type_node,
				V8QI_type_node, integer_type_node, NULL_TREE);
  tree int_ftype_v4hi_int
    = build_function_type_list (integer_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree int_ftype_v2si_int
    = build_function_type_list (integer_type_node,
				V2SI_type_node, integer_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_int_int
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  tree v2si_ftype_v2si_int_int
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v8qi_ftype_v4hi_v8qi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V8QI_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v8qi_v8qi
    = build_function_type_list (V2SI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree di_ftype_void
    = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
  tree int_ftype_void
    = build_function_type_list (integer_type_node, NULL_TREE);
  tree di_ftype_v8qi
    = build_function_type_list (long_long_integer_type_node,
				V8QI_type_node, NULL_TREE);
  tree di_ftype_v4hi
    = build_function_type_list (long_long_integer_type_node,
				V4HI_type_node, NULL_TREE);
  tree di_ftype_v2si
    = build_function_type_list (long_long_integer_type_node,
				V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v8qi
    = build_function_type_list (V4HI_type_node,
				V8QI_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, NULL_TREE);

  tree di_ftype_di_v4hi_v4hi
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				V4HI_type_node, V4HI_type_node,
				NULL_TREE);

  tree di_ftype_v4hi_v4hi
    = build_function_type_list (long_long_unsigned_type_node,
				V4HI_type_node, V4HI_type_node,
				NULL_TREE);

  tree v2si_ftype_v2si_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V4HI_type_node,
				V4HI_type_node, NULL_TREE);

  tree v2si_ftype_v2si_v8qi_v8qi
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V8QI_type_node,
				V8QI_type_node, NULL_TREE);

  tree di_ftype_di_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				V2SI_type_node, V2SI_type_node,
				NULL_TREE);

  tree di_ftype_di_di_int
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				integer_type_node, NULL_TREE);

  tree void_ftype_int
    = build_function_type_list (void_type_node,
				integer_type_node, NULL_TREE);

  tree v8qi_ftype_char
    = build_function_type_list (V8QI_type_node,
				signed_char_type_node, NULL_TREE);

  tree v4hi_ftype_short
    = build_function_type_list (V4HI_type_node,
				short_integer_type_node, NULL_TREE);

  tree v2si_ftype_int
    = build_function_type_list (V2SI_type_node,
				integer_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				NULL_TREE);

  /* Add all builtins that are more or less simple operations on two
     operands.  The function type is derived from operand 1 of the insn
     pattern; entries with a NULL name are registered by hand below.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      def_mbuiltin (d->mask, d->name, type, d->code);
    }

  /* Add the remaining MMX insns with somewhat more complicated types.  */
#define iwmmx_mbuiltin(NAME, TYPE, CODE)			\
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE),	\
		ARM_BUILTIN_ ## CODE)

#define iwmmx2_mbuiltin(NAME, TYPE, CODE)			\
  def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE),	\
		ARM_BUILTIN_ ## CODE)

  iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
  iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
  iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
  iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
  iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
  iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
  iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
  iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
  iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);

  /* Shifts and rotates: one variant taking a DImode count, one taking
     an immediate.  */
  iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
  iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
  iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
  iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
  iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
  iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);

  iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
  iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
  iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
  iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
  iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
  iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);

  iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
  iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
  iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
  iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
  iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
  iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);

  iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
  iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
  iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
  iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
  iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
  iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);

  iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);

  /* Sum-of-absolute-differences and multiply-accumulate.  */
  iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
  iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
  iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
  iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
  iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
  iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
  iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
  iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
  iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
  iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);

  /* Lane extract/insert.  */
  iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
  iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
  iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
  iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
  iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
  iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
  iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
  iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
  iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);

  iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
  iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
  iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);

  iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
  iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
  iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);

  iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
  iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);

  /* Pack/unpack.  */
  iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
  iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
  iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
  iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
  iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
  iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);

  iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
  iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
  iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
  iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
  iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
  iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
  iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
  iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
  iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
  iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
  iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
  iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);

  iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
  iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
  iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
  iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);

  iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
  iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
  iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
  iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
  iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
  iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
  iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);

  iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
  iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
  iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);

  iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
  iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
  iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
  iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);

  iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
  iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
  iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
  iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);

  iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
  iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
  iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
  iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);

  iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
  iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
  iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
  iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);

  iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
  iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
  iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
  iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);

  iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
  iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
  iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
  iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);

  iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);

  /* Scalar-to-vector broadcasts.  */
  iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
  iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
  iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);

#undef iwmmx_mbuiltin
#undef iwmmx2_mbuiltin
}
24408 arm_init_fp16_builtins (void)
24410 tree fp16_type
= make_node (REAL_TYPE
);
24411 TYPE_PRECISION (fp16_type
) = 16;
24412 layout_type (fp16_type
);
24413 (*lang_hooks
.types
.register_builtin_type
) (fp16_type
, "__fp16");
24417 arm_init_crc32_builtins ()
24419 tree si_ftype_si_qi
24420 = build_function_type_list (unsigned_intSI_type_node
,
24421 unsigned_intSI_type_node
,
24422 unsigned_intQI_type_node
, NULL_TREE
);
24423 tree si_ftype_si_hi
24424 = build_function_type_list (unsigned_intSI_type_node
,
24425 unsigned_intSI_type_node
,
24426 unsigned_intHI_type_node
, NULL_TREE
);
24427 tree si_ftype_si_si
24428 = build_function_type_list (unsigned_intSI_type_node
,
24429 unsigned_intSI_type_node
,
24430 unsigned_intSI_type_node
, NULL_TREE
);
24432 arm_builtin_decls
[ARM_BUILTIN_CRC32B
]
24433 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi
,
24434 ARM_BUILTIN_CRC32B
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24435 arm_builtin_decls
[ARM_BUILTIN_CRC32H
]
24436 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi
,
24437 ARM_BUILTIN_CRC32H
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24438 arm_builtin_decls
[ARM_BUILTIN_CRC32W
]
24439 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si
,
24440 ARM_BUILTIN_CRC32W
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24441 arm_builtin_decls
[ARM_BUILTIN_CRC32CB
]
24442 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi
,
24443 ARM_BUILTIN_CRC32CB
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24444 arm_builtin_decls
[ARM_BUILTIN_CRC32CH
]
24445 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi
,
24446 ARM_BUILTIN_CRC32CH
, BUILT_IN_MD
, NULL
, NULL_TREE
);
24447 arm_builtin_decls
[ARM_BUILTIN_CRC32CW
]
24448 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si
,
24449 ARM_BUILTIN_CRC32CW
, BUILT_IN_MD
, NULL
, NULL_TREE
);
/* Implement TARGET_INIT_BUILTINS: register every ARM machine-dependent
   builtin family whose prerequisite target feature is enabled.
   NOTE(review): the guards on the NEON and CRC32 calls were lost in
   extraction and have been reconstructed as TARGET_NEON / TARGET_CRC32;
   confirm against the original source.  */
static void
arm_init_builtins (void)
{
  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

  if (TARGET_NEON)
    arm_init_neon_builtins ();

  /* __fp16 support only when a half-precision format is selected.  */
  if (arm_fp16_format)
    arm_init_fp16_builtins ();

  if (TARGET_CRC32)
    arm_init_crc32_builtins ();
}
24468 /* Return the ARM builtin for CODE. */
24471 arm_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
24473 if (code
>= ARM_BUILTIN_MAX
)
24474 return error_mark_node
;
24476 return arm_builtin_decls
[code
];
24479 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24481 static const char *
24482 arm_invalid_parameter_type (const_tree t
)
24484 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
24485 return N_("function parameters cannot have __fp16 type");
/* Implement TARGET_INVALID_RETURN_TYPE: reject __fp16 (16-bit scalar
   float) as a function return type.  Returns a diagnostic string, or
   NULL when T is acceptable.  (The original comment named
   TARGET_INVALID_PARAMETER_TYPE — a copy-paste slip; this function is
   the return-type hook.)  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}
/* Implement TARGET_PROMOTED_TYPE: __fp16 values promote to float in
   arithmetic, per the ARM half-precision C semantics.  Returns the
   promoted type, or NULL_TREE for no promotion.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions
   between __fp16 to or from double to do an intermediate conversion to
   float.  Returns the converted expression, or NULL_TREE to fall back
   to the default conversion.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  /* Only float-to-float conversions are special-cased here.  */
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  /* __fp16 <-> anything wider than float: go through float.  */
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
24526 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24527 This simply adds HFmode as a supported mode; even though we don't
24528 implement arithmetic on this type directly, it's supported by
24529 optabs conversions, much the way the double-word arithmetic is
24530 special-cased in the default hook. */
24533 arm_scalar_mode_supported_p (enum machine_mode mode
)
24535 if (mode
== HFmode
)
24536 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
24537 else if (ALL_FIXED_POINT_MODE_P (mode
))
24540 return default_scalar_mode_supported_p (mode
);
24543 /* Errors in the source file can cause expand_expr to return const0_rtx
24544 where we expect a vector. To avoid crashing, use one of the vector
24545 clear instructions. */
24548 safe_vector_operand (rtx x
, enum machine_mode mode
)
24550 if (x
!= const0_rtx
)
24552 x
= gen_reg_rtx (mode
);
24554 emit_insn (gen_iwmmxt_clrdi (mode
== DImode
? x
24555 : gen_rtx_SUBREG (DImode
, x
, 0)));
24559 /* Function to expand ternary builtins. */
24561 arm_expand_ternop_builtin (enum insn_code icode
,
24562 tree exp
, rtx target
)
24565 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24566 tree arg1
= CALL_EXPR_ARG (exp
, 1);
24567 tree arg2
= CALL_EXPR_ARG (exp
, 2);
24569 rtx op0
= expand_normal (arg0
);
24570 rtx op1
= expand_normal (arg1
);
24571 rtx op2
= expand_normal (arg2
);
24572 rtx op3
= NULL_RTX
;
24574 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
24575 lane operand depending on endianness. */
24576 bool builtin_sha1cpm_p
= false;
24578 if (insn_data
[icode
].n_operands
== 5)
24580 gcc_assert (icode
== CODE_FOR_crypto_sha1c
24581 || icode
== CODE_FOR_crypto_sha1p
24582 || icode
== CODE_FOR_crypto_sha1m
);
24583 builtin_sha1cpm_p
= true;
24585 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24586 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
24587 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
24588 enum machine_mode mode2
= insn_data
[icode
].operand
[3].mode
;
24591 if (VECTOR_MODE_P (mode0
))
24592 op0
= safe_vector_operand (op0
, mode0
);
24593 if (VECTOR_MODE_P (mode1
))
24594 op1
= safe_vector_operand (op1
, mode1
);
24595 if (VECTOR_MODE_P (mode2
))
24596 op2
= safe_vector_operand (op2
, mode2
);
24599 || GET_MODE (target
) != tmode
24600 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24601 target
= gen_reg_rtx (tmode
);
24603 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
24604 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
)
24605 && (GET_MODE (op2
) == mode2
|| GET_MODE (op2
) == VOIDmode
));
24607 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24608 op0
= copy_to_mode_reg (mode0
, op0
);
24609 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
24610 op1
= copy_to_mode_reg (mode1
, op1
);
24611 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
24612 op2
= copy_to_mode_reg (mode2
, op2
);
24613 if (builtin_sha1cpm_p
)
24614 op3
= GEN_INT (TARGET_BIG_END
? 1 : 0);
24616 if (builtin_sha1cpm_p
)
24617 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
24619 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
24626 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24629 arm_expand_binop_builtin (enum insn_code icode
,
24630 tree exp
, rtx target
)
24633 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24634 tree arg1
= CALL_EXPR_ARG (exp
, 1);
24635 rtx op0
= expand_normal (arg0
);
24636 rtx op1
= expand_normal (arg1
);
24637 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24638 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
24639 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
24641 if (VECTOR_MODE_P (mode0
))
24642 op0
= safe_vector_operand (op0
, mode0
);
24643 if (VECTOR_MODE_P (mode1
))
24644 op1
= safe_vector_operand (op1
, mode1
);
24647 || GET_MODE (target
) != tmode
24648 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24649 target
= gen_reg_rtx (tmode
);
24651 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
24652 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
24654 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24655 op0
= copy_to_mode_reg (mode0
, op0
);
24656 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
24657 op1
= copy_to_mode_reg (mode1
, op1
);
24659 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
24666 /* Subroutine of arm_expand_builtin to take care of unop insns. */
24669 arm_expand_unop_builtin (enum insn_code icode
,
24670 tree exp
, rtx target
, int do_load
)
24673 tree arg0
= CALL_EXPR_ARG (exp
, 0);
24674 rtx op0
= expand_normal (arg0
);
24675 rtx op1
= NULL_RTX
;
24676 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24677 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
24678 bool builtin_sha1h_p
= false;
24680 if (insn_data
[icode
].n_operands
== 3)
24682 gcc_assert (icode
== CODE_FOR_crypto_sha1h
);
24683 builtin_sha1h_p
= true;
24687 || GET_MODE (target
) != tmode
24688 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
24689 target
= gen_reg_rtx (tmode
);
24691 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
24694 if (VECTOR_MODE_P (mode0
))
24695 op0
= safe_vector_operand (op0
, mode0
);
24697 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
24698 op0
= copy_to_mode_reg (mode0
, op0
);
24700 if (builtin_sha1h_p
)
24701 op1
= GEN_INT (TARGET_BIG_END
? 1 : 0);
24703 if (builtin_sha1h_p
)
24704 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
24706 pat
= GEN_FCN (icode
) (target
, op0
);
24714 NEON_ARG_COPY_TO_REG
,
24720 #define NEON_MAX_BUILTIN_ARGS 5
24722 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
24723 and return an expression for the accessed memory.
24725 The intrinsic function operates on a block of registers that has
24726 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
24727 function references the memory at EXP of type TYPE and in mode
24728 MEM_MODE; this mode may be BLKmode if no more suitable mode is
24732 neon_dereference_pointer (tree exp
, tree type
, enum machine_mode mem_mode
,
24733 enum machine_mode reg_mode
,
24734 neon_builtin_type_mode type_mode
)
24736 HOST_WIDE_INT reg_size
, vector_size
, nvectors
, nelems
;
24737 tree elem_type
, upper_bound
, array_type
;
24739 /* Work out the size of the register block in bytes. */
24740 reg_size
= GET_MODE_SIZE (reg_mode
);
24742 /* Work out the size of each vector in bytes. */
24743 gcc_assert (TYPE_MODE_BIT (type_mode
) & (TB_DREG
| TB_QREG
));
24744 vector_size
= (TYPE_MODE_BIT (type_mode
) & TB_QREG
? 16 : 8);
24746 /* Work out how many vectors there are. */
24747 gcc_assert (reg_size
% vector_size
== 0);
24748 nvectors
= reg_size
/ vector_size
;
24750 /* Work out the type of each element. */
24751 gcc_assert (POINTER_TYPE_P (type
));
24752 elem_type
= TREE_TYPE (type
);
24754 /* Work out how many elements are being loaded or stored.
24755 MEM_MODE == REG_MODE implies a one-to-one mapping between register
24756 and memory elements; anything else implies a lane load or store. */
24757 if (mem_mode
== reg_mode
)
24758 nelems
= vector_size
* nvectors
/ int_size_in_bytes (elem_type
);
24762 /* Create a type that describes the full access. */
24763 upper_bound
= build_int_cst (size_type_node
, nelems
- 1);
24764 array_type
= build_array_type (elem_type
, build_index_type (upper_bound
));
24766 /* Dereference EXP using that type. */
24767 return fold_build2 (MEM_REF
, array_type
, exp
,
24768 build_int_cst (build_pointer_type (array_type
), 0));
24771 /* Expand a Neon builtin. */
24773 arm_expand_neon_args (rtx target
, int icode
, int have_retval
,
24774 neon_builtin_type_mode type_mode
,
24775 tree exp
, int fcode
, ...)
24779 tree arg
[NEON_MAX_BUILTIN_ARGS
];
24780 rtx op
[NEON_MAX_BUILTIN_ARGS
];
24783 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
24784 enum machine_mode mode
[NEON_MAX_BUILTIN_ARGS
];
24785 enum machine_mode other_mode
;
24791 || GET_MODE (target
) != tmode
24792 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
)))
24793 target
= gen_reg_rtx (tmode
);
24795 va_start (ap
, fcode
);
24797 formals
= TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls
[fcode
]));
24801 builtin_arg thisarg
= (builtin_arg
) va_arg (ap
, int);
24803 if (thisarg
== NEON_ARG_STOP
)
24807 opno
= argc
+ have_retval
;
24808 mode
[argc
] = insn_data
[icode
].operand
[opno
].mode
;
24809 arg
[argc
] = CALL_EXPR_ARG (exp
, argc
);
24810 arg_type
= TREE_VALUE (formals
);
24811 if (thisarg
== NEON_ARG_MEMORY
)
24813 other_mode
= insn_data
[icode
].operand
[1 - opno
].mode
;
24814 arg
[argc
] = neon_dereference_pointer (arg
[argc
], arg_type
,
24815 mode
[argc
], other_mode
,
24819 op
[argc
] = expand_normal (arg
[argc
]);
24823 case NEON_ARG_COPY_TO_REG
:
24824 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
24825 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
24826 (op
[argc
], mode
[argc
]))
24827 op
[argc
] = copy_to_mode_reg (mode
[argc
], op
[argc
]);
24830 case NEON_ARG_CONSTANT
:
24831 /* FIXME: This error message is somewhat unhelpful. */
24832 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
24833 (op
[argc
], mode
[argc
]))
24834 error ("argument must be a constant");
24837 case NEON_ARG_MEMORY
:
24838 gcc_assert (MEM_P (op
[argc
]));
24839 PUT_MODE (op
[argc
], mode
[argc
]);
24840 /* ??? arm_neon.h uses the same built-in functions for signed
24841 and unsigned accesses, casting where necessary. This isn't
24843 set_mem_alias_set (op
[argc
], 0);
24844 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
24845 (op
[argc
], mode
[argc
]))
24846 op
[argc
] = (replace_equiv_address
24847 (op
[argc
], force_reg (Pmode
, XEXP (op
[argc
], 0))));
24850 case NEON_ARG_STOP
:
24851 gcc_unreachable ();
24855 formals
= TREE_CHAIN (formals
);
24865 pat
= GEN_FCN (icode
) (target
, op
[0]);
24869 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
24873 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
24877 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
24881 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
24885 gcc_unreachable ();
24891 pat
= GEN_FCN (icode
) (op
[0]);
24895 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
24899 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
24903 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
24907 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
24911 gcc_unreachable ();
24922 /* Expand a Neon builtin. These are "special" because they don't have symbolic
24923 constants defined per-instruction or per instruction-variant. Instead, the
24924 required info is looked up in the table neon_builtin_data. */
24926 arm_expand_neon_builtin (int fcode
, tree exp
, rtx target
)
24928 neon_builtin_datum
*d
= &neon_builtin_data
[fcode
- ARM_BUILTIN_NEON_BASE
];
24929 neon_itype itype
= d
->itype
;
24930 enum insn_code icode
= d
->code
;
24931 neon_builtin_type_mode type_mode
= d
->mode
;
24938 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24939 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
24943 case NEON_SCALARMUL
:
24944 case NEON_SCALARMULL
:
24945 case NEON_SCALARMULH
:
24946 case NEON_SHIFTINSERT
:
24947 case NEON_LOGICBINOP
:
24948 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24949 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
24953 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24954 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
24955 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
24959 case NEON_SHIFTIMM
:
24960 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24961 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
,
24965 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24966 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
24971 case NEON_FLOAT_WIDEN
:
24972 case NEON_FLOAT_NARROW
:
24973 case NEON_REINTERP
:
24974 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24975 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
24979 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24980 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
24982 case NEON_RESULTPAIR
:
24983 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
24984 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
24988 case NEON_LANEMULL
:
24989 case NEON_LANEMULH
:
24990 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24991 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
24992 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
24995 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
24996 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
24997 NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
24999 case NEON_SHIFTACC
:
25000 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25001 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25002 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25004 case NEON_SCALARMAC
:
25005 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25006 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25007 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
25011 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25012 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
25016 case NEON_LOADSTRUCT
:
25017 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25018 NEON_ARG_MEMORY
, NEON_ARG_STOP
);
25020 case NEON_LOAD1LANE
:
25021 case NEON_LOADSTRUCTLANE
:
25022 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
, fcode
,
25023 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25027 case NEON_STORESTRUCT
:
25028 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
25029 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
25031 case NEON_STORE1LANE
:
25032 case NEON_STORESTRUCTLANE
:
25033 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
, fcode
,
25034 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
25038 gcc_unreachable ();
25041 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25043 neon_reinterpret (rtx dest
, rtx src
)
25045 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
25048 /* Emit code to place a Neon pair result in memory locations (with equal
25051 neon_emit_pair_result_insn (enum machine_mode mode
,
25052 rtx (*intfn
) (rtx
, rtx
, rtx
, rtx
), rtx destaddr
,
25055 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
25056 rtx tmp1
= gen_reg_rtx (mode
);
25057 rtx tmp2
= gen_reg_rtx (mode
);
25059 emit_insn (intfn (tmp1
, op1
, op2
, tmp2
));
25061 emit_move_insn (mem
, tmp1
);
25062 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
25063 emit_move_insn (mem
, tmp2
);
25066 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25067 not to early-clobber SRC registers in the process.
25069 We assume that the operands described by SRC and DEST represent a
25070 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25071 number of components into which the copy has been decomposed. */
25073 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
25077 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
25078 || REGNO (operands
[0]) < REGNO (operands
[1]))
25080 for (i
= 0; i
< count
; i
++)
25082 operands
[2 * i
] = dest
[i
];
25083 operands
[2 * i
+ 1] = src
[i
];
25088 for (i
= 0; i
< count
; i
++)
25090 operands
[2 * i
] = dest
[count
- i
- 1];
25091 operands
[2 * i
+ 1] = src
[count
- i
- 1];
25096 /* Split operands into moves from op[1] + op[2] into op[0]. */
25099 neon_split_vcombine (rtx operands
[3])
25101 unsigned int dest
= REGNO (operands
[0]);
25102 unsigned int src1
= REGNO (operands
[1]);
25103 unsigned int src2
= REGNO (operands
[2]);
25104 enum machine_mode halfmode
= GET_MODE (operands
[1]);
25105 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
25106 rtx destlo
, desthi
;
25108 if (src1
== dest
&& src2
== dest
+ halfregs
)
25110 /* No-op move. Can't split to nothing; emit something. */
25111 emit_note (NOTE_INSN_DELETED
);
25115 /* Preserve register attributes for variable tracking. */
25116 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
25117 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
25118 GET_MODE_SIZE (halfmode
));
25120 /* Special case of reversed high/low parts. Use VSWP. */
25121 if (src2
== dest
&& src1
== dest
+ halfregs
)
25123 rtx x
= gen_rtx_SET (VOIDmode
, destlo
, operands
[1]);
25124 rtx y
= gen_rtx_SET (VOIDmode
, desthi
, operands
[2]);
25125 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
25129 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
25131 /* Try to avoid unnecessary moves if part of the result
25132 is in the right place already. */
25134 emit_move_insn (destlo
, operands
[1]);
25135 if (src2
!= dest
+ halfregs
)
25136 emit_move_insn (desthi
, operands
[2]);
25140 if (src2
!= dest
+ halfregs
)
25141 emit_move_insn (desthi
, operands
[2]);
25143 emit_move_insn (destlo
, operands
[1]);
25147 /* Expand an expression EXP that calls a built-in function,
25148 with result going to TARGET if that's convenient
25149 (and in mode MODE if that's convenient).
25150 SUBTARGET may be used as the target for computing one of EXP's operands.
25151 IGNORE is nonzero if the value is to be ignored. */
25154 arm_expand_builtin (tree exp
,
25156 rtx subtarget ATTRIBUTE_UNUSED
,
25157 enum machine_mode mode ATTRIBUTE_UNUSED
,
25158 int ignore ATTRIBUTE_UNUSED
)
25160 const struct builtin_description
* d
;
25161 enum insn_code icode
;
25162 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
25170 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
25172 enum machine_mode tmode
;
25173 enum machine_mode mode0
;
25174 enum machine_mode mode1
;
25175 enum machine_mode mode2
;
25181 if (fcode
>= ARM_BUILTIN_NEON_BASE
)
25182 return arm_expand_neon_builtin (fcode
, exp
, target
);
25186 case ARM_BUILTIN_TEXTRMSB
:
25187 case ARM_BUILTIN_TEXTRMUB
:
25188 case ARM_BUILTIN_TEXTRMSH
:
25189 case ARM_BUILTIN_TEXTRMUH
:
25190 case ARM_BUILTIN_TEXTRMSW
:
25191 case ARM_BUILTIN_TEXTRMUW
:
25192 icode
= (fcode
== ARM_BUILTIN_TEXTRMSB
? CODE_FOR_iwmmxt_textrmsb
25193 : fcode
== ARM_BUILTIN_TEXTRMUB
? CODE_FOR_iwmmxt_textrmub
25194 : fcode
== ARM_BUILTIN_TEXTRMSH
? CODE_FOR_iwmmxt_textrmsh
25195 : fcode
== ARM_BUILTIN_TEXTRMUH
? CODE_FOR_iwmmxt_textrmuh
25196 : CODE_FOR_iwmmxt_textrmw
);
25198 arg0
= CALL_EXPR_ARG (exp
, 0);
25199 arg1
= CALL_EXPR_ARG (exp
, 1);
25200 op0
= expand_normal (arg0
);
25201 op1
= expand_normal (arg1
);
25202 tmode
= insn_data
[icode
].operand
[0].mode
;
25203 mode0
= insn_data
[icode
].operand
[1].mode
;
25204 mode1
= insn_data
[icode
].operand
[2].mode
;
25206 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25207 op0
= copy_to_mode_reg (mode0
, op0
);
25208 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25210 /* @@@ better error message */
25211 error ("selector must be an immediate");
25212 return gen_reg_rtx (tmode
);
25215 opint
= INTVAL (op1
);
25216 if (fcode
== ARM_BUILTIN_TEXTRMSB
|| fcode
== ARM_BUILTIN_TEXTRMUB
)
25218 if (opint
> 7 || opint
< 0)
25219 error ("the range of selector should be in 0 to 7");
25221 else if (fcode
== ARM_BUILTIN_TEXTRMSH
|| fcode
== ARM_BUILTIN_TEXTRMUH
)
25223 if (opint
> 3 || opint
< 0)
25224 error ("the range of selector should be in 0 to 3");
25226 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25228 if (opint
> 1 || opint
< 0)
25229 error ("the range of selector should be in 0 to 1");
25233 || GET_MODE (target
) != tmode
25234 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25235 target
= gen_reg_rtx (tmode
);
25236 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25242 case ARM_BUILTIN_WALIGNI
:
25243 /* If op2 is immediate, call walighi, else call walighr. */
25244 arg0
= CALL_EXPR_ARG (exp
, 0);
25245 arg1
= CALL_EXPR_ARG (exp
, 1);
25246 arg2
= CALL_EXPR_ARG (exp
, 2);
25247 op0
= expand_normal (arg0
);
25248 op1
= expand_normal (arg1
);
25249 op2
= expand_normal (arg2
);
25250 if (CONST_INT_P (op2
))
25252 icode
= CODE_FOR_iwmmxt_waligni
;
25253 tmode
= insn_data
[icode
].operand
[0].mode
;
25254 mode0
= insn_data
[icode
].operand
[1].mode
;
25255 mode1
= insn_data
[icode
].operand
[2].mode
;
25256 mode2
= insn_data
[icode
].operand
[3].mode
;
25257 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25258 op0
= copy_to_mode_reg (mode0
, op0
);
25259 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25260 op1
= copy_to_mode_reg (mode1
, op1
);
25261 gcc_assert ((*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
));
25262 selector
= INTVAL (op2
);
25263 if (selector
> 7 || selector
< 0)
25264 error ("the range of selector should be in 0 to 7");
25268 icode
= CODE_FOR_iwmmxt_walignr
;
25269 tmode
= insn_data
[icode
].operand
[0].mode
;
25270 mode0
= insn_data
[icode
].operand
[1].mode
;
25271 mode1
= insn_data
[icode
].operand
[2].mode
;
25272 mode2
= insn_data
[icode
].operand
[3].mode
;
25273 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25274 op0
= copy_to_mode_reg (mode0
, op0
);
25275 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25276 op1
= copy_to_mode_reg (mode1
, op1
);
25277 if (!(*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25278 op2
= copy_to_mode_reg (mode2
, op2
);
25281 || GET_MODE (target
) != tmode
25282 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25283 target
= gen_reg_rtx (tmode
);
25284 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25290 case ARM_BUILTIN_TINSRB
:
25291 case ARM_BUILTIN_TINSRH
:
25292 case ARM_BUILTIN_TINSRW
:
25293 case ARM_BUILTIN_WMERGE
:
25294 icode
= (fcode
== ARM_BUILTIN_TINSRB
? CODE_FOR_iwmmxt_tinsrb
25295 : fcode
== ARM_BUILTIN_TINSRH
? CODE_FOR_iwmmxt_tinsrh
25296 : fcode
== ARM_BUILTIN_WMERGE
? CODE_FOR_iwmmxt_wmerge
25297 : CODE_FOR_iwmmxt_tinsrw
);
25298 arg0
= CALL_EXPR_ARG (exp
, 0);
25299 arg1
= CALL_EXPR_ARG (exp
, 1);
25300 arg2
= CALL_EXPR_ARG (exp
, 2);
25301 op0
= expand_normal (arg0
);
25302 op1
= expand_normal (arg1
);
25303 op2
= expand_normal (arg2
);
25304 tmode
= insn_data
[icode
].operand
[0].mode
;
25305 mode0
= insn_data
[icode
].operand
[1].mode
;
25306 mode1
= insn_data
[icode
].operand
[2].mode
;
25307 mode2
= insn_data
[icode
].operand
[3].mode
;
25309 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25310 op0
= copy_to_mode_reg (mode0
, op0
);
25311 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25312 op1
= copy_to_mode_reg (mode1
, op1
);
25313 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25315 error ("selector must be an immediate");
25318 if (icode
== CODE_FOR_iwmmxt_wmerge
)
25320 selector
= INTVAL (op2
);
25321 if (selector
> 7 || selector
< 0)
25322 error ("the range of selector should be in 0 to 7");
25324 if ((icode
== CODE_FOR_iwmmxt_tinsrb
)
25325 || (icode
== CODE_FOR_iwmmxt_tinsrh
)
25326 || (icode
== CODE_FOR_iwmmxt_tinsrw
))
25329 selector
= INTVAL (op2
);
25330 if (icode
== CODE_FOR_iwmmxt_tinsrb
&& (selector
< 0 || selector
> 7))
25331 error ("the range of selector should be in 0 to 7");
25332 else if (icode
== CODE_FOR_iwmmxt_tinsrh
&& (selector
< 0 ||selector
> 3))
25333 error ("the range of selector should be in 0 to 3");
25334 else if (icode
== CODE_FOR_iwmmxt_tinsrw
&& (selector
< 0 ||selector
> 1))
25335 error ("the range of selector should be in 0 to 1");
25337 op2
= GEN_INT (mask
);
25340 || GET_MODE (target
) != tmode
25341 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25342 target
= gen_reg_rtx (tmode
);
25343 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25349 case ARM_BUILTIN_SETWCGR0
:
25350 case ARM_BUILTIN_SETWCGR1
:
25351 case ARM_BUILTIN_SETWCGR2
:
25352 case ARM_BUILTIN_SETWCGR3
:
25353 icode
= (fcode
== ARM_BUILTIN_SETWCGR0
? CODE_FOR_iwmmxt_setwcgr0
25354 : fcode
== ARM_BUILTIN_SETWCGR1
? CODE_FOR_iwmmxt_setwcgr1
25355 : fcode
== ARM_BUILTIN_SETWCGR2
? CODE_FOR_iwmmxt_setwcgr2
25356 : CODE_FOR_iwmmxt_setwcgr3
);
25357 arg0
= CALL_EXPR_ARG (exp
, 0);
25358 op0
= expand_normal (arg0
);
25359 mode0
= insn_data
[icode
].operand
[0].mode
;
25360 if (!(*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
25361 op0
= copy_to_mode_reg (mode0
, op0
);
25362 pat
= GEN_FCN (icode
) (op0
);
25368 case ARM_BUILTIN_GETWCGR0
:
25369 case ARM_BUILTIN_GETWCGR1
:
25370 case ARM_BUILTIN_GETWCGR2
:
25371 case ARM_BUILTIN_GETWCGR3
:
25372 icode
= (fcode
== ARM_BUILTIN_GETWCGR0
? CODE_FOR_iwmmxt_getwcgr0
25373 : fcode
== ARM_BUILTIN_GETWCGR1
? CODE_FOR_iwmmxt_getwcgr1
25374 : fcode
== ARM_BUILTIN_GETWCGR2
? CODE_FOR_iwmmxt_getwcgr2
25375 : CODE_FOR_iwmmxt_getwcgr3
);
25376 tmode
= insn_data
[icode
].operand
[0].mode
;
25378 || GET_MODE (target
) != tmode
25379 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25380 target
= gen_reg_rtx (tmode
);
25381 pat
= GEN_FCN (icode
) (target
);
25387 case ARM_BUILTIN_WSHUFH
:
25388 icode
= CODE_FOR_iwmmxt_wshufh
;
25389 arg0
= CALL_EXPR_ARG (exp
, 0);
25390 arg1
= CALL_EXPR_ARG (exp
, 1);
25391 op0
= expand_normal (arg0
);
25392 op1
= expand_normal (arg1
);
25393 tmode
= insn_data
[icode
].operand
[0].mode
;
25394 mode1
= insn_data
[icode
].operand
[1].mode
;
25395 mode2
= insn_data
[icode
].operand
[2].mode
;
25397 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
25398 op0
= copy_to_mode_reg (mode1
, op0
);
25399 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
25401 error ("mask must be an immediate");
25404 selector
= INTVAL (op1
);
25405 if (selector
< 0 || selector
> 255)
25406 error ("the range of mask should be in 0 to 255");
25408 || GET_MODE (target
) != tmode
25409 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25410 target
= gen_reg_rtx (tmode
);
25411 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
25417 case ARM_BUILTIN_WMADDS
:
25418 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds
, exp
, target
);
25419 case ARM_BUILTIN_WMADDSX
:
25420 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx
, exp
, target
);
25421 case ARM_BUILTIN_WMADDSN
:
25422 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn
, exp
, target
);
25423 case ARM_BUILTIN_WMADDU
:
25424 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu
, exp
, target
);
25425 case ARM_BUILTIN_WMADDUX
:
25426 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux
, exp
, target
);
25427 case ARM_BUILTIN_WMADDUN
:
25428 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun
, exp
, target
);
25429 case ARM_BUILTIN_WSADBZ
:
25430 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
25431 case ARM_BUILTIN_WSADHZ
:
25432 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
25434 /* Several three-argument builtins. */
25435 case ARM_BUILTIN_WMACS
:
25436 case ARM_BUILTIN_WMACU
:
25437 case ARM_BUILTIN_TMIA
:
25438 case ARM_BUILTIN_TMIAPH
:
25439 case ARM_BUILTIN_TMIATT
:
25440 case ARM_BUILTIN_TMIATB
:
25441 case ARM_BUILTIN_TMIABT
:
25442 case ARM_BUILTIN_TMIABB
:
25443 case ARM_BUILTIN_WQMIABB
:
25444 case ARM_BUILTIN_WQMIABT
:
25445 case ARM_BUILTIN_WQMIATB
:
25446 case ARM_BUILTIN_WQMIATT
:
25447 case ARM_BUILTIN_WQMIABBN
:
25448 case ARM_BUILTIN_WQMIABTN
:
25449 case ARM_BUILTIN_WQMIATBN
:
25450 case ARM_BUILTIN_WQMIATTN
:
25451 case ARM_BUILTIN_WMIABB
:
25452 case ARM_BUILTIN_WMIABT
:
25453 case ARM_BUILTIN_WMIATB
:
25454 case ARM_BUILTIN_WMIATT
:
25455 case ARM_BUILTIN_WMIABBN
:
25456 case ARM_BUILTIN_WMIABTN
:
25457 case ARM_BUILTIN_WMIATBN
:
25458 case ARM_BUILTIN_WMIATTN
:
25459 case ARM_BUILTIN_WMIAWBB
:
25460 case ARM_BUILTIN_WMIAWBT
:
25461 case ARM_BUILTIN_WMIAWTB
:
25462 case ARM_BUILTIN_WMIAWTT
:
25463 case ARM_BUILTIN_WMIAWBBN
:
25464 case ARM_BUILTIN_WMIAWBTN
:
25465 case ARM_BUILTIN_WMIAWTBN
:
25466 case ARM_BUILTIN_WMIAWTTN
:
25467 case ARM_BUILTIN_WSADB
:
25468 case ARM_BUILTIN_WSADH
:
25469 icode
= (fcode
== ARM_BUILTIN_WMACS
? CODE_FOR_iwmmxt_wmacs
25470 : fcode
== ARM_BUILTIN_WMACU
? CODE_FOR_iwmmxt_wmacu
25471 : fcode
== ARM_BUILTIN_TMIA
? CODE_FOR_iwmmxt_tmia
25472 : fcode
== ARM_BUILTIN_TMIAPH
? CODE_FOR_iwmmxt_tmiaph
25473 : fcode
== ARM_BUILTIN_TMIABB
? CODE_FOR_iwmmxt_tmiabb
25474 : fcode
== ARM_BUILTIN_TMIABT
? CODE_FOR_iwmmxt_tmiabt
25475 : fcode
== ARM_BUILTIN_TMIATB
? CODE_FOR_iwmmxt_tmiatb
25476 : fcode
== ARM_BUILTIN_TMIATT
? CODE_FOR_iwmmxt_tmiatt
25477 : fcode
== ARM_BUILTIN_WQMIABB
? CODE_FOR_iwmmxt_wqmiabb
25478 : fcode
== ARM_BUILTIN_WQMIABT
? CODE_FOR_iwmmxt_wqmiabt
25479 : fcode
== ARM_BUILTIN_WQMIATB
? CODE_FOR_iwmmxt_wqmiatb
25480 : fcode
== ARM_BUILTIN_WQMIATT
? CODE_FOR_iwmmxt_wqmiatt
25481 : fcode
== ARM_BUILTIN_WQMIABBN
? CODE_FOR_iwmmxt_wqmiabbn
25482 : fcode
== ARM_BUILTIN_WQMIABTN
? CODE_FOR_iwmmxt_wqmiabtn
25483 : fcode
== ARM_BUILTIN_WQMIATBN
? CODE_FOR_iwmmxt_wqmiatbn
25484 : fcode
== ARM_BUILTIN_WQMIATTN
? CODE_FOR_iwmmxt_wqmiattn
25485 : fcode
== ARM_BUILTIN_WMIABB
? CODE_FOR_iwmmxt_wmiabb
25486 : fcode
== ARM_BUILTIN_WMIABT
? CODE_FOR_iwmmxt_wmiabt
25487 : fcode
== ARM_BUILTIN_WMIATB
? CODE_FOR_iwmmxt_wmiatb
25488 : fcode
== ARM_BUILTIN_WMIATT
? CODE_FOR_iwmmxt_wmiatt
25489 : fcode
== ARM_BUILTIN_WMIABBN
? CODE_FOR_iwmmxt_wmiabbn
25490 : fcode
== ARM_BUILTIN_WMIABTN
? CODE_FOR_iwmmxt_wmiabtn
25491 : fcode
== ARM_BUILTIN_WMIATBN
? CODE_FOR_iwmmxt_wmiatbn
25492 : fcode
== ARM_BUILTIN_WMIATTN
? CODE_FOR_iwmmxt_wmiattn
25493 : fcode
== ARM_BUILTIN_WMIAWBB
? CODE_FOR_iwmmxt_wmiawbb
25494 : fcode
== ARM_BUILTIN_WMIAWBT
? CODE_FOR_iwmmxt_wmiawbt
25495 : fcode
== ARM_BUILTIN_WMIAWTB
? CODE_FOR_iwmmxt_wmiawtb
25496 : fcode
== ARM_BUILTIN_WMIAWTT
? CODE_FOR_iwmmxt_wmiawtt
25497 : fcode
== ARM_BUILTIN_WMIAWBBN
? CODE_FOR_iwmmxt_wmiawbbn
25498 : fcode
== ARM_BUILTIN_WMIAWBTN
? CODE_FOR_iwmmxt_wmiawbtn
25499 : fcode
== ARM_BUILTIN_WMIAWTBN
? CODE_FOR_iwmmxt_wmiawtbn
25500 : fcode
== ARM_BUILTIN_WMIAWTTN
? CODE_FOR_iwmmxt_wmiawttn
25501 : fcode
== ARM_BUILTIN_WSADB
? CODE_FOR_iwmmxt_wsadb
25502 : CODE_FOR_iwmmxt_wsadh
);
25503 arg0
= CALL_EXPR_ARG (exp
, 0);
25504 arg1
= CALL_EXPR_ARG (exp
, 1);
25505 arg2
= CALL_EXPR_ARG (exp
, 2);
25506 op0
= expand_normal (arg0
);
25507 op1
= expand_normal (arg1
);
25508 op2
= expand_normal (arg2
);
25509 tmode
= insn_data
[icode
].operand
[0].mode
;
25510 mode0
= insn_data
[icode
].operand
[1].mode
;
25511 mode1
= insn_data
[icode
].operand
[2].mode
;
25512 mode2
= insn_data
[icode
].operand
[3].mode
;
25514 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
25515 op0
= copy_to_mode_reg (mode0
, op0
);
25516 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
25517 op1
= copy_to_mode_reg (mode1
, op1
);
25518 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
25519 op2
= copy_to_mode_reg (mode2
, op2
);
25521 || GET_MODE (target
) != tmode
25522 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
25523 target
= gen_reg_rtx (tmode
);
25524 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
25530 case ARM_BUILTIN_WZERO
:
25531 target
= gen_reg_rtx (DImode
);
25532 emit_insn (gen_iwmmxt_clrdi (target
));
25535 case ARM_BUILTIN_WSRLHI
:
25536 case ARM_BUILTIN_WSRLWI
:
25537 case ARM_BUILTIN_WSRLDI
:
25538 case ARM_BUILTIN_WSLLHI
:
25539 case ARM_BUILTIN_WSLLWI
:
25540 case ARM_BUILTIN_WSLLDI
:
25541 case ARM_BUILTIN_WSRAHI
:
25542 case ARM_BUILTIN_WSRAWI
:
25543 case ARM_BUILTIN_WSRADI
:
25544 case ARM_BUILTIN_WRORHI
:
25545 case ARM_BUILTIN_WRORWI
:
25546 case ARM_BUILTIN_WRORDI
:
25547 case ARM_BUILTIN_WSRLH
:
25548 case ARM_BUILTIN_WSRLW
:
25549 case ARM_BUILTIN_WSRLD
:
25550 case ARM_BUILTIN_WSLLH
:
25551 case ARM_BUILTIN_WSLLW
:
25552 case ARM_BUILTIN_WSLLD
:
25553 case ARM_BUILTIN_WSRAH
:
25554 case ARM_BUILTIN_WSRAW
:
25555 case ARM_BUILTIN_WSRAD
:
25556 case ARM_BUILTIN_WRORH
:
25557 case ARM_BUILTIN_WRORW
:
25558 case ARM_BUILTIN_WRORD
:
25559 icode
= (fcode
== ARM_BUILTIN_WSRLHI
? CODE_FOR_lshrv4hi3_iwmmxt
25560 : fcode
== ARM_BUILTIN_WSRLWI
? CODE_FOR_lshrv2si3_iwmmxt
25561 : fcode
== ARM_BUILTIN_WSRLDI
? CODE_FOR_lshrdi3_iwmmxt
25562 : fcode
== ARM_BUILTIN_WSLLHI
? CODE_FOR_ashlv4hi3_iwmmxt
25563 : fcode
== ARM_BUILTIN_WSLLWI
? CODE_FOR_ashlv2si3_iwmmxt
25564 : fcode
== ARM_BUILTIN_WSLLDI
? CODE_FOR_ashldi3_iwmmxt
25565 : fcode
== ARM_BUILTIN_WSRAHI
? CODE_FOR_ashrv4hi3_iwmmxt
25566 : fcode
== ARM_BUILTIN_WSRAWI
? CODE_FOR_ashrv2si3_iwmmxt
25567 : fcode
== ARM_BUILTIN_WSRADI
? CODE_FOR_ashrdi3_iwmmxt
25568 : fcode
== ARM_BUILTIN_WRORHI
? CODE_FOR_rorv4hi3
25569 : fcode
== ARM_BUILTIN_WRORWI
? CODE_FOR_rorv2si3
25570 : fcode
== ARM_BUILTIN_WRORDI
? CODE_FOR_rordi3
25571 : fcode
== ARM_BUILTIN_WSRLH
? CODE_FOR_lshrv4hi3_di
25572 : fcode
== ARM_BUILTIN_WSRLW
? CODE_FOR_lshrv2si3_di
25573 : fcode
== ARM_BUILTIN_WSRLD
? CODE_FOR_lshrdi3_di
25574 : fcode
== ARM_BUILTIN_WSLLH
? CODE_FOR_ashlv4hi3_di
25575 : fcode
== ARM_BUILTIN_WSLLW
? CODE_FOR_ashlv2si3_di
25576 : fcode
== ARM_BUILTIN_WSLLD
? CODE_FOR_ashldi3_di
25577 : fcode
== ARM_BUILTIN_WSRAH
? CODE_FOR_ashrv4hi3_di
25578 : fcode
== ARM_BUILTIN_WSRAW
? CODE_FOR_ashrv2si3_di
25579 : fcode
== ARM_BUILTIN_WSRAD
? CODE_FOR_ashrdi3_di
25580 : fcode
== ARM_BUILTIN_WRORH
? CODE_FOR_rorv4hi3_di
25581 : fcode
== ARM_BUILTIN_WRORW
? CODE_FOR_rorv2si3_di
25582 : fcode
== ARM_BUILTIN_WRORD
? CODE_FOR_rordi3_di
25583 : CODE_FOR_nothing
);
25584 arg1
= CALL_EXPR_ARG (exp
, 1);
25585 op1
= expand_normal (arg1
);
25586 if (GET_MODE (op1
) == VOIDmode
)
25588 imm
= INTVAL (op1
);
25589 if ((fcode
== ARM_BUILTIN_WRORHI
|| fcode
== ARM_BUILTIN_WRORWI
25590 || fcode
== ARM_BUILTIN_WRORH
|| fcode
== ARM_BUILTIN_WRORW
)
25591 && (imm
< 0 || imm
> 32))
25593 if (fcode
== ARM_BUILTIN_WRORHI
)
25594 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
25595 else if (fcode
== ARM_BUILTIN_WRORWI
)
25596 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
25597 else if (fcode
== ARM_BUILTIN_WRORH
)
25598 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
25600 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
25602 else if ((fcode
== ARM_BUILTIN_WRORDI
|| fcode
== ARM_BUILTIN_WRORD
)
25603 && (imm
< 0 || imm
> 64))
25605 if (fcode
== ARM_BUILTIN_WRORDI
)
25606 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
25608 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
25612 if (fcode
== ARM_BUILTIN_WSRLHI
)
25613 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
25614 else if (fcode
== ARM_BUILTIN_WSRLWI
)
25615 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
25616 else if (fcode
== ARM_BUILTIN_WSRLDI
)
25617 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
25618 else if (fcode
== ARM_BUILTIN_WSLLHI
)
25619 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
25620 else if (fcode
== ARM_BUILTIN_WSLLWI
)
25621 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
25622 else if (fcode
== ARM_BUILTIN_WSLLDI
)
25623 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
25624 else if (fcode
== ARM_BUILTIN_WSRAHI
)
25625 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
25626 else if (fcode
== ARM_BUILTIN_WSRAWI
)
25627 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
25628 else if (fcode
== ARM_BUILTIN_WSRADI
)
25629 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
25630 else if (fcode
== ARM_BUILTIN_WSRLH
)
25631 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
25632 else if (fcode
== ARM_BUILTIN_WSRLW
)
25633 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
25634 else if (fcode
== ARM_BUILTIN_WSRLD
)
25635 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
25636 else if (fcode
== ARM_BUILTIN_WSLLH
)
25637 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
25638 else if (fcode
== ARM_BUILTIN_WSLLW
)
25639 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
25640 else if (fcode
== ARM_BUILTIN_WSLLD
)
25641 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
25642 else if (fcode
== ARM_BUILTIN_WSRAH
)
25643 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
25644 else if (fcode
== ARM_BUILTIN_WSRAW
)
25645 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
25647 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
25650 return arm_expand_binop_builtin (icode
, exp
, target
);
25656 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
25657 if (d
->code
== (const enum arm_builtins
) fcode
)
25658 return arm_expand_binop_builtin (d
->icode
, exp
, target
);
25660 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
25661 if (d
->code
== (const enum arm_builtins
) fcode
)
25662 return arm_expand_unop_builtin (d
->icode
, exp
, target
, 0);
25664 for (i
= 0, d
= bdesc_3arg
; i
< ARRAY_SIZE (bdesc_3arg
); i
++, d
++)
25665 if (d
->code
== (const enum arm_builtins
) fcode
)
25666 return arm_expand_ternop_builtin (d
->icode
, exp
, target
);
25668 /* @@@ Should really do something sensible here. */
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  MASK must be nonzero;
   ctz_hwi's result for zero is host-defined.  */
static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
25681 /* Like emit_multi_reg_push, but allowing for a different set of
25682 registers to be described as saved. MASK is the set of registers
25683 to be saved; REAL_REGS is the set of registers to be described as
25684 saved. If REAL_REGS is 0, only describe the stack adjustment. */
25687 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
25689 unsigned long regno
;
25690 rtx par
[10], tmp
, reg
, insn
;
25693 /* Build the parallel of the registers actually being stored. */
25694 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
25696 regno
= ctz_hwi (mask
);
25697 reg
= gen_rtx_REG (SImode
, regno
);
25700 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
25702 tmp
= gen_rtx_USE (VOIDmode
, reg
);
25707 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
25708 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
25709 tmp
= gen_frame_mem (BLKmode
, tmp
);
25710 tmp
= gen_rtx_SET (VOIDmode
, tmp
, par
[0]);
25713 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
25714 insn
= emit_insn (tmp
);
25716 /* Always build the stack adjustment note for unwind info. */
25717 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
25718 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
);
25721 /* Build the parallel of the registers recorded as saved for unwind. */
25722 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
25724 regno
= ctz_hwi (real_regs
);
25725 reg
= gen_rtx_REG (SImode
, regno
);
25727 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
25728 tmp
= gen_frame_mem (SImode
, tmp
);
25729 tmp
= gen_rtx_SET (VOIDmode
, tmp
, reg
);
25730 RTX_FRAME_RELATED_P (tmp
) = 1;
25738 RTX_FRAME_RELATED_P (par
[0]) = 1;
25739 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
25742 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
25747 /* Emit code to push or pop registers to or from the stack. F is the
25748 assembly file. MASK is the registers to pop. */
25750 thumb_pop (FILE *f
, unsigned long mask
)
25753 int lo_mask
= mask
& 0xFF;
25754 int pushed_words
= 0;
25758 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
25760 /* Special case. Do not generate a POP PC statement here, do it in
25762 thumb_exit (f
, -1);
25766 fprintf (f
, "\tpop\t{");
25768 /* Look at the low registers first. */
25769 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
25773 asm_fprintf (f
, "%r", regno
);
25775 if ((lo_mask
& ~1) != 0)
25782 if (mask
& (1 << PC_REGNUM
))
25784 /* Catch popping the PC. */
25785 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
25786 || crtl
->calls_eh_return
)
25788 /* The PC is never poped directly, instead
25789 it is popped into r3 and then BX is used. */
25790 fprintf (f
, "}\n");
25792 thumb_exit (f
, -1);
25801 asm_fprintf (f
, "%r", PC_REGNUM
);
25805 fprintf (f
, "}\n");
25808 /* Generate code to return from a thumb function.
25809 If 'reg_containing_return_addr' is -1, then the return address is
25810 actually on the stack, at the stack pointer. */
25812 thumb_exit (FILE *f
, int reg_containing_return_addr
)
25814 unsigned regs_available_for_popping
;
25815 unsigned regs_to_pop
;
25817 unsigned available
;
25821 int restore_a4
= FALSE
;
25823 /* Compute the registers we need to pop. */
25827 if (reg_containing_return_addr
== -1)
25829 regs_to_pop
|= 1 << LR_REGNUM
;
25833 if (TARGET_BACKTRACE
)
25835 /* Restore the (ARM) frame pointer and stack pointer. */
25836 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
25840 /* If there is nothing to pop then just emit the BX instruction and
25842 if (pops_needed
== 0)
25844 if (crtl
->calls_eh_return
)
25845 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
25847 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
25850 /* Otherwise if we are not supporting interworking and we have not created
25851 a backtrace structure and the function was not entered in ARM mode then
25852 just pop the return address straight into the PC. */
25853 else if (!TARGET_INTERWORK
25854 && !TARGET_BACKTRACE
25855 && !is_called_in_ARM_mode (current_function_decl
)
25856 && !crtl
->calls_eh_return
)
25858 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
25862 /* Find out how many of the (return) argument registers we can corrupt. */
25863 regs_available_for_popping
= 0;
25865 /* If returning via __builtin_eh_return, the bottom three registers
25866 all contain information needed for the return. */
25867 if (crtl
->calls_eh_return
)
25871 /* If we can deduce the registers used from the function's
25872 return value. This is more reliable that examining
25873 df_regs_ever_live_p () because that will be set if the register is
25874 ever used in the function, not just if the register is used
25875 to hold a return value. */
25877 if (crtl
->return_rtx
!= 0)
25878 mode
= GET_MODE (crtl
->return_rtx
);
25880 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
25882 size
= GET_MODE_SIZE (mode
);
25886 /* In a void function we can use any argument register.
25887 In a function that returns a structure on the stack
25888 we can use the second and third argument registers. */
25889 if (mode
== VOIDmode
)
25890 regs_available_for_popping
=
25891 (1 << ARG_REGISTER (1))
25892 | (1 << ARG_REGISTER (2))
25893 | (1 << ARG_REGISTER (3));
25895 regs_available_for_popping
=
25896 (1 << ARG_REGISTER (2))
25897 | (1 << ARG_REGISTER (3));
25899 else if (size
<= 4)
25900 regs_available_for_popping
=
25901 (1 << ARG_REGISTER (2))
25902 | (1 << ARG_REGISTER (3));
25903 else if (size
<= 8)
25904 regs_available_for_popping
=
25905 (1 << ARG_REGISTER (3));
25908 /* Match registers to be popped with registers into which we pop them. */
25909 for (available
= regs_available_for_popping
,
25910 required
= regs_to_pop
;
25911 required
!= 0 && available
!= 0;
25912 available
&= ~(available
& - available
),
25913 required
&= ~(required
& - required
))
25916 /* If we have any popping registers left over, remove them. */
25918 regs_available_for_popping
&= ~available
;
25920 /* Otherwise if we need another popping register we can use
25921 the fourth argument register. */
25922 else if (pops_needed
)
25924 /* If we have not found any free argument registers and
25925 reg a4 contains the return address, we must move it. */
25926 if (regs_available_for_popping
== 0
25927 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
25929 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
25930 reg_containing_return_addr
= LR_REGNUM
;
25932 else if (size
> 12)
25934 /* Register a4 is being used to hold part of the return value,
25935 but we have dire need of a free, low register. */
25938 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
25941 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
25943 /* The fourth argument register is available. */
25944 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
25950 /* Pop as many registers as we can. */
25951 thumb_pop (f
, regs_available_for_popping
);
25953 /* Process the registers we popped. */
25954 if (reg_containing_return_addr
== -1)
25956 /* The return address was popped into the lowest numbered register. */
25957 regs_to_pop
&= ~(1 << LR_REGNUM
);
25959 reg_containing_return_addr
=
25960 number_of_first_bit_set (regs_available_for_popping
);
25962 /* Remove this register for the mask of available registers, so that
25963 the return address will not be corrupted by further pops. */
25964 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
25967 /* If we popped other registers then handle them here. */
25968 if (regs_available_for_popping
)
25972 /* Work out which register currently contains the frame pointer. */
25973 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
25975 /* Move it into the correct place. */
25976 asm_fprintf (f
, "\tmov\t%r, %r\n",
25977 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
25979 /* (Temporarily) remove it from the mask of popped registers. */
25980 regs_available_for_popping
&= ~(1 << frame_pointer
);
25981 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
25983 if (regs_available_for_popping
)
25987 /* We popped the stack pointer as well,
25988 find the register that contains it. */
25989 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
25991 /* Move it into the stack register. */
25992 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
25994 /* At this point we have popped all necessary registers, so
25995 do not worry about restoring regs_available_for_popping
25996 to its correct value:
25998 assert (pops_needed == 0)
25999 assert (regs_available_for_popping == (1 << frame_pointer))
26000 assert (regs_to_pop == (1 << STACK_POINTER)) */
26004 /* Since we have just move the popped value into the frame
26005 pointer, the popping register is available for reuse, and
26006 we know that we still have the stack pointer left to pop. */
26007 regs_available_for_popping
|= (1 << frame_pointer
);
26011 /* If we still have registers left on the stack, but we no longer have
26012 any registers into which we can pop them, then we must move the return
26013 address into the link register and make available the register that
26015 if (regs_available_for_popping
== 0 && pops_needed
> 0)
26017 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
26019 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
26020 reg_containing_return_addr
);
26022 reg_containing_return_addr
= LR_REGNUM
;
26025 /* If we have registers left on the stack then pop some more.
26026 We know that at most we will want to pop FP and SP. */
26027 if (pops_needed
> 0)
26032 thumb_pop (f
, regs_available_for_popping
);
26034 /* We have popped either FP or SP.
26035 Move whichever one it is into the correct register. */
26036 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26037 move_to
= number_of_first_bit_set (regs_to_pop
);
26039 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
26041 regs_to_pop
&= ~(1 << move_to
);
26046 /* If we still have not popped everything then we must have only
26047 had one register available to us and we are now popping the SP. */
26048 if (pops_needed
> 0)
26052 thumb_pop (f
, regs_available_for_popping
);
26054 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26056 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
26058 assert (regs_to_pop == (1 << STACK_POINTER))
26059 assert (pops_needed == 1)
26063 /* If necessary restore the a4 register. */
26066 if (reg_containing_return_addr
!= LR_REGNUM
)
26068 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
26069 reg_containing_return_addr
= LR_REGNUM
;
26072 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
26075 if (crtl
->calls_eh_return
)
26076 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
26078 /* Return to caller. */
26079 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
26082 /* Scan INSN just before assembler is output for it.
26083 For Thumb-1, we track the status of the condition codes; this
26084 information is used in the cbranchsi4_insn pattern. */
26086 thumb1_final_prescan_insn (rtx insn
)
26088 if (flag_print_asm_name
)
26089 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
26090 INSN_ADDRESSES (INSN_UID (insn
)));
26091 /* Don't overwrite the previous setter when we get to a cbranch. */
26092 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
26094 enum attr_conds conds
;
26096 if (cfun
->machine
->thumb1_cc_insn
)
26098 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
26099 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
26102 conds
= get_attr_conds (insn
);
26103 if (conds
== CONDS_SET
)
26105 rtx set
= single_set (insn
);
26106 cfun
->machine
->thumb1_cc_insn
= insn
;
26107 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
26108 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
26109 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
26110 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
26112 rtx src1
= XEXP (SET_SRC (set
), 1);
26113 if (src1
== const0_rtx
)
26114 cfun
->machine
->thumb1_cc_mode
= CCmode
;
26116 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
26118 /* Record the src register operand instead of dest because
26119 cprop_hardreg pass propagates src. */
26120 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
26123 else if (conds
!= CONDS_NOCOND
)
26124 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
26127 /* Check if unexpected far jump is used. */
26128 if (cfun
->machine
->lr_save_eliminated
26129 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26130 internal_error("Unexpected thumb1 far jump");
26134 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
26136 unsigned HOST_WIDE_INT mask
= 0xff;
26139 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
26140 if (val
== 0) /* XXX */
26143 for (i
= 0; i
< 25; i
++)
26144 if ((val
& (mask
<< i
)) == val
)
26150 /* Returns nonzero if the current function contains,
26151 or might contain a far jump. */
26153 thumb_far_jump_used_p (void)
26156 bool far_jump
= false;
26157 unsigned int func_size
= 0;
26159 /* This test is only important for leaf functions. */
26160 /* assert (!leaf_function_p ()); */
26162 /* If we have already decided that far jumps may be used,
26163 do not bother checking again, and always return true even if
26164 it turns out that they are not being used. Once we have made
26165 the decision that far jumps are present (and that hence the link
26166 register will be pushed onto the stack) we cannot go back on it. */
26167 if (cfun
->machine
->far_jump_used
)
26170 /* If this function is not being called from the prologue/epilogue
26171 generation code then it must be being called from the
26172 INITIAL_ELIMINATION_OFFSET macro. */
26173 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
26175 /* In this case we know that we are being asked about the elimination
26176 of the arg pointer register. If that register is not being used,
26177 then there are no arguments on the stack, and we do not have to
26178 worry that a far jump might force the prologue to push the link
26179 register, changing the stack offsets. In this case we can just
26180 return false, since the presence of far jumps in the function will
26181 not affect stack offsets.
26183 If the arg pointer is live (or if it was live, but has now been
26184 eliminated and so set to dead) then we do have to test to see if
26185 the function might contain a far jump. This test can lead to some
26186 false negatives, since before reload is completed, then length of
26187 branch instructions is not known, so gcc defaults to returning their
26188 longest length, which in turn sets the far jump attribute to true.
26190 A false negative will not result in bad code being generated, but it
26191 will result in a needless push and pop of the link register. We
26192 hope that this does not occur too often.
26194 If we need doubleword stack alignment this could affect the other
26195 elimination offsets so we can't risk getting it wrong. */
26196 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
26197 cfun
->machine
->arg_pointer_live
= 1;
26198 else if (!cfun
->machine
->arg_pointer_live
)
26202 /* Check to see if the function contains a branch
26203 insn with the far jump attribute set. */
26204 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
26206 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26210 func_size
+= get_attr_length (insn
);
26213 /* Attribute far_jump will always be true for thumb1 before
26214 shorten_branch pass. So checking far_jump attribute before
26215 shorten_branch isn't much useful.
26217 Following heuristic tries to estimate more accurately if a far jump
26218 may finally be used. The heuristic is very conservative as there is
26219 no chance to roll-back the decision of not to use far jump.
26221 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26222 2-byte insn is associated with a 4 byte constant pool. Using
26223 function size 2048/3 as the threshold is conservative enough. */
26226 if ((func_size
* 3) >= 2048)
26228 /* Record the fact that we have decided that
26229 the function does use far jumps. */
26230 cfun
->machine
->far_jump_used
= 1;
26238 /* Return nonzero if FUNC must be entered in ARM mode. */
26240 is_called_in_ARM_mode (tree func
)
26242 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
26244 /* Ignore the problem about functions whose address is taken. */
26245 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
26249 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
26255 /* Given the stack offsets and register mask in OFFSETS, decide how
26256 many additional registers to push instead of subtracting a constant
26257 from SP. For epilogues the principle is the same except we use pop.
26258 FOR_PROLOGUE indicates which we're generating. */
26260 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
26262 HOST_WIDE_INT amount
;
26263 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
26264 /* Extract a mask of the ones we can give to the Thumb's push/pop
26266 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
26267 /* Then count how many other high registers will need to be pushed. */
26268 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26269 int n_free
, reg_base
, size
;
26271 if (!for_prologue
&& frame_pointer_needed
)
26272 amount
= offsets
->locals_base
- offsets
->saved_regs
;
26274 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26276 /* If the stack frame size is 512 exactly, we can save one load
26277 instruction, which should make this a win even when optimizing
26279 if (!optimize_size
&& amount
!= 512)
26282 /* Can't do this if there are high registers to push. */
26283 if (high_regs_pushed
!= 0)
26286 /* Shouldn't do it in the prologue if no registers would normally
26287 be pushed at all. In the epilogue, also allow it if we'll have
26288 a pop insn for the PC. */
26291 || TARGET_BACKTRACE
26292 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
26293 || TARGET_INTERWORK
26294 || crtl
->args
.pretend_args_size
!= 0))
26297 /* Don't do this if thumb_expand_prologue wants to emit instructions
26298 between the push and the stack frame allocation. */
26300 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
26301 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
26308 size
= arm_size_return_regs ();
26309 reg_base
= ARM_NUM_INTS (size
);
26310 live_regs_mask
>>= reg_base
;
26313 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
26314 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
26316 live_regs_mask
>>= 1;
26322 gcc_assert (amount
/ 4 * 4 == amount
);
26324 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
26325 return (amount
- 508) / 4;
26326 if (amount
<= n_free
* 4)
26331 /* The bits which aren't usefully expanded as rtl. */
26333 thumb1_unexpanded_epilogue (void)
26335 arm_stack_offsets
*offsets
;
26337 unsigned long live_regs_mask
= 0;
26338 int high_regs_pushed
= 0;
26340 int had_to_push_lr
;
26343 if (cfun
->machine
->return_used_this_function
!= 0)
26346 if (IS_NAKED (arm_current_func_type ()))
26349 offsets
= arm_get_frame_offsets ();
26350 live_regs_mask
= offsets
->saved_regs_mask
;
26351 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26353 /* If we can deduce the registers used from the function's return value.
26354 This is more reliable that examining df_regs_ever_live_p () because that
26355 will be set if the register is ever used in the function, not just if
26356 the register is used to hold a return value. */
26357 size
= arm_size_return_regs ();
26359 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
26362 unsigned long extra_mask
= (1 << extra_pop
) - 1;
26363 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
26366 /* The prolog may have pushed some high registers to use as
26367 work registers. e.g. the testsuite file:
26368 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26369 compiles to produce:
26370 push {r4, r5, r6, r7, lr}
26374 as part of the prolog. We have to undo that pushing here. */
26376 if (high_regs_pushed
)
26378 unsigned long mask
= live_regs_mask
& 0xff;
26381 /* The available low registers depend on the size of the value we are
26389 /* Oh dear! We have no low registers into which we can pop
26392 ("no low registers available for popping high registers");
26394 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
26395 if (live_regs_mask
& (1 << next_hi_reg
))
26398 while (high_regs_pushed
)
26400 /* Find lo register(s) into which the high register(s) can
26402 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
26404 if (mask
& (1 << regno
))
26405 high_regs_pushed
--;
26406 if (high_regs_pushed
== 0)
26410 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
26412 /* Pop the values into the low register(s). */
26413 thumb_pop (asm_out_file
, mask
);
26415 /* Move the value(s) into the high registers. */
26416 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
26418 if (mask
& (1 << regno
))
26420 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
26423 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
26424 if (live_regs_mask
& (1 << next_hi_reg
))
26429 live_regs_mask
&= ~0x0f00;
26432 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
26433 live_regs_mask
&= 0xff;
26435 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
26437 /* Pop the return address into the PC. */
26438 if (had_to_push_lr
)
26439 live_regs_mask
|= 1 << PC_REGNUM
;
26441 /* Either no argument registers were pushed or a backtrace
26442 structure was created which includes an adjusted stack
26443 pointer, so just pop everything. */
26444 if (live_regs_mask
)
26445 thumb_pop (asm_out_file
, live_regs_mask
);
26447 /* We have either just popped the return address into the
26448 PC or it is was kept in LR for the entire function.
26449 Note that thumb_pop has already called thumb_exit if the
26450 PC was in the list. */
26451 if (!had_to_push_lr
)
26452 thumb_exit (asm_out_file
, LR_REGNUM
);
26456 /* Pop everything but the return address. */
26457 if (live_regs_mask
)
26458 thumb_pop (asm_out_file
, live_regs_mask
);
26460 if (had_to_push_lr
)
26464 /* We have no free low regs, so save one. */
26465 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
26469 /* Get the return address into a temporary register. */
26470 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
26474 /* Move the return address to lr. */
26475 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
26477 /* Restore the low register. */
26478 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
26483 regno
= LAST_ARG_REGNUM
;
26488 /* Remove the argument registers that were pushed onto the stack. */
26489 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
26490 SP_REGNUM
, SP_REGNUM
,
26491 crtl
->args
.pretend_args_size
);
26493 thumb_exit (asm_out_file
, regno
);
/* Functions to save and restore machine-specific function data.
   Allocate and zero-initialize the per-function machine state;
   installed via init_machine_status in arm_init_expanders.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();

#if ARM_FT_UNKNOWN != 0
  /* The cleared allocation only gives ARM_FT_UNKNOWN for free when
     that value is zero.  */
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
26512 /* Return an RTX indicating where the return address to the
26513 calling function can be found. */
26515 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
26520 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
26523 /* Do anything needed before RTL is emitted for each function. */
26525 arm_init_expanders (void)
26527 /* Arrange to initialize and mark the machine per-function status. */
26528 init_machine_status
= arm_init_machine_status
;
26530 /* This is to stop the combine pass optimizing away the alignment
26531 adjustment of va_arg. */
26532 /* ??? It is claimed that this should not be necessary. */
26534 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
26538 /* Like arm_compute_initial_elimination offset. Simpler because there
26539 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26540 to point at the base of the local variables after static stack
26541 space for a function has been allocated. */
26544 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
26546 arm_stack_offsets
*offsets
;
26548 offsets
= arm_get_frame_offsets ();
26552 case ARG_POINTER_REGNUM
:
26555 case STACK_POINTER_REGNUM
:
26556 return offsets
->outgoing_args
- offsets
->saved_args
;
26558 case FRAME_POINTER_REGNUM
:
26559 return offsets
->soft_frame
- offsets
->saved_args
;
26561 case ARM_HARD_FRAME_POINTER_REGNUM
:
26562 return offsets
->saved_regs
- offsets
->saved_args
;
26564 case THUMB_HARD_FRAME_POINTER_REGNUM
:
26565 return offsets
->locals_base
- offsets
->saved_args
;
26568 gcc_unreachable ();
26572 case FRAME_POINTER_REGNUM
:
26575 case STACK_POINTER_REGNUM
:
26576 return offsets
->outgoing_args
- offsets
->soft_frame
;
26578 case ARM_HARD_FRAME_POINTER_REGNUM
:
26579 return offsets
->saved_regs
- offsets
->soft_frame
;
26581 case THUMB_HARD_FRAME_POINTER_REGNUM
:
26582 return offsets
->locals_base
- offsets
->soft_frame
;
26585 gcc_unreachable ();
26590 gcc_unreachable ();
26594 /* Generate the function's prologue. */
26597 thumb1_expand_prologue (void)
26601 HOST_WIDE_INT amount
;
26602 arm_stack_offsets
*offsets
;
26603 unsigned long func_type
;
26605 unsigned long live_regs_mask
;
26606 unsigned long l_mask
;
26607 unsigned high_regs_pushed
= 0;
26609 func_type
= arm_current_func_type ();
26611 /* Naked functions don't have prologues. */
26612 if (IS_NAKED (func_type
))
26615 if (IS_INTERRUPT (func_type
))
26617 error ("interrupt Service Routines cannot be coded in Thumb mode");
26621 if (is_called_in_ARM_mode (current_function_decl
))
26622 emit_insn (gen_prologue_thumb1_interwork ());
26624 offsets
= arm_get_frame_offsets ();
26625 live_regs_mask
= offsets
->saved_regs_mask
;
26627 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26628 l_mask
= live_regs_mask
& 0x40ff;
26629 /* Then count how many other high registers will need to be pushed. */
26630 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26632 if (crtl
->args
.pretend_args_size
)
26634 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
26636 if (cfun
->machine
->uses_anonymous_args
)
26638 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
26639 unsigned long mask
;
26641 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
26642 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
26644 insn
= thumb1_emit_multi_reg_push (mask
, 0);
26648 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26649 stack_pointer_rtx
, x
));
26651 RTX_FRAME_RELATED_P (insn
) = 1;
26654 if (TARGET_BACKTRACE
)
26656 HOST_WIDE_INT offset
= 0;
26657 unsigned work_register
;
26658 rtx work_reg
, x
, arm_hfp_rtx
;
26660 /* We have been asked to create a stack backtrace structure.
26661 The code looks like this:
26665 0 sub SP, #16 Reserve space for 4 registers.
26666 2 push {R7} Push low registers.
26667 4 add R7, SP, #20 Get the stack pointer before the push.
26668 6 str R7, [SP, #8] Store the stack pointer
26669 (before reserving the space).
26670 8 mov R7, PC Get hold of the start of this code + 12.
26671 10 str R7, [SP, #16] Store it.
26672 12 mov R7, FP Get hold of the current frame pointer.
26673 14 str R7, [SP, #4] Store it.
26674 16 mov R7, LR Get hold of the current return address.
26675 18 str R7, [SP, #12] Store it.
26676 20 add R7, SP, #16 Point at the start of the
26677 backtrace structure.
26678 22 mov FP, R7 Put this value into the frame pointer. */
26680 work_register
= thumb_find_work_register (live_regs_mask
);
26681 work_reg
= gen_rtx_REG (SImode
, work_register
);
26682 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
26684 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26685 stack_pointer_rtx
, GEN_INT (-16)));
26686 RTX_FRAME_RELATED_P (insn
) = 1;
26690 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
26691 RTX_FRAME_RELATED_P (insn
) = 1;
26693 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
26696 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
26697 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
26699 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
26700 x
= gen_frame_mem (SImode
, x
);
26701 emit_move_insn (x
, work_reg
);
26703 /* Make sure that the instruction fetching the PC is in the right place
26704 to calculate "start of backtrace creation code + 12". */
26705 /* ??? The stores using the common WORK_REG ought to be enough to
26706 prevent the scheduler from doing anything weird. Failing that
26707 we could always move all of the following into an UNSPEC_VOLATILE. */
26710 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
26711 emit_move_insn (work_reg
, x
);
26713 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
26714 x
= gen_frame_mem (SImode
, x
);
26715 emit_move_insn (x
, work_reg
);
26717 emit_move_insn (work_reg
, arm_hfp_rtx
);
26719 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
26720 x
= gen_frame_mem (SImode
, x
);
26721 emit_move_insn (x
, work_reg
);
26725 emit_move_insn (work_reg
, arm_hfp_rtx
);
26727 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
26728 x
= gen_frame_mem (SImode
, x
);
26729 emit_move_insn (x
, work_reg
);
26731 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
26732 emit_move_insn (work_reg
, x
);
26734 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
26735 x
= gen_frame_mem (SImode
, x
);
26736 emit_move_insn (x
, work_reg
);
26739 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
26740 emit_move_insn (work_reg
, x
);
26742 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
26743 x
= gen_frame_mem (SImode
, x
);
26744 emit_move_insn (x
, work_reg
);
26746 x
= GEN_INT (offset
+ 12);
26747 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
26749 emit_move_insn (arm_hfp_rtx
, work_reg
);
26751 /* Optimization: If we are not pushing any low registers but we are going
26752 to push some high registers then delay our first push. This will just
26753 be a push of LR and we can combine it with the push of the first high
26755 else if ((l_mask
& 0xff) != 0
26756 || (high_regs_pushed
== 0 && l_mask
))
26758 unsigned long mask
= l_mask
;
26759 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
26760 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
26761 RTX_FRAME_RELATED_P (insn
) = 1;
26764 if (high_regs_pushed
)
26766 unsigned pushable_regs
;
26767 unsigned next_hi_reg
;
26768 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
26769 : crtl
->args
.info
.nregs
;
26770 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
26772 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
26773 if (live_regs_mask
& (1 << next_hi_reg
))
26776 /* Here we need to mask out registers used for passing arguments
26777 even if they can be pushed. This is to avoid using them to stash the high
26778 registers. Such kind of stash may clobber the use of arguments. */
26779 pushable_regs
= l_mask
& (~arg_regs_mask
) & 0xff;
26781 if (pushable_regs
== 0)
26782 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
26784 while (high_regs_pushed
> 0)
26786 unsigned long real_regs_mask
= 0;
26788 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
26790 if (pushable_regs
& (1 << regno
))
26792 emit_move_insn (gen_rtx_REG (SImode
, regno
),
26793 gen_rtx_REG (SImode
, next_hi_reg
));
26795 high_regs_pushed
--;
26796 real_regs_mask
|= (1 << next_hi_reg
);
26798 if (high_regs_pushed
)
26800 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
26802 if (live_regs_mask
& (1 << next_hi_reg
))
26807 pushable_regs
&= ~((1 << regno
) - 1);
26813 /* If we had to find a work register and we have not yet
26814 saved the LR then add it to the list of regs to push. */
26815 if (l_mask
== (1 << LR_REGNUM
))
26817 pushable_regs
|= l_mask
;
26818 real_regs_mask
|= l_mask
;
26822 insn
= thumb1_emit_multi_reg_push (pushable_regs
, real_regs_mask
);
26823 RTX_FRAME_RELATED_P (insn
) = 1;
26827 /* Load the pic register before setting the frame pointer,
26828 so we can use r7 as a temporary work register. */
26829 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
26830 arm_load_pic_register (live_regs_mask
);
26832 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
26833 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
26834 stack_pointer_rtx
);
26836 if (flag_stack_usage_info
)
26837 current_function_static_stack_size
26838 = offsets
->outgoing_args
- offsets
->saved_args
;
26840 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26841 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
26846 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
26847 GEN_INT (- amount
)));
26848 RTX_FRAME_RELATED_P (insn
) = 1;
26854 /* The stack decrement is too big for an immediate value in a single
26855 insn. In theory we could issue multiple subtracts, but after
26856 three of them it becomes more space efficient to place the full
26857 value in the constant pool and load into a register. (Also the
26858 ARM debugger really likes to see only one stack decrement per
26859 function). So instead we look for a scratch register into which
26860 we can load the decrement, and then we subtract this from the
26861 stack pointer. Unfortunately on the thumb the only available
26862 scratch registers are the argument registers, and we cannot use
26863 these as they may hold arguments to the function. Instead we
26864 attempt to locate a call preserved register which is used by this
26865 function. If we can find one, then we know that it will have
26866 been pushed at the start of the prologue and so we can corrupt
26868 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
26869 if (live_regs_mask
& (1 << regno
))
26872 gcc_assert(regno
<= LAST_LO_REGNUM
);
26874 reg
= gen_rtx_REG (SImode
, regno
);
26876 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
26878 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26879 stack_pointer_rtx
, reg
));
26881 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
26882 plus_constant (Pmode
, stack_pointer_rtx
,
26884 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
26885 RTX_FRAME_RELATED_P (insn
) = 1;
26889 if (frame_pointer_needed
)
26890 thumb_set_frame_pointer (offsets
);
26892 /* If we are profiling, make sure no instructions are scheduled before
26893 the call to mcount. Similarly if the user has requested no
26894 scheduling in the prolog. Similarly if we want non-call exceptions
26895 using the EABI unwinder, to prevent faulting instructions from being
26896 swapped with a stack adjustment. */
26897 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
26898 || (arm_except_unwind_info (&global_options
) == UI_TARGET
26899 && cfun
->can_throw_non_call_exceptions
))
26900 emit_insn (gen_blockage ());
26902 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
26903 if (live_regs_mask
& 0xff)
26904 cfun
->machine
->lr_save_eliminated
= 0;
26907 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
26908 POP instruction can be generated. LR should be replaced by PC. All
26909 the checks required are already done by USE_RETURN_INSN (). Hence,
26910 all we really need to check here is if single register is to be
26911 returned, or multiple register return. */
26913 thumb2_expand_return (bool simple_return
)
26916 unsigned long saved_regs_mask
;
26917 arm_stack_offsets
*offsets
;
26919 offsets
= arm_get_frame_offsets ();
26920 saved_regs_mask
= offsets
->saved_regs_mask
;
26922 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
26923 if (saved_regs_mask
& (1 << i
))
26926 if (!simple_return
&& saved_regs_mask
)
26930 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
26931 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
26932 rtx addr
= gen_rtx_MEM (SImode
,
26933 gen_rtx_POST_INC (SImode
,
26934 stack_pointer_rtx
));
26935 set_mem_alias_set (addr
, get_frame_alias_set ());
26936 XVECEXP (par
, 0, 0) = ret_rtx
;
26937 XVECEXP (par
, 0, 1) = gen_rtx_SET (SImode
, reg
, addr
);
26938 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
26939 emit_jump_insn (par
);
26943 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
26944 saved_regs_mask
|= (1 << PC_REGNUM
);
26945 arm_emit_multi_reg_pop (saved_regs_mask
);
26950 emit_jump_insn (simple_return_rtx
);
26955 thumb1_expand_epilogue (void)
26957 HOST_WIDE_INT amount
;
26958 arm_stack_offsets
*offsets
;
26961 /* Naked functions don't have prologues. */
26962 if (IS_NAKED (arm_current_func_type ()))
26965 offsets
= arm_get_frame_offsets ();
26966 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26968 if (frame_pointer_needed
)
26970 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
26971 amount
= offsets
->locals_base
- offsets
->saved_regs
;
26973 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
26975 gcc_assert (amount
>= 0);
26978 emit_insn (gen_blockage ());
26981 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
26982 GEN_INT (amount
)));
26985 /* r3 is always free in the epilogue. */
26986 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
26988 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
26989 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
26993 /* Emit a USE (stack_pointer_rtx), so that
26994 the stack adjustment will not be deleted. */
26995 emit_insn (gen_force_register_use (stack_pointer_rtx
));
26997 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
26998 emit_insn (gen_blockage ());
27000 /* Emit a clobber for each insn that will be restored in the epilogue,
27001 so that flow2 will get register lifetimes correct. */
27002 for (regno
= 0; regno
< 13; regno
++)
27003 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
27004 emit_clobber (gen_rtx_REG (SImode
, regno
));
27006 if (! df_regs_ever_live_p (LR_REGNUM
))
27007 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
27010 /* Epilogue code for APCS frame. */
27012 arm_expand_epilogue_apcs_frame (bool really_return
)
27014 unsigned long func_type
;
27015 unsigned long saved_regs_mask
;
27018 int floats_from_frame
= 0;
27019 arm_stack_offsets
*offsets
;
27021 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
27022 func_type
= arm_current_func_type ();
27024 /* Get frame offsets for ARM. */
27025 offsets
= arm_get_frame_offsets ();
27026 saved_regs_mask
= offsets
->saved_regs_mask
;
27028 /* Find the offset of the floating-point save area in the frame. */
27029 floats_from_frame
= offsets
->saved_args
- offsets
->frame
;
27031 /* Compute how many core registers saved and how far away the floats are. */
27032 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27033 if (saved_regs_mask
& (1 << i
))
27036 floats_from_frame
+= 4;
27039 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
27043 /* The offset is from IP_REGNUM. */
27044 int saved_size
= arm_get_vfp_saved_size ();
27045 if (saved_size
> 0)
27047 floats_from_frame
+= saved_size
;
27048 emit_insn (gen_addsi3 (gen_rtx_REG (SImode
, IP_REGNUM
),
27049 hard_frame_pointer_rtx
,
27050 GEN_INT (-floats_from_frame
)));
27053 /* Generate VFP register multi-pop. */
27054 start_reg
= FIRST_VFP_REGNUM
;
27056 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
27057 /* Look for a case where a reg does not need restoring. */
27058 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
27059 && (!df_regs_ever_live_p (i
+ 1)
27060 || call_used_regs
[i
+ 1]))
27062 if (start_reg
!= i
)
27063 arm_emit_vfp_multi_reg_pop (start_reg
,
27064 (i
- start_reg
) / 2,
27065 gen_rtx_REG (SImode
,
27070 /* Restore the remaining regs that we have discovered (or possibly
27071 even all of them, if the conditional in the for loop never
27073 if (start_reg
!= i
)
27074 arm_emit_vfp_multi_reg_pop (start_reg
,
27075 (i
- start_reg
) / 2,
27076 gen_rtx_REG (SImode
, IP_REGNUM
));
27081 /* The frame pointer is guaranteed to be non-double-word aligned, as
27082 it is set to double-word-aligned old_stack_pointer - 4. */
27084 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
27086 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
27087 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
27089 rtx addr
= gen_frame_mem (V2SImode
,
27090 plus_constant (Pmode
, hard_frame_pointer_rtx
,
27092 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27093 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27094 gen_rtx_REG (V2SImode
, i
),
27100 /* saved_regs_mask should contain IP which contains old stack pointer
27101 at the time of activation creation. Since SP and IP are adjacent registers,
27102 we can restore the value directly into SP. */
27103 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
27104 saved_regs_mask
&= ~(1 << IP_REGNUM
);
27105 saved_regs_mask
|= (1 << SP_REGNUM
);
27107 /* There are two registers left in saved_regs_mask - LR and PC. We
27108 only need to restore LR (the return address), but to
27109 save time we can load it directly into PC, unless we need a
27110 special function exit sequence, or we are not really returning. */
27112 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
27113 && !crtl
->calls_eh_return
)
27114 /* Delete LR from the register mask, so that LR on
27115 the stack is loaded into the PC in the register mask. */
27116 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27118 saved_regs_mask
&= ~(1 << PC_REGNUM
);
27120 num_regs
= bit_count (saved_regs_mask
);
27121 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
27123 emit_insn (gen_blockage ());
27124 /* Unwind the stack to just below the saved registers. */
27125 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27126 hard_frame_pointer_rtx
,
27127 GEN_INT (- 4 * num_regs
)));
27130 arm_emit_multi_reg_pop (saved_regs_mask
);
27132 if (IS_INTERRUPT (func_type
))
27134 /* Interrupt handlers will have pushed the
27135 IP onto the stack, so restore it now. */
27137 rtx addr
= gen_rtx_MEM (SImode
,
27138 gen_rtx_POST_INC (SImode
,
27139 stack_pointer_rtx
));
27140 set_mem_alias_set (addr
, get_frame_alias_set ());
27141 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
27142 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27143 gen_rtx_REG (SImode
, IP_REGNUM
),
27147 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
27150 if (crtl
->calls_eh_return
)
27151 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27153 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27155 if (IS_STACKALIGN (func_type
))
27156 /* Restore the original stack pointer. Before prologue, the stack was
27157 realigned and the original stack pointer saved in r0. For details,
27158 see comment in arm_expand_prologue. */
27159 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
27161 emit_jump_insn (simple_return_rtx
);
27164 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27165 function is not a sibcall. */
27167 arm_expand_epilogue (bool really_return
)
27169 unsigned long func_type
;
27170 unsigned long saved_regs_mask
;
27174 arm_stack_offsets
*offsets
;
27176 func_type
= arm_current_func_type ();
27178 /* Naked functions don't have epilogue. Hence, generate return pattern, and
27179 let output_return_instruction take care of instruction emission if any. */
27180 if (IS_NAKED (func_type
)
27181 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
27184 emit_jump_insn (simple_return_rtx
);
27188 /* If we are throwing an exception, then we really must be doing a
27189 return, so we can't tail-call. */
27190 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
27192 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
27194 arm_expand_epilogue_apcs_frame (really_return
);
27198 /* Get frame offsets for ARM. */
27199 offsets
= arm_get_frame_offsets ();
27200 saved_regs_mask
= offsets
->saved_regs_mask
;
27201 num_regs
= bit_count (saved_regs_mask
);
27203 if (frame_pointer_needed
)
27206 /* Restore stack pointer if necessary. */
27209 /* In ARM mode, frame pointer points to first saved register.
27210 Restore stack pointer to last saved register. */
27211 amount
= offsets
->frame
- offsets
->saved_regs
;
27213 /* Force out any pending memory operations that reference stacked data
27214 before stack de-allocation occurs. */
27215 emit_insn (gen_blockage ());
27216 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27217 hard_frame_pointer_rtx
,
27218 GEN_INT (amount
)));
27219 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27221 hard_frame_pointer_rtx
);
27223 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27225 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27229 /* In Thumb-2 mode, the frame pointer points to the last saved
27231 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27234 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
27235 hard_frame_pointer_rtx
,
27236 GEN_INT (amount
)));
27237 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27238 hard_frame_pointer_rtx
,
27239 hard_frame_pointer_rtx
);
27242 /* Force out any pending memory operations that reference stacked data
27243 before stack de-allocation occurs. */
27244 emit_insn (gen_blockage ());
27245 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
27246 hard_frame_pointer_rtx
));
27247 arm_add_cfa_adjust_cfa_note (insn
, 0,
27249 hard_frame_pointer_rtx
);
27250 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27252 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27257 /* Pop off outgoing args and local frame to adjust stack pointer to
27258 last saved register. */
27259 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27263 /* Force out any pending memory operations that reference stacked data
27264 before stack de-allocation occurs. */
27265 emit_insn (gen_blockage ());
27266 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27268 GEN_INT (amount
)));
27269 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
27270 stack_pointer_rtx
, stack_pointer_rtx
);
27271 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27273 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27277 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
27279 /* Generate VFP register multi-pop. */
27280 int end_reg
= LAST_VFP_REGNUM
+ 1;
27282 /* Scan the registers in reverse order. We need to match
27283 any groupings made in the prologue and generate matching
27284 vldm operations. The need to match groups is because,
27285 unlike pop, vldm can only do consecutive regs. */
27286 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
27287 /* Look for a case where a reg does not need restoring. */
27288 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
27289 && (!df_regs_ever_live_p (i
+ 1)
27290 || call_used_regs
[i
+ 1]))
27292 /* Restore the regs discovered so far (from reg+2 to
27294 if (end_reg
> i
+ 2)
27295 arm_emit_vfp_multi_reg_pop (i
+ 2,
27296 (end_reg
- (i
+ 2)) / 2,
27297 stack_pointer_rtx
);
27301 /* Restore the remaining regs that we have discovered (or possibly
27302 even all of them, if the conditional in the for loop never
27304 if (end_reg
> i
+ 2)
27305 arm_emit_vfp_multi_reg_pop (i
+ 2,
27306 (end_reg
- (i
+ 2)) / 2,
27307 stack_pointer_rtx
);
27311 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
27312 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
27315 rtx addr
= gen_rtx_MEM (V2SImode
,
27316 gen_rtx_POST_INC (SImode
,
27317 stack_pointer_rtx
));
27318 set_mem_alias_set (addr
, get_frame_alias_set ());
27319 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27320 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27321 gen_rtx_REG (V2SImode
, i
),
27323 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27324 stack_pointer_rtx
, stack_pointer_rtx
);
27327 if (saved_regs_mask
)
27330 bool return_in_pc
= false;
27332 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
27333 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
27334 && !IS_STACKALIGN (func_type
)
27336 && crtl
->args
.pretend_args_size
== 0
27337 && saved_regs_mask
& (1 << LR_REGNUM
)
27338 && !crtl
->calls_eh_return
)
27340 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27341 saved_regs_mask
|= (1 << PC_REGNUM
);
27342 return_in_pc
= true;
27345 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
27347 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27348 if (saved_regs_mask
& (1 << i
))
27350 rtx addr
= gen_rtx_MEM (SImode
,
27351 gen_rtx_POST_INC (SImode
,
27352 stack_pointer_rtx
));
27353 set_mem_alias_set (addr
, get_frame_alias_set ());
27355 if (i
== PC_REGNUM
)
27357 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27358 XVECEXP (insn
, 0, 0) = ret_rtx
;
27359 XVECEXP (insn
, 0, 1) = gen_rtx_SET (SImode
,
27360 gen_rtx_REG (SImode
, i
),
27362 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
27363 insn
= emit_jump_insn (insn
);
27367 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
27369 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27370 gen_rtx_REG (SImode
, i
),
27372 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27374 stack_pointer_rtx
);
27381 && current_tune
->prefer_ldrd_strd
27382 && !optimize_function_for_size_p (cfun
))
27385 thumb2_emit_ldrd_pop (saved_regs_mask
);
27386 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
27387 arm_emit_ldrd_pop (saved_regs_mask
);
27389 arm_emit_multi_reg_pop (saved_regs_mask
);
27392 arm_emit_multi_reg_pop (saved_regs_mask
);
27395 if (return_in_pc
== true)
27399 if (crtl
->args
.pretend_args_size
)
27402 rtx dwarf
= NULL_RTX
;
27403 rtx tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27405 GEN_INT (crtl
->args
.pretend_args_size
)));
27407 RTX_FRAME_RELATED_P (tmp
) = 1;
27409 if (cfun
->machine
->uses_anonymous_args
)
27411 /* Restore pretend args. Refer arm_expand_prologue on how to save
27412 pretend_args in stack. */
27413 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
27414 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
27415 for (j
= 0, i
= 0; j
< num_regs
; i
++)
27416 if (saved_regs_mask
& (1 << i
))
27418 rtx reg
= gen_rtx_REG (SImode
, i
);
27419 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
27422 REG_NOTES (tmp
) = dwarf
;
27424 arm_add_cfa_adjust_cfa_note (tmp
, crtl
->args
.pretend_args_size
,
27425 stack_pointer_rtx
, stack_pointer_rtx
);
27428 if (!really_return
)
27431 if (crtl
->calls_eh_return
)
27432 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27434 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27436 if (IS_STACKALIGN (func_type
))
27437 /* Restore the original stack pointer. Before prologue, the stack was
27438 realigned and the original stack pointer saved in r0. For details,
27439 see comment in arm_expand_prologue. */
27440 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, 0)));
27442 emit_jump_insn (simple_return_rtx
);
27445 /* Implementation of insn prologue_thumb1_interwork. This is the first
27446 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27449 thumb1_output_interwork (void)
27452 FILE *f
= asm_out_file
;
27454 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
27455 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
27457 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
27459 /* Generate code sequence to switch us into Thumb mode. */
27460 /* The .code 32 directive has already been emitted by
27461 ASM_DECLARE_FUNCTION_NAME. */
27462 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
27463 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
27465 /* Generate a label, so that the debugger will notice the
27466 change in instruction sets. This label is also used by
27467 the assembler to bypass the ARM code when this function
27468 is called from a Thumb encoded function elsewhere in the
27469 same file. Hence the definition of STUB_NAME here must
27470 agree with the definition in gas/config/tc-arm.c. */
27472 #define STUB_NAME ".real_start_of"
27474 fprintf (f
, "\t.code\t16\n");
27476 if (arm_dllexport_name_p (name
))
27477 name
= arm_strip_name_encoding (name
);
27479 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
27480 fprintf (f
, "\t.thumb_func\n");
27481 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
27486 /* Handle the case of a double word load into a low register from
27487 a computed memory address. The computed address may involve a
27488 register which is overwritten by the load. */
27490 thumb_load_double_from_address (rtx
*operands
)
27498 gcc_assert (REG_P (operands
[0]));
27499 gcc_assert (MEM_P (operands
[1]));
27501 /* Get the memory address. */
27502 addr
= XEXP (operands
[1], 0);
27504 /* Work out how the memory address is computed. */
27505 switch (GET_CODE (addr
))
27508 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27510 if (REGNO (operands
[0]) == REGNO (addr
))
27512 output_asm_insn ("ldr\t%H0, %2", operands
);
27513 output_asm_insn ("ldr\t%0, %1", operands
);
27517 output_asm_insn ("ldr\t%0, %1", operands
);
27518 output_asm_insn ("ldr\t%H0, %2", operands
);
27523 /* Compute <address> + 4 for the high order load. */
27524 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27526 output_asm_insn ("ldr\t%0, %1", operands
);
27527 output_asm_insn ("ldr\t%H0, %2", operands
);
27531 arg1
= XEXP (addr
, 0);
27532 arg2
= XEXP (addr
, 1);
27534 if (CONSTANT_P (arg1
))
27535 base
= arg2
, offset
= arg1
;
27537 base
= arg1
, offset
= arg2
;
27539 gcc_assert (REG_P (base
));
27541 /* Catch the case of <address> = <reg> + <reg> */
27542 if (REG_P (offset
))
27544 int reg_offset
= REGNO (offset
);
27545 int reg_base
= REGNO (base
);
27546 int reg_dest
= REGNO (operands
[0]);
27548 /* Add the base and offset registers together into the
27549 higher destination register. */
27550 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
27551 reg_dest
+ 1, reg_base
, reg_offset
);
27553 /* Load the lower destination register from the address in
27554 the higher destination register. */
27555 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
27556 reg_dest
, reg_dest
+ 1);
27558 /* Load the higher destination register from its own address
27560 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
27561 reg_dest
+ 1, reg_dest
+ 1);
27565 /* Compute <address> + 4 for the high order load. */
27566 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27568 /* If the computed address is held in the low order register
27569 then load the high order register first, otherwise always
27570 load the low order register first. */
27571 if (REGNO (operands
[0]) == REGNO (base
))
27573 output_asm_insn ("ldr\t%H0, %2", operands
);
27574 output_asm_insn ("ldr\t%0, %1", operands
);
27578 output_asm_insn ("ldr\t%0, %1", operands
);
27579 output_asm_insn ("ldr\t%H0, %2", operands
);
27585 /* With no registers to worry about we can just load the value
27587 operands
[2] = adjust_address (operands
[1], SImode
, 4);
27589 output_asm_insn ("ldr\t%H0, %2", operands
);
27590 output_asm_insn ("ldr\t%0, %1", operands
);
27594 gcc_unreachable ();
27601 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
27608 if (REGNO (operands
[4]) > REGNO (operands
[5]))
27611 operands
[4] = operands
[5];
27614 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
27615 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
27619 if (REGNO (operands
[4]) > REGNO (operands
[5]))
27622 operands
[4] = operands
[5];
27625 if (REGNO (operands
[5]) > REGNO (operands
[6]))
27628 operands
[5] = operands
[6];
27631 if (REGNO (operands
[4]) > REGNO (operands
[5]))
27634 operands
[4] = operands
[5];
27638 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
27639 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
27643 gcc_unreachable ();
27649 /* Output a call-via instruction for thumb state. */
27651 thumb_call_via_reg (rtx reg
)
27653 int regno
= REGNO (reg
);
27656 gcc_assert (regno
< LR_REGNUM
);
27658 /* If we are in the normal text section we can use a single instance
27659 per compilation unit. If we are doing function sections, then we need
27660 an entry per section, since we can't rely on reachability. */
27661 if (in_section
== text_section
)
27663 thumb_call_reg_needed
= 1;
27665 if (thumb_call_via_label
[regno
] == NULL
)
27666 thumb_call_via_label
[regno
] = gen_label_rtx ();
27667 labelp
= thumb_call_via_label
+ regno
;
27671 if (cfun
->machine
->call_via
[regno
] == NULL
)
27672 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
27673 labelp
= cfun
->machine
->call_via
+ regno
;
27676 output_asm_insn ("bl\t%a0", labelp
);
27680 /* Routines for generating rtl. */
27682 thumb_expand_movmemqi (rtx
*operands
)
27684 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
27685 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
27686 HOST_WIDE_INT len
= INTVAL (operands
[2]);
27687 HOST_WIDE_INT offset
= 0;
27691 emit_insn (gen_movmem12b (out
, in
, out
, in
));
27697 emit_insn (gen_movmem8b (out
, in
, out
, in
));
27703 rtx reg
= gen_reg_rtx (SImode
);
27704 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
27705 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
27712 rtx reg
= gen_reg_rtx (HImode
);
27713 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
27714 plus_constant (Pmode
, in
,
27716 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
27725 rtx reg
= gen_reg_rtx (QImode
);
27726 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
27727 plus_constant (Pmode
, in
,
27729 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
27736 thumb_reload_out_hi (rtx
*operands
)
27738 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
27741 /* Handle reading a half-word from memory during reload. */
27743 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
27745 gcc_unreachable ();
27748 /* Return the length of a function name prefix
27749 that starts with the character 'c'. */
27751 arm_get_strip_length (int c
)
27755 ARM_NAME_ENCODING_LENGTHS
27760 /* Return a pointer to a function's name with any
27761 and all prefix encodings stripped from it. */
27763 arm_strip_name_encoding (const char *name
)
27767 while ((skip
= arm_get_strip_length (* name
)))
27773 /* If there is a '*' anywhere in the name's prefix, then
27774 emit the stripped name verbatim, otherwise prepend an
27775 underscore if leading underscores are being used. */
27777 arm_asm_output_labelref (FILE *stream
, const char *name
)
27782 while ((skip
= arm_get_strip_length (* name
)))
27784 verbatim
|= (*name
== '*');
27789 fputs (name
, stream
);
27791 asm_fprintf (stream
, "%U%s", name
);
27794 /* This function is used to emit an EABI tag and its associated value.
27795 We emit the numerical value of the tag in case the assembler does not
27796 support textual tags. (Eg gas prior to 2.20). If requested we include
27797 the tag name in a comment so that anyone reading the assembler output
27798 will know which tag is being set.
27800 This function is not static because arm-c.c needs it too. */
27803 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
27805 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
27806 if (flag_verbose_asm
|| flag_debug_asm
)
27807 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
27808 asm_fprintf (asm_out_file
, "\n");
27812 arm_file_start (void)
27816 if (TARGET_UNIFIED_ASM
)
27817 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
27821 const char *fpu_name
;
27822 if (arm_selected_arch
)
27824 const char* pos
= strchr (arm_selected_arch
->name
, '+');
27828 gcc_assert (strlen (arm_selected_arch
->name
)
27829 <= sizeof (buf
) / sizeof (*pos
));
27830 strncpy (buf
, arm_selected_arch
->name
,
27831 (pos
- arm_selected_arch
->name
) * sizeof (*pos
));
27832 buf
[pos
- arm_selected_arch
->name
] = '\0';
27833 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
27834 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
27837 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
27839 else if (strncmp (arm_selected_cpu
->name
, "generic", 7) == 0)
27840 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_cpu
->name
+ 8);
27843 const char* truncated_name
27844 = arm_rewrite_selected_cpu (arm_selected_cpu
->name
);
27845 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
27848 if (TARGET_SOFT_FLOAT
)
27850 fpu_name
= "softvfp";
27854 fpu_name
= arm_fpu_desc
->name
;
27855 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
27857 if (TARGET_HARD_FLOAT
)
27858 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
27859 if (TARGET_HARD_FLOAT_ABI
)
27860 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
27863 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
27865 /* Some of these attributes only apply when the corresponding features
27866 are used. However we don't have any easy way of figuring this out.
27867 Conservatively record the setting that would have been used. */
27869 if (flag_rounding_math
)
27870 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
27872 if (!flag_unsafe_math_optimizations
)
27874 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
27875 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
27877 if (flag_signaling_nans
)
27878 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
27880 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
27881 flag_finite_math_only
? 1 : 3);
27883 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
27884 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
27885 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
27886 flag_short_enums
? 1 : 2);
27888 /* Tag_ABI_optimization_goals. */
27891 else if (optimize
>= 2)
27897 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
27899 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
27902 if (arm_fp16_format
)
27903 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
27904 (int) arm_fp16_format
);
27906 if (arm_lang_output_object_attributes_hook
)
27907 arm_lang_output_object_attributes_hook();
27910 default_file_start ();
27914 arm_file_end (void)
27918 if (NEED_INDICATE_EXEC_STACK
)
27919 /* Add .note.GNU-stack. */
27920 file_end_indicate_exec_stack ();
27922 if (! thumb_call_reg_needed
)
27925 switch_to_section (text_section
);
27926 asm_fprintf (asm_out_file
, "\t.code 16\n");
27927 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
27929 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
27931 rtx label
= thumb_call_via_label
[regno
];
27935 targetm
.asm_out
.internal_label (asm_out_file
, "L",
27936 CODE_LABEL_NUMBER (label
));
27937 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
27943 /* Symbols in the text segment can be accessed without indirecting via the
27944 constant pool; it may take an extra binary operation, but this is still
27945 faster than indirecting via memory. Don't do this when not optimizing,
27946 since we won't be calculating al of the offsets necessary to do this
27950 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
27952 if (optimize
> 0 && TREE_CONSTANT (decl
))
27953 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
27955 default_encode_section_info (decl
, rtl
, first
);
27957 #endif /* !ARM_PE */
27960 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
27962 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
27963 && !strcmp (prefix
, "L"))
27965 arm_ccfsm_state
= 0;
27966 arm_target_insn
= NULL
;
27968 default_internal_label (stream
, prefix
, labelno
);
27971 /* Output code to add DELTA to the first argument, and then jump
27972 to FUNCTION. Used for C++ multiple inheritance. */
27974 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
27975 HOST_WIDE_INT delta
,
27976 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
27979 static int thunk_label
= 0;
27982 int mi_delta
= delta
;
27983 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
27985 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
27988 mi_delta
= - mi_delta
;
27990 final_start_function (emit_barrier (), file
, 1);
27994 int labelno
= thunk_label
++;
27995 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
27996 /* Thunks are entered in arm mode when avaiable. */
27997 if (TARGET_THUMB1_ONLY
)
27999 /* push r3 so we can use it as a temporary. */
28000 /* TODO: Omit this save if r3 is not used. */
28001 fputs ("\tpush {r3}\n", file
);
28002 fputs ("\tldr\tr3, ", file
);
28006 fputs ("\tldr\tr12, ", file
);
28008 assemble_name (file
, label
);
28009 fputc ('\n', file
);
28012 /* If we are generating PIC, the ldr instruction below loads
28013 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28014 the address of the add + 8, so we have:
28016 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28019 Note that we have "+ 1" because some versions of GNU ld
28020 don't set the low bit of the result for R_ARM_REL32
28021 relocations against thumb function symbols.
28022 On ARMv6M this is +4, not +8. */
28023 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
28024 assemble_name (file
, labelpc
);
28025 fputs (":\n", file
);
28026 if (TARGET_THUMB1_ONLY
)
28028 /* This is 2 insns after the start of the thunk, so we know it
28029 is 4-byte aligned. */
28030 fputs ("\tadd\tr3, pc, r3\n", file
);
28031 fputs ("\tmov r12, r3\n", file
);
28034 fputs ("\tadd\tr12, pc, r12\n", file
);
28036 else if (TARGET_THUMB1_ONLY
)
28037 fputs ("\tmov r12, r3\n", file
);
28039 if (TARGET_THUMB1_ONLY
)
28041 if (mi_delta
> 255)
28043 fputs ("\tldr\tr3, ", file
);
28044 assemble_name (file
, label
);
28045 fputs ("+4\n", file
);
28046 asm_fprintf (file
, "\t%s\t%r, %r, r3\n",
28047 mi_op
, this_regno
, this_regno
);
28049 else if (mi_delta
!= 0)
28051 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
28052 mi_op
, this_regno
, this_regno
,
28058 /* TODO: Use movw/movt for large constants when available. */
28059 while (mi_delta
!= 0)
28061 if ((mi_delta
& (3 << shift
)) == 0)
28065 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
28066 mi_op
, this_regno
, this_regno
,
28067 mi_delta
& (0xff << shift
));
28068 mi_delta
&= ~(0xff << shift
);
28075 if (TARGET_THUMB1_ONLY
)
28076 fputs ("\tpop\t{r3}\n", file
);
28078 fprintf (file
, "\tbx\tr12\n");
28079 ASM_OUTPUT_ALIGN (file
, 2);
28080 assemble_name (file
, label
);
28081 fputs (":\n", file
);
28084 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
28085 rtx tem
= XEXP (DECL_RTL (function
), 0);
28086 tem
= plus_constant (GET_MODE (tem
), tem
, -7);
28087 tem
= gen_rtx_MINUS (GET_MODE (tem
),
28089 gen_rtx_SYMBOL_REF (Pmode
,
28090 ggc_strdup (labelpc
)));
28091 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
28094 /* Output ".word .LTHUNKn". */
28095 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
28097 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
28098 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
28102 fputs ("\tb\t", file
);
28103 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
28104 if (NEED_PLT_RELOC
)
28105 fputs ("(PLT)", file
);
28106 fputc ('\n', file
);
28109 final_end_function ();
28113 arm_emit_vector_const (FILE *file
, rtx x
)
28116 const char * pattern
;
28118 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
28120 switch (GET_MODE (x
))
28122 case V2SImode
: pattern
= "%08x"; break;
28123 case V4HImode
: pattern
= "%04x"; break;
28124 case V8QImode
: pattern
= "%02x"; break;
28125 default: gcc_unreachable ();
28128 fprintf (file
, "0x");
28129 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
28133 element
= CONST_VECTOR_ELT (x
, i
);
28134 fprintf (file
, pattern
, INTVAL (element
));
28140 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28141 HFmode constant pool entries are actually loaded with ldr. */
28143 arm_emit_fp16_const (rtx c
)
28148 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
28149 bits
= real_to_target (NULL
, &r
, HFmode
);
28150 if (WORDS_BIG_ENDIAN
)
28151 assemble_zeros (2);
28152 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
28153 if (!WORDS_BIG_ENDIAN
)
28154 assemble_zeros (2);
28158 arm_output_load_gr (rtx
*operands
)
28165 if (!MEM_P (operands
[1])
28166 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
28167 || !REG_P (reg
= XEXP (sum
, 0))
28168 || !CONST_INT_P (offset
= XEXP (sum
, 1))
28169 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
28170 return "wldrw%?\t%0, %1";
28172 /* Fix up an out-of-range load of a GR register. */
28173 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
28174 wcgr
= operands
[0];
28176 output_asm_insn ("ldr%?\t%0, %1", operands
);
28178 operands
[0] = wcgr
;
28180 output_asm_insn ("tmcr%?\t%0, %1", operands
);
28181 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
28186 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28188 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28189 named arg and all anonymous args onto the stack.
28190 XXX I know the prologue shouldn't be pushing registers, but it is faster
28194 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
28195 enum machine_mode mode
,
28198 int second_time ATTRIBUTE_UNUSED
)
28200 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
28203 cfun
->machine
->uses_anonymous_args
= 1;
28204 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
28206 nregs
= pcum
->aapcs_ncrn
;
28207 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
28211 nregs
= pcum
->nregs
;
28213 if (nregs
< NUM_ARG_REGS
)
28214 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
28217 /* We can't rely on the caller doing the proper promotion when
28218 using APCS or ATPCS. */
28221 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
28223 return !TARGET_AAPCS_BASED
;
28226 static enum machine_mode
28227 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
28228 enum machine_mode mode
,
28229 int *punsignedp ATTRIBUTE_UNUSED
,
28230 const_tree fntype ATTRIBUTE_UNUSED
,
28231 int for_return ATTRIBUTE_UNUSED
)
28233 if (GET_MODE_CLASS (mode
) == MODE_INT
28234 && GET_MODE_SIZE (mode
) < 4)
28240 /* AAPCS based ABIs use short enums by default. */
28243 arm_default_short_enums (void)
28245 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
28249 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28252 arm_align_anon_bitfield (void)
28254 return TARGET_AAPCS_BASED
;
28258 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28261 arm_cxx_guard_type (void)
28263 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
28267 /* The EABI says test the least significant bit of a guard variable. */
28270 arm_cxx_guard_mask_bit (void)
28272 return TARGET_AAPCS_BASED
;
28276 /* The EABI specifies that all array cookies are 8 bytes long. */
28279 arm_get_cookie_size (tree type
)
28283 if (!TARGET_AAPCS_BASED
)
28284 return default_cxx_get_cookie_size (type
);
28286 size
= build_int_cst (sizetype
, 8);
28291 /* The EABI says that array cookies should also contain the element size. */
28294 arm_cookie_has_size (void)
28296 return TARGET_AAPCS_BASED
;
28300 /* The EABI says constructors and destructors should return a pointer to
28301 the object constructed/destroyed. */
28304 arm_cxx_cdtor_returns_this (void)
28306 return TARGET_AAPCS_BASED
;
28309 /* The EABI says that an inline function may never be the key
28313 arm_cxx_key_method_may_be_inline (void)
28315 return !TARGET_AAPCS_BASED
;
28319 arm_cxx_determine_class_data_visibility (tree decl
)
28321 if (!TARGET_AAPCS_BASED
28322 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
28325 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28326 is exported. However, on systems without dynamic vague linkage,
28327 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28328 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
28329 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
28331 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
28332 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
28336 arm_cxx_class_data_always_comdat (void)
28338 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28339 vague linkage if the class has no key function. */
28340 return !TARGET_AAPCS_BASED
;
28344 /* The EABI says __aeabi_atexit should be used to register static
28348 arm_cxx_use_aeabi_atexit (void)
28350 return TARGET_AAPCS_BASED
;
28355 arm_set_return_address (rtx source
, rtx scratch
)
28357 arm_stack_offsets
*offsets
;
28358 HOST_WIDE_INT delta
;
28360 unsigned long saved_regs
;
28362 offsets
= arm_get_frame_offsets ();
28363 saved_regs
= offsets
->saved_regs_mask
;
28365 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
28366 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
28369 if (frame_pointer_needed
)
28370 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
28373 /* LR will be the first saved register. */
28374 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
28379 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
28380 GEN_INT (delta
& ~4095)));
28385 addr
= stack_pointer_rtx
;
28387 addr
= plus_constant (Pmode
, addr
, delta
);
28389 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
28395 thumb_set_return_address (rtx source
, rtx scratch
)
28397 arm_stack_offsets
*offsets
;
28398 HOST_WIDE_INT delta
;
28399 HOST_WIDE_INT limit
;
28402 unsigned long mask
;
28406 offsets
= arm_get_frame_offsets ();
28407 mask
= offsets
->saved_regs_mask
;
28408 if (mask
& (1 << LR_REGNUM
))
28411 /* Find the saved regs. */
28412 if (frame_pointer_needed
)
28414 delta
= offsets
->soft_frame
- offsets
->saved_args
;
28415 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
28421 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
28424 /* Allow for the stack frame. */
28425 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
28427 /* The link register is always the first saved register. */
28430 /* Construct the address. */
28431 addr
= gen_rtx_REG (SImode
, reg
);
28434 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
28435 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
28439 addr
= plus_constant (Pmode
, addr
, delta
);
28441 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
28444 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
28447 /* Implements target hook vector_mode_supported_p. */
28449 arm_vector_mode_supported_p (enum machine_mode mode
)
28451 /* Neon also supports V2SImode, etc. listed in the clause below. */
28452 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
28453 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
|| mode
== V2DImode
))
28456 if ((TARGET_NEON
|| TARGET_IWMMXT
)
28457 && ((mode
== V2SImode
)
28458 || (mode
== V4HImode
)
28459 || (mode
== V8QImode
)))
28462 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
28463 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
28464 || mode
== V2HAmode
))
28470 /* Implements target hook array_mode_supported_p. */
28473 arm_array_mode_supported_p (enum machine_mode mode
,
28474 unsigned HOST_WIDE_INT nelems
)
28477 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
28478 && (nelems
>= 2 && nelems
<= 4))
28484 /* Use the option -mvectorize-with-neon-double to override the use of quardword
28485 registers when autovectorizing for Neon, at least until multiple vector
28486 widths are supported properly by the middle-end. */
28488 static enum machine_mode
28489 arm_preferred_simd_mode (enum machine_mode mode
)
28495 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
28497 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
28499 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
28501 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
28503 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
28510 if (TARGET_REALLY_IWMMXT
)
28526 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28528 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28529 using r0-r4 for function arguments, r7 for the stack frame and don't have
28530 enough left over to do doubleword arithmetic. For Thumb-2 all the
28531 potentially problematic instructions accept high registers so this is not
28532 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28533 that require many low registers. */
28535 arm_class_likely_spilled_p (reg_class_t rclass
)
28537 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
28538 || rclass
== CC_REG
)
28544 /* Implements target hook small_register_classes_for_mode_p. */
28546 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED
)
28548 return TARGET_THUMB1
;
28551 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28552 ARM insns and therefore guarantee that the shift count is modulo 256.
28553 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28554 guarantee no particular behavior for out-of-range counts. */
28556 static unsigned HOST_WIDE_INT
28557 arm_shift_truncation_mask (enum machine_mode mode
)
28559 return mode
== SImode
? 255 : 0;
28563 /* Map internal gcc register numbers to DWARF2 register numbers. */
28566 arm_dbx_register_number (unsigned int regno
)
28571 if (IS_VFP_REGNUM (regno
))
28573 /* See comment in arm_dwarf_register_span. */
28574 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
28575 return 64 + regno
- FIRST_VFP_REGNUM
;
28577 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
28580 if (IS_IWMMXT_GR_REGNUM (regno
))
28581 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
28583 if (IS_IWMMXT_REGNUM (regno
))
28584 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
28586 gcc_unreachable ();
28589 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28590 GCC models tham as 64 32-bit registers, so we need to describe this to
28591 the DWARF generation code. Other registers can use the default. */
28593 arm_dwarf_register_span (rtx rtl
)
28595 enum machine_mode mode
;
28601 regno
= REGNO (rtl
);
28602 if (!IS_VFP_REGNUM (regno
))
28605 /* XXX FIXME: The EABI defines two VFP register ranges:
28606 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28608 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28609 corresponding D register. Until GDB supports this, we shall use the
28610 legacy encodings. We also use these encodings for D0-D15 for
28611 compatibility with older debuggers. */
28612 mode
= GET_MODE (rtl
);
28613 if (GET_MODE_SIZE (mode
) < 8)
28616 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
28618 nregs
= GET_MODE_SIZE (mode
) / 4;
28619 for (i
= 0; i
< nregs
; i
+= 2)
28620 if (TARGET_BIG_END
)
28622 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
28623 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
28627 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
28628 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
28633 nregs
= GET_MODE_SIZE (mode
) / 8;
28634 for (i
= 0; i
< nregs
; i
++)
28635 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
28638 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
28641 #if ARM_UNWIND_INFO
28642 /* Emit unwind directives for a store-multiple instruction or stack pointer
28643 push during alignment.
28644 These should only ever be generated by the function prologue code, so
28645 expect them to have a particular form. */
28648 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
28651 HOST_WIDE_INT offset
;
28652 HOST_WIDE_INT nregs
;
28658 e
= XVECEXP (p
, 0, 0);
28659 if (GET_CODE (e
) != SET
)
28662 /* First insn will adjust the stack pointer. */
28663 if (GET_CODE (e
) != SET
28664 || !REG_P (XEXP (e
, 0))
28665 || REGNO (XEXP (e
, 0)) != SP_REGNUM
28666 || GET_CODE (XEXP (e
, 1)) != PLUS
)
28669 offset
= -INTVAL (XEXP (XEXP (e
, 1), 1));
28670 nregs
= XVECLEN (p
, 0) - 1;
28672 reg
= REGNO (XEXP (XVECEXP (p
, 0, 1), 1));
28675 /* The function prologue may also push pc, but not annotate it as it is
28676 never restored. We turn this into a stack pointer adjustment. */
28677 if (nregs
* 4 == offset
- 4)
28679 fprintf (asm_out_file
, "\t.pad #4\n");
28683 fprintf (asm_out_file
, "\t.save {");
28685 else if (IS_VFP_REGNUM (reg
))
28688 fprintf (asm_out_file
, "\t.vsave {");
28691 /* Unknown register type. */
28694 /* If the stack increment doesn't match the size of the saved registers,
28695 something has gone horribly wrong. */
28696 if (offset
!= nregs
* reg_size
)
28701 /* The remaining insns will describe the stores. */
28702 for (i
= 1; i
<= nregs
; i
++)
28704 /* Expect (set (mem <addr>) (reg)).
28705 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
28706 e
= XVECEXP (p
, 0, i
);
28707 if (GET_CODE (e
) != SET
28708 || !MEM_P (XEXP (e
, 0))
28709 || !REG_P (XEXP (e
, 1)))
28712 reg
= REGNO (XEXP (e
, 1));
28717 fprintf (asm_out_file
, ", ");
28718 /* We can't use %r for vfp because we need to use the
28719 double precision register names. */
28720 if (IS_VFP_REGNUM (reg
))
28721 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
28723 asm_fprintf (asm_out_file
, "%r", reg
);
28725 #ifdef ENABLE_CHECKING
28726 /* Check that the addresses are consecutive. */
28727 e
= XEXP (XEXP (e
, 0), 0);
28728 if (GET_CODE (e
) == PLUS
)
28730 offset
+= reg_size
;
28731 if (!REG_P (XEXP (e
, 0))
28732 || REGNO (XEXP (e
, 0)) != SP_REGNUM
28733 || !CONST_INT_P (XEXP (e
, 1))
28734 || offset
!= INTVAL (XEXP (e
, 1)))
28739 || REGNO (e
) != SP_REGNUM
)
28743 fprintf (asm_out_file
, "}\n");
28746 /* Emit unwind directives for a SET. */
28749 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
28757 switch (GET_CODE (e0
))
28760 /* Pushing a single register. */
28761 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
28762 || !REG_P (XEXP (XEXP (e0
, 0), 0))
28763 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
28766 asm_fprintf (asm_out_file
, "\t.save ");
28767 if (IS_VFP_REGNUM (REGNO (e1
)))
28768 asm_fprintf(asm_out_file
, "{d%d}\n",
28769 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
28771 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
28775 if (REGNO (e0
) == SP_REGNUM
)
28777 /* A stack increment. */
28778 if (GET_CODE (e1
) != PLUS
28779 || !REG_P (XEXP (e1
, 0))
28780 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
28781 || !CONST_INT_P (XEXP (e1
, 1)))
28784 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
28785 -INTVAL (XEXP (e1
, 1)));
28787 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
28789 HOST_WIDE_INT offset
;
28791 if (GET_CODE (e1
) == PLUS
)
28793 if (!REG_P (XEXP (e1
, 0))
28794 || !CONST_INT_P (XEXP (e1
, 1)))
28796 reg
= REGNO (XEXP (e1
, 0));
28797 offset
= INTVAL (XEXP (e1
, 1));
28798 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
28799 HARD_FRAME_POINTER_REGNUM
, reg
,
28802 else if (REG_P (e1
))
28805 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
28806 HARD_FRAME_POINTER_REGNUM
, reg
);
28811 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
28813 /* Move from sp to reg. */
28814 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
28816 else if (GET_CODE (e1
) == PLUS
28817 && REG_P (XEXP (e1
, 0))
28818 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
28819 && CONST_INT_P (XEXP (e1
, 1)))
28821 /* Set reg to offset from sp. */
28822 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
28823 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
28835 /* Emit unwind directives for the given insn. */
28838 arm_unwind_emit (FILE * asm_out_file
, rtx insn
)
28841 bool handled_one
= false;
28843 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
28846 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
28847 && (TREE_NOTHROW (current_function_decl
)
28848 || crtl
->all_throwers_are_sibcalls
))
28851 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
28854 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
28856 switch (REG_NOTE_KIND (note
))
28858 case REG_FRAME_RELATED_EXPR
:
28859 pat
= XEXP (note
, 0);
28862 case REG_CFA_REGISTER
:
28863 pat
= XEXP (note
, 0);
28866 pat
= PATTERN (insn
);
28867 if (GET_CODE (pat
) == PARALLEL
)
28868 pat
= XVECEXP (pat
, 0, 0);
28871 /* Only emitted for IS_STACKALIGN re-alignment. */
28876 src
= SET_SRC (pat
);
28877 dest
= SET_DEST (pat
);
28879 gcc_assert (src
== stack_pointer_rtx
);
28880 reg
= REGNO (dest
);
28881 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
28884 handled_one
= true;
28887 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
28888 to get correct dwarf information for shrink-wrap. We should not
28889 emit unwind information for it because these are used either for
28890 pretend arguments or notes to adjust sp and restore registers from
28892 case REG_CFA_ADJUST_CFA
:
28893 case REG_CFA_RESTORE
:
28896 case REG_CFA_DEF_CFA
:
28897 case REG_CFA_EXPRESSION
:
28898 case REG_CFA_OFFSET
:
28899 /* ??? Only handling here what we actually emit. */
28900 gcc_unreachable ();
28908 pat
= PATTERN (insn
);
28911 switch (GET_CODE (pat
))
28914 arm_unwind_emit_set (asm_out_file
, pat
);
28918 /* Store multiple. */
28919 arm_unwind_emit_sequence (asm_out_file
, pat
);
28928 /* Output a reference from a function exception table to the type_info
28929 object X. The EABI specifies that the symbol should be relocated by
28930 an R_ARM_TARGET2 relocation. */
28933 arm_output_ttype (rtx x
)
28935 fputs ("\t.word\t", asm_out_file
);
28936 output_addr_const (asm_out_file
, x
);
28937 /* Use special relocations for symbol references. */
28938 if (!CONST_INT_P (x
))
28939 fputs ("(TARGET2)", asm_out_file
);
28940 fputc ('\n', asm_out_file
);
28945 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
28948 arm_asm_emit_except_personality (rtx personality
)
28950 fputs ("\t.personality\t", asm_out_file
);
28951 output_addr_const (asm_out_file
, personality
);
28952 fputc ('\n', asm_out_file
);
28955 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
28958 arm_asm_init_sections (void)
28960 exception_section
= get_unnamed_section (0, output_section_asm_op
,
28963 #endif /* ARM_UNWIND_INFO */
28965 /* Output unwind directives for the start/end of a function. */
28968 arm_output_fn_unwind (FILE * f
, bool prologue
)
28970 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
28974 fputs ("\t.fnstart\n", f
);
28977 /* If this function will never be unwound, then mark it as such.
28978 The came condition is used in arm_unwind_emit to suppress
28979 the frame annotations. */
28980 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
28981 && (TREE_NOTHROW (current_function_decl
)
28982 || crtl
->all_throwers_are_sibcalls
))
28983 fputs("\t.cantunwind\n", f
);
28985 fputs ("\t.fnend\n", f
);
28990 arm_emit_tls_decoration (FILE *fp
, rtx x
)
28992 enum tls_reloc reloc
;
28995 val
= XVECEXP (x
, 0, 0);
28996 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
28998 output_addr_const (fp
, val
);
29003 fputs ("(tlsgd)", fp
);
29006 fputs ("(tlsldm)", fp
);
29009 fputs ("(tlsldo)", fp
);
29012 fputs ("(gottpoff)", fp
);
29015 fputs ("(tpoff)", fp
);
29018 fputs ("(tlsdesc)", fp
);
29021 gcc_unreachable ();
29030 fputs (" + (. - ", fp
);
29031 output_addr_const (fp
, XVECEXP (x
, 0, 2));
29032 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
29033 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
29034 output_addr_const (fp
, XVECEXP (x
, 0, 3));
29044 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29047 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
29049 gcc_assert (size
== 4);
29050 fputs ("\t.word\t", file
);
29051 output_addr_const (file
, x
);
29052 fputs ("(tlsldo)", file
);
29055 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29058 arm_output_addr_const_extra (FILE *fp
, rtx x
)
29060 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
29061 return arm_emit_tls_decoration (fp
, x
);
29062 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
29065 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
29067 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
29068 assemble_name_raw (fp
, label
);
29072 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
29074 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
29078 output_addr_const (fp
, XVECEXP (x
, 0, 0));
29082 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
29084 output_addr_const (fp
, XVECEXP (x
, 0, 0));
29088 output_addr_const (fp
, XVECEXP (x
, 0, 1));
29092 else if (GET_CODE (x
) == CONST_VECTOR
)
29093 return arm_emit_vector_const (fp
, x
);
29098 /* Output assembly for a shift instruction.
29099 SET_FLAGS determines how the instruction modifies the condition codes.
29100 0 - Do not set condition codes.
29101 1 - Set condition codes.
29102 2 - Use smallest instruction. */
29104 arm_output_shift(rtx
* operands
, int set_flags
)
29107 static const char flag_chars
[3] = {'?', '.', '!'};
29112 c
= flag_chars
[set_flags
];
29113 if (TARGET_UNIFIED_ASM
)
29115 shift
= shift_op(operands
[3], &val
);
29119 operands
[2] = GEN_INT(val
);
29120 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
29123 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
29126 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
29127 output_asm_insn (pattern
, operands
);
29131 /* Output assembly for a WMMX immediate shift instruction. */
29133 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
29135 int shift
= INTVAL (operands
[2]);
29137 enum machine_mode opmode
= GET_MODE (operands
[0]);
29139 gcc_assert (shift
>= 0);
29141 /* If the shift value in the register versions is > 63 (for D qualifier),
29142 31 (for W qualifier) or 15 (for H qualifier). */
29143 if (((opmode
== V4HImode
) && (shift
> 15))
29144 || ((opmode
== V2SImode
) && (shift
> 31))
29145 || ((opmode
== DImode
) && (shift
> 63)))
29149 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
29150 output_asm_insn (templ
, operands
);
29151 if (opmode
== DImode
)
29153 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
29154 output_asm_insn (templ
, operands
);
29159 /* The destination register will contain all zeros. */
29160 sprintf (templ
, "wzero\t%%0");
29161 output_asm_insn (templ
, operands
);
29166 if ((opmode
== DImode
) && (shift
> 32))
29168 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
29169 output_asm_insn (templ
, operands
);
29170 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
29171 output_asm_insn (templ
, operands
);
29175 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
29176 output_asm_insn (templ
, operands
);
29181 /* Output assembly for a WMMX tinsr instruction. */
29183 arm_output_iwmmxt_tinsr (rtx
*operands
)
29185 int mask
= INTVAL (operands
[3]);
29188 int units
= mode_nunits
[GET_MODE (operands
[0])];
29189 gcc_assert ((mask
& (mask
- 1)) == 0);
29190 for (i
= 0; i
< units
; ++i
)
29192 if ((mask
& 0x01) == 1)
29198 gcc_assert (i
< units
);
29200 switch (GET_MODE (operands
[0]))
29203 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
29206 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
29209 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
29212 gcc_unreachable ();
29215 output_asm_insn (templ
, operands
);
29220 /* Output a Thumb-1 casesi dispatch sequence. */
29222 thumb1_output_casesi (rtx
*operands
)
29224 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[0]));
29226 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
29228 switch (GET_MODE(diff_vec
))
29231 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
29232 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29234 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
29235 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29237 return "bl\t%___gnu_thumb1_case_si";
29239 gcc_unreachable ();
29243 /* Output a Thumb-2 casesi instruction. */
29245 thumb2_output_casesi (rtx
*operands
)
29247 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[2]));
29249 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
29251 output_asm_insn ("cmp\t%0, %1", operands
);
29252 output_asm_insn ("bhi\t%l3", operands
);
29253 switch (GET_MODE(diff_vec
))
29256 return "tbb\t[%|pc, %0]";
29258 return "tbh\t[%|pc, %0, lsl #1]";
29262 output_asm_insn ("adr\t%4, %l2", operands
);
29263 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
29264 output_asm_insn ("add\t%4, %4, %5", operands
);
29269 output_asm_insn ("adr\t%4, %l2", operands
);
29270 return "ldr\t%|pc, [%4, %0, lsl #2]";
29273 gcc_unreachable ();
29277 /* Most ARM cores are single issue, but some newer ones can dual issue.
29278 The scheduler descriptions rely on this being correct. */
29280 arm_issue_rate (void)
29306 /* A table and a function to perform ARM-specific name mangling for
29307 NEON vector types in order to conform to the AAPCS (see "Procedure
29308 Call Standard for the ARM Architecture", Appendix A). To qualify
29309 for emission with the mangled names defined in that document, a
29310 vector type must not only be of the correct mode but also be
29311 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29314 enum machine_mode mode
;
29315 const char *element_type_name
;
29316 const char *aapcs_name
;
29317 } arm_mangle_map_entry
;
29319 static arm_mangle_map_entry arm_mangle_map
[] = {
29320 /* 64-bit containerized types. */
29321 { V8QImode
, "__builtin_neon_qi", "15__simd64_int8_t" },
29322 { V8QImode
, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29323 { V4HImode
, "__builtin_neon_hi", "16__simd64_int16_t" },
29324 { V4HImode
, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29325 { V4HFmode
, "__builtin_neon_hf", "18__simd64_float16_t" },
29326 { V2SImode
, "__builtin_neon_si", "16__simd64_int32_t" },
29327 { V2SImode
, "__builtin_neon_usi", "17__simd64_uint32_t" },
29328 { V2SFmode
, "__builtin_neon_sf", "18__simd64_float32_t" },
29329 { V8QImode
, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29330 { V4HImode
, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29332 /* 128-bit containerized types. */
29333 { V16QImode
, "__builtin_neon_qi", "16__simd128_int8_t" },
29334 { V16QImode
, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29335 { V8HImode
, "__builtin_neon_hi", "17__simd128_int16_t" },
29336 { V8HImode
, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29337 { V4SImode
, "__builtin_neon_si", "17__simd128_int32_t" },
29338 { V4SImode
, "__builtin_neon_usi", "18__simd128_uint32_t" },
29339 { V4SFmode
, "__builtin_neon_sf", "19__simd128_float32_t" },
29340 { V16QImode
, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29341 { V8HImode
, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29342 { VOIDmode
, NULL
, NULL
}
29346 arm_mangle_type (const_tree type
)
29348 arm_mangle_map_entry
*pos
= arm_mangle_map
;
29350 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29351 has to be managled as if it is in the "std" namespace. */
29352 if (TARGET_AAPCS_BASED
29353 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
29354 return "St9__va_list";
29356 /* Half-precision float. */
29357 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
29360 if (TREE_CODE (type
) != VECTOR_TYPE
)
29363 /* Check the mode of the vector type, and the name of the vector
29364 element type, against the table. */
29365 while (pos
->mode
!= VOIDmode
)
29367 tree elt_type
= TREE_TYPE (type
);
29369 if (pos
->mode
== TYPE_MODE (type
)
29370 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
29371 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
29372 pos
->element_type_name
))
29373 return pos
->aapcs_name
;
29378 /* Use the default mangling for unrecognized (possibly user-defined)
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */
static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11
};
29395 /* Adjust register allocation order when compiling for Thumb. */
29398 arm_order_regs_for_local_alloc (void)
29400 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
29401 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
29403 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
29404 sizeof (thumb_core_reg_alloc_order
));
29407 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29410 arm_frame_pointer_required (void)
29412 return (cfun
->has_nonlocal_label
29413 || SUBTARGET_FRAME_POINTER_REQUIRED
29414 || (TARGET_ARM
&& TARGET_APCS_FRAME
&& ! leaf_function_p ()));
29417 /* Only thumb1 can't support conditional execution, so return true if
29418 the target is not thumb1. */
29420 arm_have_conditional_execution (void)
29422 return !TARGET_THUMB1
;
29426 arm_builtin_vectorized_function (tree fndecl
, tree type_out
, tree type_in
)
29428 enum machine_mode in_mode
, out_mode
;
29431 if (TREE_CODE (type_out
) != VECTOR_TYPE
29432 || TREE_CODE (type_in
) != VECTOR_TYPE
29433 || !(TARGET_NEON
&& TARGET_FPU_ARMV8
&& flag_unsafe_math_optimizations
))
29436 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
29437 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
29438 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
29439 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
29441 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29442 decl of the vectorized builtin for the appropriate vector mode.
29443 NULL_TREE is returned if no such builtin is available. */
29444 #undef ARM_CHECK_BUILTIN_MODE
29445 #define ARM_CHECK_BUILTIN_MODE(C) \
29446 (out_mode == SFmode && out_n == C \
29447 && in_mode == SFmode && in_n == C)
29449 #undef ARM_FIND_VRINT_VARIANT
29450 #define ARM_FIND_VRINT_VARIANT(N) \
29451 (ARM_CHECK_BUILTIN_MODE (2) \
29452 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29453 : (ARM_CHECK_BUILTIN_MODE (4) \
29454 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29457 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_NORMAL
)
29459 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
29462 case BUILT_IN_FLOORF
:
29463 return ARM_FIND_VRINT_VARIANT (vrintm
);
29464 case BUILT_IN_CEILF
:
29465 return ARM_FIND_VRINT_VARIANT (vrintp
);
29466 case BUILT_IN_TRUNCF
:
29467 return ARM_FIND_VRINT_VARIANT (vrintz
);
29468 case BUILT_IN_ROUNDF
:
29469 return ARM_FIND_VRINT_VARIANT (vrinta
);
29476 #undef ARM_CHECK_BUILTIN_MODE
29477 #undef ARM_FIND_VRINT_VARIANT
29479 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29480 static HOST_WIDE_INT
29481 arm_vector_alignment (const_tree type
)
29483 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
29485 if (TARGET_AAPCS_BASED
)
29486 align
= MIN (align
, 64);
29491 static unsigned int
29492 arm_autovectorize_vector_sizes (void)
29494 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
29498 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
29500 /* Vectors which aren't in packed structures will not be less aligned than
29501 the natural alignment of their element type, so this is safe. */
29502 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
)
29505 return default_builtin_vector_alignment_reachable (type
, is_packed
);
29509 arm_builtin_support_vector_misalignment (enum machine_mode mode
,
29510 const_tree type
, int misalignment
,
29513 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
)
29515 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
29520 /* If the misalignment is unknown, we should be able to handle the access
29521 so long as it is not to a member of a packed data structure. */
29522 if (misalignment
== -1)
29525 /* Return true if the misalignment is a multiple of the natural alignment
29526 of the vector's element type. This is probably always going to be
29527 true in practice, since we've already established that this isn't a
29529 return ((misalignment
% align
) == 0);
29532 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
29537 arm_conditional_register_usage (void)
29541 if (TARGET_THUMB1
&& optimize_size
)
29543 /* When optimizing for size on Thumb-1, it's better not
29544 to use the HI regs, because of the overhead of
29546 for (regno
= FIRST_HI_REGNUM
;
29547 regno
<= LAST_HI_REGNUM
; ++regno
)
29548 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
29551 /* The link register can be clobbered by any branch insn,
29552 but we have no way to track that at present, so mark
29553 it as unavailable. */
29555 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
29557 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
29559 /* VFPv3 registers are disabled when earlier VFP
29560 versions are selected due to the definition of
29561 LAST_VFP_REGNUM. */
29562 for (regno
= FIRST_VFP_REGNUM
;
29563 regno
<= LAST_VFP_REGNUM
; ++ regno
)
29565 fixed_regs
[regno
] = 0;
29566 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
29567 || regno
>= FIRST_VFP_REGNUM
+ 32;
29571 if (TARGET_REALLY_IWMMXT
)
29573 regno
= FIRST_IWMMXT_GR_REGNUM
;
29574 /* The 2002/10/09 revision of the XScale ABI has wCG0
29575 and wCG1 as call-preserved registers. The 2002/11/21
29576 revision changed this so that all wCG registers are
29577 scratch registers. */
29578 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
29579 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
29580 fixed_regs
[regno
] = 0;
29581 /* The XScale ABI has wR0 - wR9 as scratch registers,
29582 the rest as call-preserved registers. */
29583 for (regno
= FIRST_IWMMXT_REGNUM
;
29584 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
29586 fixed_regs
[regno
] = 0;
29587 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
29591 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
29593 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
29594 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
29596 else if (TARGET_APCS_STACK
)
29598 fixed_regs
[10] = 1;
29599 call_used_regs
[10] = 1;
29601 /* -mcaller-super-interworking reserves r11 for calls to
29602 _interwork_r11_call_via_rN(). Making the register global
29603 is an easy way of ensuring that it remains valid for all
29605 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
29606 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
29608 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
29609 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
29610 if (TARGET_CALLER_INTERWORKING
)
29611 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
29613 SUBTARGET_CONDITIONAL_REGISTER_USAGE
29617 arm_preferred_rename_class (reg_class_t rclass
)
29619 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
29620 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
29621 and code size can be reduced. */
29622 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
29628 /* Compute the atrribute "length" of insn "*push_multi".
29629 So this function MUST be kept in sync with that insn pattern. */
29631 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
29633 int i
, regno
, hi_reg
;
29634 int num_saves
= XVECLEN (parallel_op
, 0);
29644 regno
= REGNO (first_op
);
29645 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
29646 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
29648 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
29649 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
29657 /* Compute the number of instructions emitted by output_move_double. */
29659 arm_count_output_move_double_insns (rtx
*operands
)
29663 /* output_move_double may modify the operands array, so call it
29664 here on a copy of the array. */
29665 ops
[0] = operands
[0];
29666 ops
[1] = operands
[1];
29667 output_move_double (ops
, false, &count
);
29672 vfp3_const_double_for_fract_bits (rtx operand
)
29674 REAL_VALUE_TYPE r0
;
29676 if (!CONST_DOUBLE_P (operand
))
29679 REAL_VALUE_FROM_CONST_DOUBLE (r0
, operand
);
29680 if (exact_real_inverse (DFmode
, &r0
))
29682 if (exact_real_truncate (DFmode
, &r0
))
29684 HOST_WIDE_INT value
= real_to_integer (&r0
);
29685 value
= value
& 0xffffffff;
29686 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
29687 return int_log2 (value
);
29693 /* Emit a memory barrier around an atomic sequence according to MODEL. */
29696 arm_pre_atomic_barrier (enum memmodel model
)
29698 if (need_atomic_barrier_p (model
, true))
29699 emit_insn (gen_memory_barrier ());
29703 arm_post_atomic_barrier (enum memmodel model
)
29705 if (need_atomic_barrier_p (model
, false))
29706 emit_insn (gen_memory_barrier ());
29709 /* Emit the load-exclusive and store-exclusive instructions.
29710 Use acquire and release versions if necessary. */
29713 arm_emit_load_exclusive (enum machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
29715 rtx (*gen
) (rtx
, rtx
);
29721 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
29722 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
29723 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
29724 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
29726 gcc_unreachable ();
29733 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
29734 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
29735 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
29736 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
29738 gcc_unreachable ();
29742 emit_insn (gen (rval
, mem
));
29746 arm_emit_store_exclusive (enum machine_mode mode
, rtx bval
, rtx rval
,
29749 rtx (*gen
) (rtx
, rtx
, rtx
);
29755 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
29756 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
29757 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
29758 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
29760 gcc_unreachable ();
29767 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
29768 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
29769 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
29770 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
29772 gcc_unreachable ();
29776 emit_insn (gen (bval
, rval
, mem
));
29779 /* Mark the previous jump instruction as unlikely. */
29782 emit_unlikely_jump (rtx insn
)
29784 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
29786 insn
= emit_jump_insn (insn
);
29787 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
29790 /* Expand a compare and swap pattern. */
29793 arm_expand_compare_and_swap (rtx operands
[])
29795 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
29796 enum machine_mode mode
;
29797 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
29799 bval
= operands
[0];
29800 rval
= operands
[1];
29802 oldval
= operands
[3];
29803 newval
= operands
[4];
29804 is_weak
= operands
[5];
29805 mod_s
= operands
[6];
29806 mod_f
= operands
[7];
29807 mode
= GET_MODE (mem
);
29809 /* Normally the succ memory model must be stronger than fail, but in the
29810 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
29811 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
29813 if (TARGET_HAVE_LDACQ
29814 && INTVAL (mod_f
) == MEMMODEL_ACQUIRE
29815 && INTVAL (mod_s
) == MEMMODEL_RELEASE
)
29816 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
29822 /* For narrow modes, we're going to perform the comparison in SImode,
29823 so do the zero-extension now. */
29824 rval
= gen_reg_rtx (SImode
);
29825 oldval
= convert_modes (SImode
, mode
, oldval
, true);
29829 /* Force the value into a register if needed. We waited until after
29830 the zero-extension above to do this properly. */
29831 if (!arm_add_operand (oldval
, SImode
))
29832 oldval
= force_reg (SImode
, oldval
);
29836 if (!cmpdi_operand (oldval
, mode
))
29837 oldval
= force_reg (mode
, oldval
);
29841 gcc_unreachable ();
29846 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
29847 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
29848 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
29849 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
29851 gcc_unreachable ();
29854 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
29856 if (mode
== QImode
|| mode
== HImode
)
29857 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
29859 /* In all cases, we arrange for success to be signaled by Z set.
29860 This arrangement allows for the boolean result to be used directly
29861 in a subsequent branch, post optimization. */
29862 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
29863 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
29864 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
29867 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
29868 another memory store between the load-exclusive and store-exclusive can
29869 reset the monitor from Exclusive to Open state. This means we must wait
29870 until after reload to split the pattern, lest we get a register spill in
29871 the middle of the atomic sequence. */
29874 arm_split_compare_and_swap (rtx operands
[])
29876 rtx rval
, mem
, oldval
, newval
, scratch
;
29877 enum machine_mode mode
;
29878 enum memmodel mod_s
, mod_f
;
29880 rtx label1
, label2
, x
, cond
;
29882 rval
= operands
[0];
29884 oldval
= operands
[2];
29885 newval
= operands
[3];
29886 is_weak
= (operands
[4] != const0_rtx
);
29887 mod_s
= (enum memmodel
) INTVAL (operands
[5]);
29888 mod_f
= (enum memmodel
) INTVAL (operands
[6]);
29889 scratch
= operands
[7];
29890 mode
= GET_MODE (mem
);
29892 bool use_acquire
= TARGET_HAVE_LDACQ
29893 && !(mod_s
== MEMMODEL_RELAXED
29894 || mod_s
== MEMMODEL_CONSUME
29895 || mod_s
== MEMMODEL_RELEASE
);
29897 bool use_release
= TARGET_HAVE_LDACQ
29898 && !(mod_s
== MEMMODEL_RELAXED
29899 || mod_s
== MEMMODEL_CONSUME
29900 || mod_s
== MEMMODEL_ACQUIRE
);
29902 /* Checks whether a barrier is needed and emits one accordingly. */
29903 if (!(use_acquire
|| use_release
))
29904 arm_pre_atomic_barrier (mod_s
);
29909 label1
= gen_label_rtx ();
29910 emit_label (label1
);
29912 label2
= gen_label_rtx ();
29914 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
29916 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, scratch
);
29917 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
29918 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
29919 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
29920 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
29922 arm_emit_store_exclusive (mode
, scratch
, mem
, newval
, use_release
);
29924 /* Weak or strong, we want EQ to be true for success, so that we
29925 match the flags that we got from the compare above. */
29926 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
29927 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
29928 emit_insn (gen_rtx_SET (VOIDmode
, cond
, x
));
29932 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
29933 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
29934 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
29935 emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
29938 if (mod_f
!= MEMMODEL_RELAXED
)
29939 emit_label (label2
);
29941 /* Checks whether a barrier is needed and emits one accordingly. */
29942 if (!(use_acquire
|| use_release
))
29943 arm_post_atomic_barrier (mod_s
);
29945 if (mod_f
== MEMMODEL_RELAXED
)
29946 emit_label (label2
);
29950 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
29951 rtx value
, rtx model_rtx
, rtx cond
)
29953 enum memmodel model
= (enum memmodel
) INTVAL (model_rtx
);
29954 enum machine_mode mode
= GET_MODE (mem
);
29955 enum machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
29958 bool use_acquire
= TARGET_HAVE_LDACQ
29959 && !(model
== MEMMODEL_RELAXED
29960 || model
== MEMMODEL_CONSUME
29961 || model
== MEMMODEL_RELEASE
);
29963 bool use_release
= TARGET_HAVE_LDACQ
29964 && !(model
== MEMMODEL_RELAXED
29965 || model
== MEMMODEL_CONSUME
29966 || model
== MEMMODEL_ACQUIRE
);
29968 /* Checks whether a barrier is needed and emits one accordingly. */
29969 if (!(use_acquire
|| use_release
))
29970 arm_pre_atomic_barrier (model
);
29972 label
= gen_label_rtx ();
29973 emit_label (label
);
29976 new_out
= gen_lowpart (wmode
, new_out
);
29978 old_out
= gen_lowpart (wmode
, old_out
);
29981 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
29983 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
29992 x
= gen_rtx_AND (wmode
, old_out
, value
);
29993 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
29994 x
= gen_rtx_NOT (wmode
, new_out
);
29995 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
29999 if (CONST_INT_P (value
))
30001 value
= GEN_INT (-INTVAL (value
));
30007 if (mode
== DImode
)
30009 /* DImode plus/minus need to clobber flags. */
30010 /* The adddi3 and subdi3 patterns are incorrectly written so that
30011 they require matching operands, even when we could easily support
30012 three operands. Thankfully, this can be fixed up post-splitting,
30013 as the individual add+adc patterns do accept three operands and
30014 post-reload cprop can make these moves go away. */
30015 emit_move_insn (new_out
, old_out
);
30017 x
= gen_adddi3 (new_out
, new_out
, value
);
30019 x
= gen_subdi3 (new_out
, new_out
, value
);
30026 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
30027 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
30031 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
30034 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
30035 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
30037 /* Checks whether a barrier is needed and emits one accordingly. */
30038 if (!(use_acquire
|| use_release
))
30039 arm_post_atomic_barrier (model
);
30042 #define MAX_VECT_LEN 16
30044 struct expand_vec_perm_d
30046 rtx target
, op0
, op1
;
30047 unsigned char perm
[MAX_VECT_LEN
];
30048 enum machine_mode vmode
;
30049 unsigned char nelt
;
30054 /* Generate a variable permutation. */
30057 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30059 enum machine_mode vmode
= GET_MODE (target
);
30060 bool one_vector_p
= rtx_equal_p (op0
, op1
);
30062 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
30063 gcc_checking_assert (GET_MODE (op0
) == vmode
);
30064 gcc_checking_assert (GET_MODE (op1
) == vmode
);
30065 gcc_checking_assert (GET_MODE (sel
) == vmode
);
30066 gcc_checking_assert (TARGET_NEON
);
30070 if (vmode
== V8QImode
)
30071 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
30073 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
30079 if (vmode
== V8QImode
)
30081 pair
= gen_reg_rtx (V16QImode
);
30082 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
30083 pair
= gen_lowpart (TImode
, pair
);
30084 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
30088 pair
= gen_reg_rtx (OImode
);
30089 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
30090 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
30096 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30098 enum machine_mode vmode
= GET_MODE (target
);
30099 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
30100 bool one_vector_p
= rtx_equal_p (op0
, op1
);
30101 rtx rmask
[MAX_VECT_LEN
], mask
;
30103 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30104 numbering of elements for big-endian, we must reverse the order. */
30105 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
30107 /* The VTBL instruction does not use a modulo index, so we must take care
30108 of that ourselves. */
30109 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30110 for (i
= 0; i
< nelt
; ++i
)
30112 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
30113 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
30115 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
30118 /* Generate or test for an insn that supports a constant permutation. */
30120 /* Recognize patterns for the VUZP insns. */
30123 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
30125 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
30126 rtx out0
, out1
, in0
, in1
, x
;
30127 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30129 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30132 /* Note that these are little-endian tests. Adjust for big-endian later. */
30133 if (d
->perm
[0] == 0)
30135 else if (d
->perm
[0] == 1)
30139 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30141 for (i
= 0; i
< nelt
; i
++)
30143 unsigned elt
= (i
* 2 + odd
) & mask
;
30144 if (d
->perm
[i
] != elt
)
30154 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
30155 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
30156 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
30157 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
30158 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
30159 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
30160 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
30161 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
30163 gcc_unreachable ();
30168 if (BYTES_BIG_ENDIAN
)
30170 x
= in0
, in0
= in1
, in1
= x
;
30175 out1
= gen_reg_rtx (d
->vmode
);
30177 x
= out0
, out0
= out1
, out1
= x
;
30179 emit_insn (gen (out0
, in0
, in1
, out1
));
30183 /* Recognize patterns for the VZIP insns. */
30186 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
30188 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
30189 rtx out0
, out1
, in0
, in1
, x
;
30190 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
30192 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30195 /* Note that these are little-endian tests. Adjust for big-endian later. */
30197 if (d
->perm
[0] == high
)
30199 else if (d
->perm
[0] == 0)
30203 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
30205 for (i
= 0; i
< nelt
/ 2; i
++)
30207 unsigned elt
= (i
+ high
) & mask
;
30208 if (d
->perm
[i
* 2] != elt
)
30210 elt
= (elt
+ nelt
) & mask
;
30211 if (d
->perm
[i
* 2 + 1] != elt
)
30221 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
30222 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
30223 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
30224 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
30225 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
30226 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
30227 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
30228 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
30230 gcc_unreachable ();
30235 if (BYTES_BIG_ENDIAN
)
30237 x
= in0
, in0
= in1
, in1
= x
;
30242 out1
= gen_reg_rtx (d
->vmode
);
30244 x
= out0
, out0
= out1
, out1
= x
;
30246 emit_insn (gen (out0
, in0
, in1
, out1
));
30250 /* Recognize patterns for the VREV insns. */
30253 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
30255 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
30256 rtx (*gen
)(rtx
, rtx
, rtx
);
30258 if (!d
->one_vector_p
)
30267 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
30268 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
30276 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
30277 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
30278 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
30279 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
30287 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
30288 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
30289 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
30290 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
30291 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
30292 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
30293 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
30294 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
30303 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
30304 for (j
= 0; j
<= diff
; j
+= 1)
30306 /* This is guaranteed to be true as the value of diff
30307 is 7, 3, 1 and we should have enough elements in the
30308 queue to generate this. Getting a vector mask with a
30309 value of diff other than these values implies that
30310 something is wrong by the time we get here. */
30311 gcc_assert (i
+ j
< nelt
);
30312 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
30320 /* ??? The third operand is an artifact of the builtin infrastructure
30321 and is ignored by the actual instruction. */
30322 emit_insn (gen (d
->target
, d
->op0
, const0_rtx
));
30326 /* Recognize patterns for the VTRN insns. */
/* Recognize permutation patterns for the NEON VTRN (transpose) insns:
   check D->perm for the interleaved even/odd element pattern and, on a
   match, emit the mode-specific vtrn instruction.
   NOTE(review): interior source lines are missing from this extraction
   (the embedded original line numbers jump, e.g. 30335 -> 30338, and the
   numbers themselves are fused into the text); code tokens are kept
   byte-identical and only comments were added.  */
30329 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
30331 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
30332 rtx out0
, out1
, in0
, in1
, x
;
/* The generator takes both outputs and both inputs -- see the
   emit_insn (gen (out0, in0, in1, out1)) call at the end.  */
30333 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
/* Element sizes of 8 bytes or more are rejected here; the taken branch
   is among the missing lines (presumably "return false").  */
30335 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
30338 /* Note that these are little-endian tests. Adjust for big-endian later. */
30339 if (d
->perm
[0] == 0)
30341 else if (d
->perm
[0] == 1)
/* With a single input vector, indices wrap within one vector; with two,
   they range over both (mask covers 2 * nelt entries).  */
30345 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
/* Verify the transpose pattern pairwise: perm[i] == i + odd and
   perm[i + 1] == (i + nelt + odd) & mask for every even i.  */
30347 for (i
= 0; i
< nelt
; i
+= 2)
30349 if (d
->perm
[i
] != i
+ odd
)
30351 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
/* Pick the generator for the vector mode; the enclosing switch header
   is among the missing lines.  */
30361 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
30362 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
30363 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
30364 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
30365 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
30366 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
30367 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
30368 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
30370 gcc_unreachable ();
/* Big-endian: swap the two inputs.  */
30375 if (BYTES_BIG_ENDIAN
)
30377 x
= in0
, in0
= in1
, in1
= x
;
/* The second output needs a fresh register of the vector mode.  */
30382 out1
= gen_reg_rtx (d
->vmode
);
/* Swap the outputs; the guarding condition for this swap is among the
   missing lines (presumably the "odd" case).  */
30384 x
= out0
, out0
= out1
, out1
= x
;
/* Emit the transpose; the generator writes both out0 and out1.  */
30386 emit_insn (gen (out0
, in0
, in1
, out1
));
30390 /* Recognize patterns for the VEXT insns. */
/* Recognize patterns for the NEON VEXT insn: succeed when the selector
   extracts a run of consecutively increasing indices starting at
   d->perm[0], and emit the mode-specific vext with that start index as
   the immediate.
   NOTE(review): interior source lines are missing from this extraction;
   code tokens are kept byte-identical and only comments were added.  */
30393 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
30395 unsigned int i
, nelt
= d
->nelt
;
30396 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
30399 unsigned int location
;
/* Expected value of perm[1] if the indices increase by one.  */
30401 unsigned int next
= d
->perm
[0] + 1;
30403 /* TODO: Handle GCC's numbering of elements for big-endian. */
30404 if (BYTES_BIG_ENDIAN
)
30407 /* Check if the extracted indexes are increasing by one. */
30408 for (i
= 1; i
< nelt
; next
++, i
++)
30410 /* If we hit the most significant element of the 2nd vector in
30411 the previous iteration, no need to test further. */
30412 if (next
== 2 * nelt
)
30415 /* If we are operating on only one vector: it could be a
30416 rotation. If there are only two elements of size < 64, let
30417 arm_evpc_neon_vrev catch it. */
30418 if (d
->one_vector_p
&& (next
== nelt
))
30420 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
30426 if (d
->perm
[i
] != next
)
/* The immediate for vext is the index of the first extracted element.  */
30430 location
= d
->perm
[0];
/* Mode-specific generator; the enclosing switch header is among the
   missing lines.  */
30434 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
30435 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
30436 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
30437 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
30438 case V2SImode
: gen
= gen_neon_vextv2si
; break;
30439 case V4SImode
: gen
= gen_neon_vextv4si
; break;
30440 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
30441 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
30442 case V2DImode
: gen
= gen_neon_vextv2di
; break;
30451 offset
= GEN_INT (location
);
30452 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
30456 /* The NEON VTBL instruction is a fully variable permuation that's even
30457 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30458 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30459 can do slightly better by expanding this as a constant where we don't
30460 have to apply a mask. */
/* Fallback lowering of a constant permutation to the NEON VTBL sequence:
   build the selector as a CONST_VECTOR from d->perm, force it into a
   register and hand off to arm_expand_vec_perm_1.  Only V8QImode and
   V16QImode are handled here; other modes bail out and let generic code
   retry with the elements lowered to QImode (see the comment below).
   NOTE(review): interior source lines are missing from this extraction;
   code tokens are kept byte-identical and only comments were added.  */
30463 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
30465 rtx rperm
[MAX_VECT_LEN
], sel
;
30466 enum machine_mode vmode
= d
->vmode
;
30467 unsigned int i
, nelt
= d
->nelt
;
30469 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30470 numbering of elements for big-endian, we must reverse the order. */
30471 if (BYTES_BIG_ENDIAN
)
30477 /* Generic code will try constant permutation twice. Once with the
30478 original mode and again with the elements lowered to QImode.
30479 So wait and don't do the selector expansion ourselves. */
30480 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
/* Materialize the permutation indices as a constant vector.  */
30483 for (i
= 0; i
< nelt
; ++i
)
30484 rperm
[i
] = GEN_INT (d
->perm
[i
]);
30485 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
30486 sel
= force_reg (vmode
, sel
);
30488 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
/* Central dispatcher for constant vector permutations: try each pattern
   recognizer in turn -- vext first, then (after canonicalizing so the
   sequence begins in the first operand) vuzp, vzip, vrev and vtrn --
   and finally fall back to the generic vtbl expansion.
   NOTE(review): interior source lines are missing from this extraction;
   code tokens are kept byte-identical and only comments were added or
   repaired (the comment at original line 30495 had lost its closing
   lines).  */
30493 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
30495 /* Check if the input mask matches vext before reordering the operands
   (remainder of this comment lost in extraction).  */
30498 if (arm_evpc_neon_vext (d
))
30501 /* The pattern matching functions above are written to look for a small
30502 number to begin the sequence (0, 1, N/2). If we begin with an index
30503 from the second operand, we can swap the operands. */
30504 if (d
->perm
[0] >= d
->nelt
)
30506 unsigned i
, nelt
= d
->nelt
;
/* Rotate every index by nelt (mod 2*nelt), i.e. exchange the roles of
   the two input vectors; the matching operand swap is among the missing
   lines.  */
30509 for (i
= 0; i
< nelt
; ++i
)
30510 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
/* Try the remaining recognizers in priority order.  */
30519 if (arm_evpc_neon_vuzp (d
))
30521 if (arm_evpc_neon_vzip (d
))
30523 if (arm_evpc_neon_vrev (d
))
30525 if (arm_evpc_neon_vtrn (d
))
30527 return arm_evpc_neon_vtbl (d
);
30532 /* Expand a vec_perm_const pattern. */
/* Expand a vec_perm_const pattern: package TARGET/OP0/OP1/SEL into an
   expand_vec_perm_d (with testing_p = false) and defer to
   arm_expand_vec_perm_const_1.  When both operands are the same value
   the selector is folded so it references a single vector.
   NOTE(review): interior source lines are missing from this extraction;
   code tokens are kept byte-identical and only comments were added or
   repaired (the comment at 30567-30569 had lost its closing line).  */
30535 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
30537 struct expand_vec_perm_d d
;
30538 int i
, nelt
, which
;
30544 d
.vmode
= GET_MODE (target
);
30545 gcc_assert (VECTOR_MODE_P (d
.vmode
));
30546 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
30547 d
.testing_p
= false;
/* Categorize the selector: bit 0 of "which" means some index names the
   first vector, bit 1 the second.  Indices are reduced mod 2*nelt.  */
30549 for (i
= which
= 0; i
< nelt
; ++i
)
30551 rtx e
= XVECEXP (sel
, 0, i
);
30552 int ei
= INTVAL (e
) & (2 * nelt
- 1);
30553 which
|= (ei
< nelt
? 1 : 2);
30563 d
.one_vector_p
= false;
30564 if (!rtx_equal_p (op0
, op1
))
30567 /* The elements of PERM do not suggest that only the first operand
30568 is used, but both operands are identical. Allow easier matching
30569 of the permutation by folding the permutation into the single
   input vector (closing of this comment lost in extraction).  */
30573 for (i
= 0; i
< nelt
; ++i
)
30574 d
.perm
[i
] &= nelt
- 1;
30576 d
.one_vector_p
= true;
30581 d
.one_vector_p
= true;
30585 return arm_expand_vec_perm_const_1 (&d
);
30588 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK: dry-run test of whether
   SEL is a supported constant permutation for VMODE.  Builds the same
   expand_vec_perm_d as the expander but with testing_p = true and raw
   virtual registers as operands, then asks arm_expand_vec_perm_const_1
   whether it matches.
   NOTE(review): interior source lines are missing from this extraction;
   code tokens are kept byte-identical and only comments were added.  */
30591 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
30592 const unsigned char *sel
)
30594 struct expand_vec_perm_d d
;
30595 unsigned int i
, nelt
, which
;
30599 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
30600 d
.testing_p
= true;
30601 memcpy (d
.perm
, sel
, nelt
);
30603 /* Categorize the set of elements in the selector. */
30604 for (i
= which
= 0; i
< nelt
; ++i
)
30606 unsigned char e
= d
.perm
[i
];
30607 gcc_assert (e
< 2 * nelt
);
30608 which
|= (e
< nelt
? 1 : 2);
30611 /* For all elements from second vector, fold the elements to first. */
30613 for (i
= 0; i
< nelt
; ++i
)
30616 /* Check whether the mask can be applied to the vector type. */
30617 d
.one_vector_p
= (which
!= 3);
/* Use raw registers past LAST_VIRTUAL_REGISTER as stand-in operands;
   nothing is emitted while testing_p is set.  */
30619 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
30620 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
30621 if (!d
.one_vector_p
)
30622 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
30625 ret
= arm_expand_vec_perm_const_1 (&d
);
/* Decide whether the auto-increment addressing form CODE is usable with
   MODE.  Visible tests: soft-float targets with LDRD (or sub-word-size
   modes) allow the forms; vector modes and, without LDRD, modes wider
   than a word are restricted; float/vector modes are excluded from the
   remaining forms.  The result returned in each branch is among the
   missing lines -- confirm against the full source.
   NOTE(review): interior source lines are missing from this extraction;
   code tokens are kept byte-identical and only comments were added.  */
30632 arm_autoinc_modes_ok_p (enum machine_mode mode
, enum arm_auto_incmodes code
)
30634 /* If we are soft float and we do not have ldrd
30635 then all auto increment forms are ok. */
30636 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
30641 /* Post increment and Pre Decrement are supported for all
30642 instruction forms except for vector forms. */
30645 if (VECTOR_MODE_P (mode
))
30647 if (code
!= ARM_PRE_DEC
)
30657 /* Without LDRD and mode size greater than
30658 word size, there is no point in auto-incrementing
30659 because ldm and stm will not have these forms. */
30660 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
30663 /* Vector and floating point modes do not support
30664 these auto increment forms. */
30665 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
30678 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
30679 on ARM, since we know that shifts by negative amounts are no-ops.
30680 Additionally, the default expansion code is not available or suitable
30681 for post-reload insn splits (this can occur when the register allocator
30682 chooses not to do a shift in NEON).
30684 This function is used in both initial expand and post-reload splits, and
30685 handles all kinds of 64-bit shifts.
30687 Input requirements:
30688 - It is safe for the input and output to be the same register, but
30689 early-clobber rules apply for the shift amount and scratch registers.
30690 - Shift by register requires both scratch registers. In all other cases
30691 the scratch registers may be NULL.
30692 - Ashiftrt by a register also clobbers the CC register. */
/* Emit a 64-bit shift (ASHIFT, ASHIFTRT or LSHIFTRT) of IN into OUT on
   core register pairs, splitting each DImode value into SImode high and
   low parts.  Shift-by-constant and shift-by-register are handled
   separately; the register case requires both scratch registers and
   (for ASHIFTRT) clobbers the condition codes.
   NOTE(review): interior source lines are missing from this extraction
   (several gcc_assert openers, the #undef lines for the local macros,
   and some branch bodies); code tokens are kept byte-identical and only
   comments were added or repaired.  */
30694 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
30695 rtx amount
, rtx scratch1
, rtx scratch2
)
30697 rtx out_high
= gen_highpart (SImode
, out
);
30698 rtx out_low
= gen_lowpart (SImode
, out
);
30699 rtx in_high
= gen_highpart (SImode
, in
);
30700 rtx in_low
= gen_lowpart (SImode
, in
);
/* Terminology (comment opener reconstructed; the original opening line
   was lost in extraction):
30703 in = the register pair containing the input value.
30704 out = the destination register pair.
30705 up = the high- or low-part of each pair.
30706 down = the opposite part to "up".
30707 In a shift, we can consider bits to shift from "up"-stream to
30708 "down"-stream, so in a left-shift "up" is the low-part and "down"
30709 is the high-part of each register pair. */
30711 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
30712 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
30713 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
30714 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
/* Sanity-check the operands (some gcc_assert opening lines are among
   the missing lines).  */
30716 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
30718 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
30719 && GET_MODE (out
) == DImode
);
30721 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
30722 && GET_MODE (in
) == DImode
);
30724 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
30725 && GET_MODE (amount
) == SImode
)
30726 || CONST_INT_P (amount
)));
30727 gcc_assert (scratch1
== NULL
30728 || (GET_CODE (scratch1
) == SCRATCH
)
30729 || (GET_MODE (scratch1
) == SImode
30730 && REG_P (scratch1
)));
30731 gcc_assert (scratch2
== NULL
30732 || (GET_CODE (scratch2
) == SCRATCH
)
30733 || (GET_MODE (scratch2
) == SImode
30734 && REG_P (scratch2
)));
/* OUT (a register pair) must not overlap AMOUNT.  */
30735 gcc_assert (!REG_P (out
) || !REG_P (amount
)
30736 || !HARD_REGISTER_P (out
)
30737 || (REGNO (out
) != REGNO (amount
)
30738 && REGNO (out
) + 1 != REGNO (amount
)));
30740 /* Macros to make following code more readable. */
30741 #define SUB_32(DEST,SRC) \
30742 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
30743 #define RSB_32(DEST,SRC) \
30744 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
30745 #define SUB_S_32(DEST,SRC) \
30746 gen_addsi3_compare0 ((DEST), (SRC), \
30748 #define SET(DEST,SRC) \
30749 gen_rtx_SET (SImode, (DEST), (SRC))
30750 #define SHIFT(CODE,SRC,AMOUNT) \
30751 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
30752 #define LSHIFT(CODE,SRC,AMOUNT) \
30753 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
30754 SImode, (SRC), (AMOUNT))
30755 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
30756 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
30757 SImode, (SRC), (AMOUNT))
30759 gen_rtx_IOR (SImode, (A), (B))
30760 #define BRANCH(COND,LABEL) \
30761 gen_arm_cond_branch ((LABEL), \
30762 gen_rtx_ ## COND (CCmode, cc_reg, \
30766 /* Shifts by register and shifts by constant are handled separately. */
30767 if (CONST_INT_P (amount
))
30769 /* We have a shift-by-constant. */
30771 /* First, handle out-of-range shift amounts.
30772 In both cases we try to match the result an ARM instruction in a
30773 shift-by-register would give. This helps reduce execution
30774 differences between optimization levels, but it won't stop other
30775 parts of the compiler doing different things. This is "undefined
30776 behaviour", in any case. */
30777 if (INTVAL (amount
) <= 0)
30778 emit_insn (gen_movdi (out
, in
))
;
30779 else if (INTVAL (amount
) >= 64)
30781 if (code
== ASHIFTRT
)
/* Arithmetic right shift >= 64: both halves become copies of the sign
   (in_up shifted right by 31).  */
30783 rtx const31_rtx
= GEN_INT (31);
30784 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
30785 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
/* Logical shift / left shift >= 64: the result is zero.  */
30788 emit_insn (gen_movdi (out
, const0_rtx
));
30791 /* Now handle valid shifts. */
30792 else if (INTVAL (amount
) < 32)
30794 /* Shifts by a constant less than 32. */
30795 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
30797 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
30798 emit_insn (SET (out_down
,
30799 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
30801 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
30805 /* Shifts by a constant greater than 31. */
30806 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
30808 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
30809 if (code
== ASHIFTRT
)
/* The upper half becomes sign bits (shift right by 31).  */
30810 emit_insn (gen_ashrsi3 (out_up
, in_up
,
30813 emit_insn (SET (out_up
, const0_rtx
));
30818 /* We have a shift-by-register. */
30819 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
30821 /* This alternative requires the scratch registers. */
30822 gcc_assert (scratch1
&& REG_P (scratch1
));
30823 gcc_assert (scratch2
&& REG_P (scratch2
));
30825 /* We will need the values "amount-32" and "32-amount" later.
30826 Swapping them around now allows the later code to be more general. */
30830 emit_insn (SUB_32 (scratch1
, amount
));
30831 emit_insn (RSB_32 (scratch2
, amount
));
30834 emit_insn (RSB_32 (scratch1
, amount
));
30835 /* Also set CC = amount > 32. */
30836 emit_insn (SUB_S_32 (scratch2
, amount
));
30839 emit_insn (RSB_32 (scratch1
, amount
));
30840 emit_insn (SUB_32 (scratch2
, amount
));
30843 gcc_unreachable ();
30846 /* Emit code like this:
30849 out_down = in_down << amount;
30850 out_down = (in_up << (amount - 32)) | out_down;
30851 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
30852 out_up = in_up << amount;
30855 out_down = in_down >> amount;
30856 out_down = (in_up << (32 - amount)) | out_down;
30858 out_down = ((signed)in_up >> (amount - 32)) | out_down;
30859 out_up = in_up << amount;
30862 out_down = in_down >> amount;
30863 out_down = (in_up << (32 - amount)) | out_down;
30865 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
30866 out_up = in_up << amount;
30868 The ARM and Thumb2 variants are the same but implemented slightly
30869 differently. If this were only called during expand we could just
30870 use the Thumb2 case and let combine do the right thing, but this
30871 can also be called from post-reload splitters. */
30873 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
30875 if (!TARGET_THUMB2
)
30877 /* Emit code for ARM mode. */
30878 emit_insn (SET (out_down
,
30879 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
30880 if (code
== ASHIFTRT
)
/* ASHIFTRT needs a branch over the saturating correction when the
   shift amount is small (CC set by SUB_S_32 above).  */
30882 rtx done_label
= gen_label_rtx ();
30883 emit_jump_insn (BRANCH (LT
, done_label
));
30884 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
30886 emit_label (done_label
);
30889 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
30894 /* Emit code for Thumb2 mode.
30895 Thumb2 can't do shift and or in one insn. */
30896 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
30897 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
30899 if (code
== ASHIFTRT
)
30901 rtx done_label
= gen_label_rtx ();
30902 emit_jump_insn (BRANCH (LT
, done_label
));
30903 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
30904 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
30905 emit_label (done_label
);
30909 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
30910 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
/* Finally the "up" half, common to ARM and Thumb2.  */
30914 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
30929 /* Returns true if a valid comparison operation and makes
30930 the operands in a form that is valid. */
/* Validate and legitimize the operands of a comparison for expansion:
   canonicalize UNEQ/LTGT through arm_canonicalize_comparison, then force
   *OP1 / *OP2 into registers unless they satisfy the mode-appropriate
   predicate (arm_add_operand, cmpdi_operand or
   arm_float_compare_operand; the dispatch on MODE selecting between
   these is among the missing lines).  The declared return type and the
   return statements are also among the missing lines -- per the comment
   preceding this function it reports whether the comparison is valid.
   NOTE(review): code tokens are kept byte-identical; only comments were
   added.  */
30932 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
30934 enum rtx_code code
= GET_CODE (*comparison
);
/* Take the mode from whichever operand has one (constants are
   VOIDmode); the assert below guarantees at least one does.  */
30936 enum machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
30937 ? GET_MODE (*op2
) : GET_MODE (*op1
);
30939 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
/* UNEQ and LTGT are rewritten via arm_canonicalize_comparison and the
   canonical code is stored back into the comparison rtx.  */
30941 if (code
== UNEQ
|| code
== LTGT
)
30944 code_int
= (int)code
;
30945 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
30946 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
30951 if (!arm_add_operand (*op1
, mode
))
30952 *op1
= force_reg (mode
, *op1
);
30953 if (!arm_add_operand (*op2
, mode
))
30954 *op2
= force_reg (mode
, *op2
);
30958 if (!cmpdi_operand (*op1
, mode
))
30959 *op1
= force_reg (mode
, *op1
);
30960 if (!cmpdi_operand (*op2
, mode
))
30961 *op2
= force_reg (mode
, *op2
);
30966 if (!arm_float_compare_operand (*op1
, mode
))
30967 *op1
= force_reg (mode
, *op1
);
30968 if (!arm_float_compare_operand (*op2
, mode
))
30969 *op2
= force_reg (mode
, *op2
);
30979 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook: the AddressSanitizer
   shadow memory offset on ARM is 1 << 29 (0x20000000).
   NOTE(review): the surrounding braces are among the lines missing from
   this extraction; code tokens are kept byte-identical.  */
30981 static unsigned HOST_WIDE_INT
30982 arm_asan_shadow_offset (void)
30984 return (unsigned HOST_WIDE_INT
) 1 << 29;
30987 #include "gt-arm.h"