1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8
9 This file is part of GCC.
10
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
15
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "recog.h"
45 #include "cgraph.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "c-family/c-pragma.h" /* ??? */
49 #include "integrate.h"
50 #include "tm_p.h"
51 #include "target.h"
52 #include "target-def.h"
53 #include "debug.h"
54 #include "langhooks.h"
55 #include "df.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
59 #include "opts.h"
60
61 /* Forward definitions of types. */
62 typedef struct minipool_node Mnode;
63 typedef struct minipool_fixup Mfix;
64
65 void (*arm_lang_output_object_attributes_hook)(void);
66
67 /* Forward function declarations. */
68 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
69 static int arm_compute_static_chain_stack_bytes (void);
70 static arm_stack_offsets *arm_get_frame_offsets (void);
71 static void arm_add_gc_roots (void);
72 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
73 HOST_WIDE_INT, rtx, rtx, int, int);
74 static unsigned bit_count (unsigned long);
75 static int arm_address_register_rtx_p (rtx, int);
76 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
77 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
78 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
79 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
80 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
81 inline static int thumb1_index_register_rtx_p (rtx, int);
82 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
83 static int thumb_far_jump_used_p (void);
84 static bool thumb_force_lr_save (void);
85 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
86 static rtx emit_sfm (int, int);
87 static unsigned arm_size_return_regs (void);
88 static bool arm_assemble_integer (rtx, unsigned int, int);
89 static void arm_print_operand (FILE *, rtx, int);
90 static void arm_print_operand_address (FILE *, rtx);
91 static bool arm_print_operand_punct_valid_p (unsigned char code);
92 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
93 static arm_cc get_arm_condition_code (rtx);
94 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
95 static rtx is_jump_table (rtx);
96 static const char *output_multi_immediate (rtx *, const char *, const char *,
97 int, HOST_WIDE_INT);
98 static const char *shift_op (rtx, HOST_WIDE_INT *);
99 static struct machine_function *arm_init_machine_status (void);
100 static void thumb_exit (FILE *, int);
101 static rtx is_jump_table (rtx);
102 static HOST_WIDE_INT get_jump_table_size (rtx);
103 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
104 static Mnode *add_minipool_forward_ref (Mfix *);
105 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
106 static Mnode *add_minipool_backward_ref (Mfix *);
107 static void assign_minipool_offsets (Mfix *);
108 static void arm_print_value (FILE *, rtx);
109 static void dump_minipool (rtx);
110 static int arm_barrier_cost (rtx);
111 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
112 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
113 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
114 rtx);
115 static void arm_reorg (void);
116 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
117 static unsigned long arm_compute_save_reg0_reg12_mask (void);
118 static unsigned long arm_compute_save_reg_mask (void);
119 static unsigned long arm_isr_value (tree);
120 static unsigned long arm_compute_func_type (void);
121 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
122 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
123 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
124 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
125 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
126 #endif
127 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
128 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
129 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
130 static int arm_comp_type_attributes (const_tree, const_tree);
131 static void arm_set_default_type_attributes (tree);
132 static int arm_adjust_cost (rtx, rtx, rtx, int);
133 static int count_insns_for_constant (HOST_WIDE_INT, int);
134 static int arm_get_strip_length (int);
135 static bool arm_function_ok_for_sibcall (tree, tree);
136 static enum machine_mode arm_promote_function_mode (const_tree,
137 enum machine_mode, int *,
138 const_tree, int);
139 static bool arm_return_in_memory (const_tree, const_tree);
140 static rtx arm_function_value (const_tree, const_tree, bool);
141 static rtx arm_libcall_value (enum machine_mode, const_rtx);
142
143 static void arm_internal_label (FILE *, const char *, unsigned long);
144 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
145 tree);
146 static bool arm_have_conditional_execution (void);
147 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
148 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
149 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
150 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
151 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
152 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
153 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
154 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
155 static bool arm_rtx_costs (rtx, int, int, int *, bool);
156 static int arm_address_cost (rtx, bool);
157 static bool arm_memory_load_p (rtx);
158 static bool arm_cirrus_insn_p (rtx);
159 static void cirrus_reorg (rtx);
160 static void arm_init_builtins (void);
161 static void arm_init_iwmmxt_builtins (void);
162 static rtx safe_vector_operand (rtx, enum machine_mode);
163 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
164 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
165 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
166 static tree arm_builtin_decl (unsigned, bool);
167 static void emit_constant_insn (rtx cond, rtx pattern);
168 static rtx emit_set_insn (rtx, rtx);
169 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
170 tree, bool);
171 static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
172 const_tree, bool);
173 static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
174 const_tree, bool);
175 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
176 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
177 const_tree);
178 static int aapcs_select_return_coproc (const_tree, const_tree);
179
180 #ifdef OBJECT_FORMAT_ELF
181 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
182 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
183 #endif
184 #ifndef ARM_PE
185 static void arm_encode_section_info (tree, rtx, int);
186 #endif
187
188 static void arm_file_end (void);
189 static void arm_file_start (void);
190
191 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
192 tree, int *, int);
193 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
194 enum machine_mode, const_tree, bool);
195 static bool arm_promote_prototypes (const_tree);
196 static bool arm_default_short_enums (void);
197 static bool arm_align_anon_bitfield (void);
198 static bool arm_return_in_msb (const_tree);
199 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
200 static bool arm_return_in_memory (const_tree, const_tree);
201 #if ARM_UNWIND_INFO
202 static void arm_unwind_emit (FILE *, rtx);
203 static bool arm_output_ttype (rtx);
204 static void arm_asm_emit_except_personality (rtx);
205 static void arm_asm_init_sections (void);
206 #endif
207 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
208 static rtx arm_dwarf_register_span (rtx);
209
210 static tree arm_cxx_guard_type (void);
211 static bool arm_cxx_guard_mask_bit (void);
212 static tree arm_get_cookie_size (tree);
213 static bool arm_cookie_has_size (void);
214 static bool arm_cxx_cdtor_returns_this (void);
215 static bool arm_cxx_key_method_may_be_inline (void);
216 static void arm_cxx_determine_class_data_visibility (tree);
217 static bool arm_cxx_class_data_always_comdat (void);
218 static bool arm_cxx_use_aeabi_atexit (void);
219 static void arm_init_libfuncs (void);
220 static tree arm_build_builtin_va_list (void);
221 static void arm_expand_builtin_va_start (tree, rtx);
222 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
223 static void arm_option_override (void);
224 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
225 static bool arm_cannot_copy_insn_p (rtx);
226 static bool arm_tls_symbol_p (rtx x);
227 static int arm_issue_rate (void);
228 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
229 static bool arm_output_addr_const_extra (FILE *, rtx);
230 static bool arm_allocate_stack_slots_for_args (void);
231 static const char *arm_invalid_parameter_type (const_tree t);
232 static const char *arm_invalid_return_type (const_tree t);
233 static tree arm_promoted_type (const_tree t);
234 static tree arm_convert_to_type (tree type, tree expr);
235 static bool arm_scalar_mode_supported_p (enum machine_mode);
236 static bool arm_frame_pointer_required (void);
237 static bool arm_can_eliminate (const int, const int);
238 static void arm_asm_trampoline_template (FILE *);
239 static void arm_trampoline_init (rtx, tree, rtx);
240 static rtx arm_trampoline_adjust_address (rtx);
241 static rtx arm_pic_static_addr (rtx orig, rtx reg);
242 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
243 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
244 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
245 static bool arm_array_mode_supported_p (enum machine_mode,
246 unsigned HOST_WIDE_INT);
247 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
248 static bool arm_class_likely_spilled_p (reg_class_t);
249 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
250 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
251 const_tree type,
252 int misalignment,
253 bool is_packed);
254 static void arm_conditional_register_usage (void);
255 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
256 static unsigned int arm_autovectorize_vector_sizes (void);
257 static int arm_default_branch_cost (bool, bool);
258 static int arm_cortex_a5_branch_cost (bool, bool);
259
260 \f
261 /* Table of machine attributes. */
262 static const struct attribute_spec arm_attribute_table[] =
263 {
264 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
265 affects_type_identity } */
266 /* Function calls made to this symbol must be done indirectly, because
267 it may lie outside of the 26 bit addressing range of a normal function
268 call. */
269 { "long_call", 0, 0, false, true, true, NULL, false },
270 /* Whereas these functions are always known to reside within the 26 bit
271 addressing range. */
272 { "short_call", 0, 0, false, true, true, NULL, false },
273 /* Specify the procedure call conventions for a function. */
274 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
275 false },
276 /* Interrupt Service Routines have special prologue and epilogue requirements. */
277 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
278 false },
279 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
280 false },
281 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
282 false },
283 #ifdef ARM_PE
284 /* ARM/PE has three new attributes:
285 interfacearm - ?
286 dllexport - for exporting a function/variable that will live in a dll
287 dllimport - for importing a function/variable from a dll
288
289 Microsoft allows multiple declspecs in one __declspec, separating
290 them with spaces. We do NOT support this. Instead, use __declspec
291 multiple times.
292 */
293 { "dllimport", 0, 0, true, false, false, NULL, false },
294 { "dllexport", 0, 0, true, false, false, NULL, false },
295 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
296 false },
297 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
298 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
299 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
300 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
301 false },
302 #endif
303 { NULL, 0, 0, false, false, false, NULL, false }
304 };
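/* Illustrative sketch (added for this write-up, not part of the original file):
   user code selects these attributes in the usual GCC way, e.g.

     extern void far_away (void) __attribute__ ((long_call));
     void uart_handler (void) __attribute__ ((isr ("IRQ")));
     void start_stub (void) __attribute__ ((naked));

   "long_call" forces the indirect call sequence described above, "isr" selects
   the interrupt prologue/epilogue handled by arm_handle_isr_attribute, and
   "naked" suppresses prologue/epilogue generation entirely.  The identifiers
   in the example are made up; only the attribute names come from the table. */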
305 \f
306 /* Initialize the GCC target structure. */
307 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
308 #undef TARGET_MERGE_DECL_ATTRIBUTES
309 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
310 #endif
311
312 #undef TARGET_LEGITIMIZE_ADDRESS
313 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
314
315 #undef TARGET_ATTRIBUTE_TABLE
316 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
317
318 #undef TARGET_ASM_FILE_START
319 #define TARGET_ASM_FILE_START arm_file_start
320 #undef TARGET_ASM_FILE_END
321 #define TARGET_ASM_FILE_END arm_file_end
322
323 #undef TARGET_ASM_ALIGNED_SI_OP
324 #define TARGET_ASM_ALIGNED_SI_OP NULL
325 #undef TARGET_ASM_INTEGER
326 #define TARGET_ASM_INTEGER arm_assemble_integer
327
328 #undef TARGET_PRINT_OPERAND
329 #define TARGET_PRINT_OPERAND arm_print_operand
330 #undef TARGET_PRINT_OPERAND_ADDRESS
331 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
332 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
333 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
334
335 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
336 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
337
338 #undef TARGET_ASM_FUNCTION_PROLOGUE
339 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
340
341 #undef TARGET_ASM_FUNCTION_EPILOGUE
342 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
343
344 #undef TARGET_OPTION_OVERRIDE
345 #define TARGET_OPTION_OVERRIDE arm_option_override
346
347 #undef TARGET_COMP_TYPE_ATTRIBUTES
348 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
349
350 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
351 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
352
353 #undef TARGET_SCHED_ADJUST_COST
354 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
355
356 #undef TARGET_ENCODE_SECTION_INFO
357 #ifdef ARM_PE
358 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
359 #else
360 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
361 #endif
362
363 #undef TARGET_STRIP_NAME_ENCODING
364 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
365
366 #undef TARGET_ASM_INTERNAL_LABEL
367 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
368
369 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
370 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
371
372 #undef TARGET_FUNCTION_VALUE
373 #define TARGET_FUNCTION_VALUE arm_function_value
374
375 #undef TARGET_LIBCALL_VALUE
376 #define TARGET_LIBCALL_VALUE arm_libcall_value
377
378 #undef TARGET_ASM_OUTPUT_MI_THUNK
379 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
380 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
381 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
382
383 #undef TARGET_RTX_COSTS
384 #define TARGET_RTX_COSTS arm_rtx_costs
385 #undef TARGET_ADDRESS_COST
386 #define TARGET_ADDRESS_COST arm_address_cost
387
388 #undef TARGET_SHIFT_TRUNCATION_MASK
389 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
390 #undef TARGET_VECTOR_MODE_SUPPORTED_P
391 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
392 #undef TARGET_ARRAY_MODE_SUPPORTED_P
393 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
394 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
395 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
396 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
397 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
398 arm_autovectorize_vector_sizes
399
400 #undef TARGET_MACHINE_DEPENDENT_REORG
401 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
402
403 #undef TARGET_INIT_BUILTINS
404 #define TARGET_INIT_BUILTINS arm_init_builtins
405 #undef TARGET_EXPAND_BUILTIN
406 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
407 #undef TARGET_BUILTIN_DECL
408 #define TARGET_BUILTIN_DECL arm_builtin_decl
409
410 #undef TARGET_INIT_LIBFUNCS
411 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
412
413 #undef TARGET_PROMOTE_FUNCTION_MODE
414 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
415 #undef TARGET_PROMOTE_PROTOTYPES
416 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
417 #undef TARGET_PASS_BY_REFERENCE
418 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
419 #undef TARGET_ARG_PARTIAL_BYTES
420 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
421 #undef TARGET_FUNCTION_ARG
422 #define TARGET_FUNCTION_ARG arm_function_arg
423 #undef TARGET_FUNCTION_ARG_ADVANCE
424 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
425 #undef TARGET_FUNCTION_ARG_BOUNDARY
426 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
427
428 #undef TARGET_SETUP_INCOMING_VARARGS
429 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
430
431 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
432 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
433
434 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
435 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
436 #undef TARGET_TRAMPOLINE_INIT
437 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
438 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
439 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
440
441 #undef TARGET_DEFAULT_SHORT_ENUMS
442 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
443
444 #undef TARGET_ALIGN_ANON_BITFIELD
445 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
446
447 #undef TARGET_NARROW_VOLATILE_BITFIELD
448 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
449
450 #undef TARGET_CXX_GUARD_TYPE
451 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
452
453 #undef TARGET_CXX_GUARD_MASK_BIT
454 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
455
456 #undef TARGET_CXX_GET_COOKIE_SIZE
457 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
458
459 #undef TARGET_CXX_COOKIE_HAS_SIZE
460 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
461
462 #undef TARGET_CXX_CDTOR_RETURNS_THIS
463 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
464
465 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
466 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
467
468 #undef TARGET_CXX_USE_AEABI_ATEXIT
469 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
470
471 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
472 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
473 arm_cxx_determine_class_data_visibility
474
475 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
476 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
477
478 #undef TARGET_RETURN_IN_MSB
479 #define TARGET_RETURN_IN_MSB arm_return_in_msb
480
481 #undef TARGET_RETURN_IN_MEMORY
482 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
483
484 #undef TARGET_MUST_PASS_IN_STACK
485 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
486
487 #if ARM_UNWIND_INFO
488 #undef TARGET_ASM_UNWIND_EMIT
489 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
490
491 /* EABI unwinding tables use a different format for the typeinfo tables. */
492 #undef TARGET_ASM_TTYPE
493 #define TARGET_ASM_TTYPE arm_output_ttype
494
495 #undef TARGET_ARM_EABI_UNWINDER
496 #define TARGET_ARM_EABI_UNWINDER true
497
498 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
499 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
500
501 #undef TARGET_ASM_INIT_SECTIONS
502 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
503 #endif /* ARM_UNWIND_INFO */
504
505 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
506 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
507
508 #undef TARGET_DWARF_REGISTER_SPAN
509 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
510
511 #undef TARGET_CANNOT_COPY_INSN_P
512 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
513
514 #ifdef HAVE_AS_TLS
515 #undef TARGET_HAVE_TLS
516 #define TARGET_HAVE_TLS true
517 #endif
518
519 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
520 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
521
522 #undef TARGET_LEGITIMATE_CONSTANT_P
523 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
524
525 #undef TARGET_CANNOT_FORCE_CONST_MEM
526 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
527
528 #undef TARGET_MAX_ANCHOR_OFFSET
529 #define TARGET_MAX_ANCHOR_OFFSET 4095
530
531 /* The minimum is set such that the total size of the block
532 for a particular anchor is 4088 + 1 + 4095 bytes, which is
533 divisible by eight, ensuring natural spacing of anchors. */
534 #undef TARGET_MIN_ANCHOR_OFFSET
535 #define TARGET_MIN_ANCHOR_OFFSET -4088
536
537 #undef TARGET_SCHED_ISSUE_RATE
538 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
539
540 #undef TARGET_MANGLE_TYPE
541 #define TARGET_MANGLE_TYPE arm_mangle_type
542
543 #undef TARGET_BUILD_BUILTIN_VA_LIST
544 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
545 #undef TARGET_EXPAND_BUILTIN_VA_START
546 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
547 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
548 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
549
550 #ifdef HAVE_AS_TLS
551 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
552 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
553 #endif
554
555 #undef TARGET_LEGITIMATE_ADDRESS_P
556 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
557
558 #undef TARGET_INVALID_PARAMETER_TYPE
559 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
560
561 #undef TARGET_INVALID_RETURN_TYPE
562 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
563
564 #undef TARGET_PROMOTED_TYPE
565 #define TARGET_PROMOTED_TYPE arm_promoted_type
566
567 #undef TARGET_CONVERT_TO_TYPE
568 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
569
570 #undef TARGET_SCALAR_MODE_SUPPORTED_P
571 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
572
573 #undef TARGET_FRAME_POINTER_REQUIRED
574 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
575
576 #undef TARGET_CAN_ELIMINATE
577 #define TARGET_CAN_ELIMINATE arm_can_eliminate
578
579 #undef TARGET_CONDITIONAL_REGISTER_USAGE
580 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
581
582 #undef TARGET_CLASS_LIKELY_SPILLED_P
583 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
584
585 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
586 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
587 arm_vector_alignment_reachable
588
589 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
590 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
591 arm_builtin_support_vector_misalignment
592
593 #undef TARGET_PREFERRED_RENAME_CLASS
594 #define TARGET_PREFERRED_RENAME_CLASS \
595 arm_preferred_rename_class
596
597 struct gcc_target targetm = TARGET_INITIALIZER;
598 \f
599 /* Obstack for minipool constant handling. */
600 static struct obstack minipool_obstack;
601 static char * minipool_startobj;
602
603 /* The maximum number of insns skipped which
604 will be conditionalised if possible. */
605 static int max_insns_skipped = 5;
606
607 extern FILE * asm_out_file;
608
609 /* True if we are currently building a constant table. */
610 int making_const_table;
611
612 /* The processor for which instructions should be scheduled. */
613 enum processor_type arm_tune = arm_none;
614
615 /* The current tuning set. */
616 const struct tune_params *current_tune;
617
618 /* Which floating point hardware to schedule for. */
619 int arm_fpu_attr;
620
621 /* Which floating point hardware to use. */
622 const struct arm_fpu_desc *arm_fpu_desc;
623
624 /* Used for Thumb call_via trampolines. */
625 rtx thumb_call_via_label[14];
626 static int thumb_call_reg_needed;
627
628 /* Bit values used to identify processor capabilities. */
629 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
630 #define FL_ARCH3M (1 << 1) /* Extended multiply */
631 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
632 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
633 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
634 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
635 #define FL_THUMB (1 << 6) /* Thumb aware */
636 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
637 #define FL_STRONG (1 << 8) /* StrongARM */
638 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
639 #define FL_XSCALE (1 << 10) /* XScale */
640 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
641 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
642 media instructions. */
643 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
644 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
645 Note: ARM6 & 7 derivatives only. */
646 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
647 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
648 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
649 profile. */
650 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
651 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
652 #define FL_NEON (1 << 20) /* Neon instructions. */
653 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
654 architecture. */
655 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
656 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
657
658 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
659
660 /* Flags that only affect tuning, not available instructions. */
661 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
662 | FL_CO_PROC)
663
664 #define FL_FOR_ARCH2 FL_NOTM
665 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
666 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
667 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
668 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
669 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
670 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
671 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
672 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
673 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
674 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
675 #define FL_FOR_ARCH6J FL_FOR_ARCH6
676 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
677 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
678 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
679 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
680 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
681 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
682 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
683 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
684 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
685 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
686
687 /* The bits in this mask specify which
688 instructions we are allowed to generate. */
689 static unsigned long insn_flags = 0;
690
691 /* The bits in this mask specify which instruction scheduling options should
692 be used. */
693 static unsigned long tune_flags = 0;
694
695 /* The following are used in the arm.md file as equivalents to bits
696 in the above two flag variables. */
697
698 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
699 int arm_arch3m = 0;
700
701 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
702 int arm_arch4 = 0;
703
704 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
705 int arm_arch4t = 0;
706
707 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
708 int arm_arch5 = 0;
709
710 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
711 int arm_arch5e = 0;
712
713 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
714 int arm_arch6 = 0;
715
716 /* Nonzero if this chip supports the ARM 6K extensions. */
717 int arm_arch6k = 0;
718
719 /* Nonzero if this chip supports the ARM 7 extensions. */
720 int arm_arch7 = 0;
721
722 /* Nonzero if instructions not present in the 'M' profile can be used. */
723 int arm_arch_notm = 0;
724
725 /* Nonzero if instructions present in ARMv7E-M can be used. */
726 int arm_arch7em = 0;
727
728 /* Nonzero if this chip can benefit from load scheduling. */
729 int arm_ld_sched = 0;
730
731 /* Nonzero if this chip is a StrongARM. */
732 int arm_tune_strongarm = 0;
733
734 /* Nonzero if this chip is a Cirrus variant. */
735 int arm_arch_cirrus = 0;
736
737 /* Nonzero if this chip supports Intel Wireless MMX technology. */
738 int arm_arch_iwmmxt = 0;
739
740 /* Nonzero if this chip is an XScale. */
741 int arm_arch_xscale = 0;
742
743 /* Nonzero if tuning for XScale */
744 int arm_tune_xscale = 0;
745
746 /* Nonzero if we want to tune for stores that access the write-buffer.
747 This typically means an ARM6 or ARM7 with MMU or MPU. */
748 int arm_tune_wbuf = 0;
749
750 /* Nonzero if tuning for Cortex-A9. */
751 int arm_tune_cortex_a9 = 0;
752
753 /* Nonzero if generating Thumb instructions. */
754 int thumb_code = 0;
755
756 /* Nonzero if generating Thumb-1 instructions. */
757 int thumb1_code = 0;
758
759 /* Nonzero if we should define __THUMB_INTERWORK__ in the
760 preprocessor.
761 XXX This is a bit of a hack, it's intended to help work around
762 problems in GLD which doesn't understand that armv5t code is
763 interworking clean. */
764 int arm_cpp_interwork = 0;
765
766 /* Nonzero if chip supports Thumb 2. */
767 int arm_arch_thumb2;
768
769 /* Nonzero if chip supports integer division instruction. */
770 int arm_arch_arm_hwdiv;
771 int arm_arch_thumb_hwdiv;
772
773 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
774 we must report the mode of the memory reference from
775 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
776 enum machine_mode output_memory_reference_mode;
777
778 /* The register number to be used for the PIC offset register. */
779 unsigned arm_pic_register = INVALID_REGNUM;
780
781 /* Set to 1 after arm_reorg has started. Reset to zero at the start of
782 the next function. */
783 static int after_arm_reorg = 0;
784
785 enum arm_pcs arm_pcs_default;
786
787 /* For an explanation of these variables, see final_prescan_insn below. */
788 int arm_ccfsm_state;
789 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
790 enum arm_cond_code arm_current_cc;
791
792 rtx arm_target_insn;
793 int arm_target_label;
794 /* The number of conditionally executed insns, including the current insn. */
795 int arm_condexec_count = 0;
796 /* A bitmask specifying the patterns for the IT block.
797 Zero means do not output an IT block before this insn. */
798 int arm_condexec_mask = 0;
799 /* The number of bits used in arm_condexec_mask. */
800 int arm_condexec_masklen = 0;
801
802 /* The condition codes of the ARM, and the inverse function. */
803 static const char * const arm_condition_codes[] =
804 {
805 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
806 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
807 };
808
809 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
810 int arm_regs_in_sequence[] =
811 {
812 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
813 };
814
815 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
816 #define streq(string1, string2) (strcmp (string1, string2) == 0)
817
818 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
819 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
820 | (1 << PIC_OFFSET_TABLE_REGNUM)))
821 \f
822 /* Initialization code. */
823
824 struct processors
825 {
826 const char *const name;
827 enum processor_type core;
828 const char *arch;
829 const unsigned long flags;
830 const struct tune_params *const tune;
831 };
832
833
834 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
835 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
836 prefetch_slots, \
837 l1_size, \
838 l1_line_size
839
840 const struct tune_params arm_slowmul_tune =
841 {
842 arm_slowmul_rtx_costs,
843 NULL,
844 3, /* Constant limit. */
845 5, /* Max cond insns. */
846 ARM_PREFETCH_NOT_BENEFICIAL,
847 true, /* Prefer constant pool. */
848 arm_default_branch_cost
849 };
850
851 const struct tune_params arm_fastmul_tune =
852 {
853 arm_fastmul_rtx_costs,
854 NULL,
855 1, /* Constant limit. */
856 5, /* Max cond insns. */
857 ARM_PREFETCH_NOT_BENEFICIAL,
858 true, /* Prefer constant pool. */
859 arm_default_branch_cost
860 };
861
862 /* StrongARM has early execution of branches, so a sequence that is worth
863 skipping is shorter. Set max_insns_skipped to a lower value. */
864
865 const struct tune_params arm_strongarm_tune =
866 {
867 arm_fastmul_rtx_costs,
868 NULL,
869 1, /* Constant limit. */
870 3, /* Max cond insns. */
871 ARM_PREFETCH_NOT_BENEFICIAL,
872 true, /* Prefer constant pool. */
873 arm_default_branch_cost
874 };
875
876 const struct tune_params arm_xscale_tune =
877 {
878 arm_xscale_rtx_costs,
879 xscale_sched_adjust_cost,
880 2, /* Constant limit. */
881 3, /* Max cond insns. */
882 ARM_PREFETCH_NOT_BENEFICIAL,
883 true, /* Prefer constant pool. */
884 arm_default_branch_cost
885 };
886
887 const struct tune_params arm_9e_tune =
888 {
889 arm_9e_rtx_costs,
890 NULL,
891 1, /* Constant limit. */
892 5, /* Max cond insns. */
893 ARM_PREFETCH_NOT_BENEFICIAL,
894 true, /* Prefer constant pool. */
895 arm_default_branch_cost
896 };
897
898 const struct tune_params arm_v6t2_tune =
899 {
900 arm_9e_rtx_costs,
901 NULL,
902 1, /* Constant limit. */
903 5, /* Max cond insns. */
904 ARM_PREFETCH_NOT_BENEFICIAL,
905 false, /* Prefer constant pool. */
906 arm_default_branch_cost
907 };
908
909 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
910 const struct tune_params arm_cortex_tune =
911 {
912 arm_9e_rtx_costs,
913 NULL,
914 1, /* Constant limit. */
915 5, /* Max cond insns. */
916 ARM_PREFETCH_NOT_BENEFICIAL,
917 false, /* Prefer constant pool. */
918 arm_default_branch_cost
919 };
920
921 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
922 less appealing. Set max_insns_skipped to a low value. */
923
924 const struct tune_params arm_cortex_a5_tune =
925 {
926 arm_9e_rtx_costs,
927 NULL,
928 1, /* Constant limit. */
929 1, /* Max cond insns. */
930 ARM_PREFETCH_NOT_BENEFICIAL,
931 false, /* Prefer constant pool. */
932 arm_cortex_a5_branch_cost
933 };
934
935 const struct tune_params arm_cortex_a9_tune =
936 {
937 arm_9e_rtx_costs,
938 cortex_a9_sched_adjust_cost,
939 1, /* Constant limit. */
940 5, /* Max cond insns. */
941 ARM_PREFETCH_BENEFICIAL(4,32,32),
942 false, /* Prefer constant pool. */
943 arm_default_branch_cost
944 };
945
946 const struct tune_params arm_fa726te_tune =
947 {
948 arm_9e_rtx_costs,
949 fa726te_sched_adjust_cost,
950 1, /* Constant limit. */
951 5, /* Max cond insns. */
952 ARM_PREFETCH_NOT_BENEFICIAL,
953 true, /* Prefer constant pool. */
954 arm_default_branch_cost
955 };
956
957
958 /* Not all of these give usefully different compilation alternatives,
959 but there is no simple way of generalizing them. */
960 static const struct processors all_cores[] =
961 {
962 /* ARM Cores */
963 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
964 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
965 #include "arm-cores.def"
966 #undef ARM_CORE
967 {NULL, arm_none, NULL, 0, NULL}
968 };
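/* Illustrative sketch (added, not part of the original file): each ARM_CORE
   line in arm-cores.def expands through the macro above into one table entry.
   A hypothetical entry

     ARM_CORE("example-core", examplecore, 7A, FL_LDSCHED, cortex)

   would expand to

     {"example-core", examplecore, "7A",
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_tune},

   i.e. the architecture name is stringized, the per-architecture FL_FOR_ARCH*
   flags are OR-ed in, and the COSTS argument selects one of the tune_params
   structures defined earlier. */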
969
970 static const struct processors all_architectures[] =
971 {
972 /* ARM Architectures */
973 /* We don't specify tuning costs here as it will be figured out
974 from the core. */
975
976 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
977 {NAME, CORE, #ARCH, FLAGS, NULL},
978 #include "arm-arches.def"
979 #undef ARM_ARCH
980 {NULL, arm_none, NULL, 0 , NULL}
981 };
982
983
984 /* These are populated as commandline arguments are processed, or NULL
985 if not specified. */
986 static const struct processors *arm_selected_arch;
987 static const struct processors *arm_selected_cpu;
988 static const struct processors *arm_selected_tune;
989
990 /* The name of the preprocessor macro to define for this architecture. */
991
992 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
993
994 /* Available values for -mfpu=. */
995
996 static const struct arm_fpu_desc all_fpus[] =
997 {
998 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
999 { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
1000 #include "arm-fpus.def"
1001 #undef ARM_FPU
1002 };
1003
1004
1005 /* Supported TLS relocations. */
1006
1007 enum tls_reloc {
1008 TLS_GD32,
1009 TLS_LDM32,
1010 TLS_LDO32,
1011 TLS_IE32,
1012 TLS_LE32
1013 };
1014
1015 /* The maximum number of insns to be used when loading a constant. */
1016 inline static int
1017 arm_constant_limit (bool size_p)
1018 {
1019 return size_p ? 1 : current_tune->constant_limit;
1020 }
1021
1022 /* Emit an insn that's a simple single-set. Both the operands must be known
1023 to be valid. */
1024 inline static rtx
1025 emit_set_insn (rtx x, rtx y)
1026 {
1027 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1028 }
1029
1030 /* Return the number of bits set in VALUE. */
1031 static unsigned
1032 bit_count (unsigned long value)
1033 {
1034 unsigned long count = 0;
1035
1036 while (value)
1037 {
1038 count++;
1039 value &= value - 1; /* Clear the least-significant set bit. */
1040 }
1041
1042 return count;
1043 }
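/* Worked example (added for illustration): for value = 0x28 (binary 101000),
   the first iteration computes 0x28 & 0x27 = 0x20 (count = 1) and the second
   computes 0x20 & 0x1f = 0 (count = 2).  Each "value &= value - 1" clears
   exactly one set bit, so the loop runs once per set bit. */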
1044
1045 /* Set up library functions unique to ARM. */
1046
1047 static void
1048 arm_init_libfuncs (void)
1049 {
1050 /* There are no special library functions unless we are using the
1051 ARM BPABI. */
1052 if (!TARGET_BPABI)
1053 return;
1054
1055 /* The functions below are described in Section 4 of the "Run-Time
1056 ABI for the ARM architecture", Version 1.0. */
1057
1058 /* Double-precision floating-point arithmetic. Table 2. */
1059 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1060 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1061 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1062 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1063 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1064
1065 /* Double-precision comparisons. Table 3. */
1066 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1067 set_optab_libfunc (ne_optab, DFmode, NULL);
1068 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1069 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1070 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1071 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1072 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1073
1074 /* Single-precision floating-point arithmetic. Table 4. */
1075 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1076 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1077 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1078 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1079 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1080
1081 /* Single-precision comparisons. Table 5. */
1082 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1083 set_optab_libfunc (ne_optab, SFmode, NULL);
1084 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1085 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1086 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1087 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1088 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1089
1090 /* Floating-point to integer conversions. Table 6. */
1091 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1092 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1093 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1094 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1095 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1096 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1097 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1098 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1099
1100 /* Conversions between floating types. Table 7. */
1101 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1102 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1103
1104 /* Integer to floating-point conversions. Table 8. */
1105 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1106 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1107 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1108 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1109 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1110 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1111 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1112 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1113
1114 /* Long long. Table 9. */
1115 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1116 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1117 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1118 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1119 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1120 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1121 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1122 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1123
1124 /* Integer (32/32->32) division. \S 4.3.1. */
1125 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1126 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1127
1128 /* The divmod functions are designed so that they can be used for
1129 plain division, even though they return both the quotient and the
1130 remainder. The quotient is returned in the usual location (i.e.,
1131 r0 for SImode, {r0, r1} for DImode), just as would be expected
1132 for an ordinary division routine. Because the AAPCS calling
1133 conventions specify that all of { r0, r1, r2, r3 } are
1134 call-clobbered registers, there is no need to tell the compiler
1135 explicitly that those registers are clobbered by these
1136 routines. */
1137 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1138 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1139
1140 /* For SImode division the ABI provides div-without-mod routines,
1141 which are faster. */
1142 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1143 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1144
1145 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1146 divmod libcalls instead. */
1147 set_optab_libfunc (smod_optab, DImode, NULL);
1148 set_optab_libfunc (umod_optab, DImode, NULL);
1149 set_optab_libfunc (smod_optab, SImode, NULL);
1150 set_optab_libfunc (umod_optab, SImode, NULL);
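/* Illustrative note (added, not in the original file): per the Run-Time ABI,
   __aeabi_idivmod returns the quotient in r0 and the remainder in r1, and
   __aeabi_uldivmod returns the quotient in {r0, r1} and the remainder in
   {r2, r3}.  So for source code such as

     int q = a / b;   // __aeabi_idiv (or __aeabi_idivmod), quotient in r0
     int r = a % b;   // __aeabi_idivmod, remainder taken from r1

   the same divmod helper can service both operations, which is why the mod
   optabs are simply left NULL above. */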
1151
1152 /* Half-precision float operations. The compiler handles all operations
1153 with NULL libfuncs by converting to SFmode. */
1154 switch (arm_fp16_format)
1155 {
1156 case ARM_FP16_FORMAT_IEEE:
1157 case ARM_FP16_FORMAT_ALTERNATIVE:
1158
1159 /* Conversions. */
1160 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1161 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1162 ? "__gnu_f2h_ieee"
1163 : "__gnu_f2h_alternative"));
1164 set_conv_libfunc (sext_optab, SFmode, HFmode,
1165 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1166 ? "__gnu_h2f_ieee"
1167 : "__gnu_h2f_alternative"));
1168
1169 /* Arithmetic. */
1170 set_optab_libfunc (add_optab, HFmode, NULL);
1171 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1172 set_optab_libfunc (smul_optab, HFmode, NULL);
1173 set_optab_libfunc (neg_optab, HFmode, NULL);
1174 set_optab_libfunc (sub_optab, HFmode, NULL);
1175
1176 /* Comparisons. */
1177 set_optab_libfunc (eq_optab, HFmode, NULL);
1178 set_optab_libfunc (ne_optab, HFmode, NULL);
1179 set_optab_libfunc (lt_optab, HFmode, NULL);
1180 set_optab_libfunc (le_optab, HFmode, NULL);
1181 set_optab_libfunc (ge_optab, HFmode, NULL);
1182 set_optab_libfunc (gt_optab, HFmode, NULL);
1183 set_optab_libfunc (unord_optab, HFmode, NULL);
1184 break;
1185
1186 default:
1187 break;
1188 }
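/* Illustrative sketch (added, not in the original file): with the arithmetic
   and comparison libfuncs above left NULL, an expression such as

     __fp16 a, b, c;
     c = a + b;

   is compiled by widening a and b to SFmode (via __gnu_h2f_ieee or
   __gnu_h2f_alternative), performing a single-precision add, and narrowing
   the result back (via __gnu_f2h_ieee or __gnu_f2h_alternative). */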
1189
1190 if (TARGET_AAPCS_BASED)
1191 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1192 }
1193
1194 /* On AAPCS systems, this is the "struct __va_list". */
1195 static GTY(()) tree va_list_type;
1196
1197 /* Return the type to use as __builtin_va_list. */
1198 static tree
1199 arm_build_builtin_va_list (void)
1200 {
1201 tree va_list_name;
1202 tree ap_field;
1203
1204 if (!TARGET_AAPCS_BASED)
1205 return std_build_builtin_va_list ();
1206
1207 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1208 defined as:
1209
1210 struct __va_list
1211 {
1212 void *__ap;
1213 };
1214
1215 The C Library ABI further reinforces this definition in \S
1216 4.1.
1217
1218 We must follow this definition exactly. The structure tag
1219 name is visible in C++ mangled names, and thus forms a part
1220 of the ABI. The field name may be used by people who
1221 #include <stdarg.h>. */
1222 /* Create the type. */
1223 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1224 /* Give it the required name. */
1225 va_list_name = build_decl (BUILTINS_LOCATION,
1226 TYPE_DECL,
1227 get_identifier ("__va_list"),
1228 va_list_type);
1229 DECL_ARTIFICIAL (va_list_name) = 1;
1230 TYPE_NAME (va_list_type) = va_list_name;
1231 TYPE_STUB_DECL (va_list_type) = va_list_name;
1232 /* Create the __ap field. */
1233 ap_field = build_decl (BUILTINS_LOCATION,
1234 FIELD_DECL,
1235 get_identifier ("__ap"),
1236 ptr_type_node);
1237 DECL_ARTIFICIAL (ap_field) = 1;
1238 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1239 TYPE_FIELDS (va_list_type) = ap_field;
1240 /* Compute its layout. */
1241 layout_type (va_list_type);
1242
1243 return va_list_type;
1244 }
1245
1246 /* Return an expression of type "void *" pointing to the next
1247 available argument in a variable-argument list. VALIST is the
1248 user-level va_list object, of type __builtin_va_list. */
1249 static tree
1250 arm_extract_valist_ptr (tree valist)
1251 {
1252 if (TREE_TYPE (valist) == error_mark_node)
1253 return error_mark_node;
1254
1255 /* On an AAPCS target, the pointer is stored within "struct
1256 va_list". */
1257 if (TARGET_AAPCS_BASED)
1258 {
1259 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1260 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1261 valist, ap_field, NULL_TREE);
1262 }
1263
1264 return valist;
1265 }
1266
1267 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1268 static void
1269 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1270 {
1271 valist = arm_extract_valist_ptr (valist);
1272 std_expand_builtin_va_start (valist, nextarg);
1273 }
1274
1275 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1276 static tree
1277 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1278 gimple_seq *post_p)
1279 {
1280 valist = arm_extract_valist_ptr (valist);
1281 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1282 }
1283
1284 /* Fix up any incompatible options that the user has specified. */
1285 static void
1286 arm_option_override (void)
1287 {
1288 if (global_options_set.x_arm_arch_option)
1289 arm_selected_arch = &all_architectures[arm_arch_option];
1290
1291 if (global_options_set.x_arm_cpu_option)
1292 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1293
1294 if (global_options_set.x_arm_tune_option)
1295 arm_selected_tune = &all_cores[(int) arm_tune_option];
1296
1297 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1298 SUBTARGET_OVERRIDE_OPTIONS;
1299 #endif
1300
1301 if (arm_selected_arch)
1302 {
1303 if (arm_selected_cpu)
1304 {
1305 /* Check for conflict between mcpu and march. */
1306 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1307 {
1308 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1309 arm_selected_cpu->name, arm_selected_arch->name);
1310 /* -march wins for code generation.
1311 -mcpu wins for default tuning. */
1312 if (!arm_selected_tune)
1313 arm_selected_tune = arm_selected_cpu;
1314
1315 arm_selected_cpu = arm_selected_arch;
1316 }
1317 else
1318 /* -mcpu wins. */
1319 arm_selected_arch = NULL;
1320 }
1321 else
1322 /* Pick a CPU based on the architecture. */
1323 arm_selected_cpu = arm_selected_arch;
1324 }
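/* Illustrative note (added, not in the original file): given conflicting
   options such as a hypothetical "-mcpu=cpuA -march=archB" where cpuA does
   not implement archB, the code above warns, generates code for archB
   (arm_selected_cpu = arm_selected_arch) and, unless -mtune was given,
   keeps tuning for cpuA (arm_selected_tune was set from the original
   arm_selected_cpu before it was overwritten). */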
1325
1326 /* If the user did not specify a processor, choose one for them. */
1327 if (!arm_selected_cpu)
1328 {
1329 const struct processors * sel;
1330 unsigned int sought;
1331
1332 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1333 if (!arm_selected_cpu->name)
1334 {
1335 #ifdef SUBTARGET_CPU_DEFAULT
1336 /* Use the subtarget default CPU if none was specified by
1337 configure. */
1338 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1339 #endif
1340 /* Default to ARM6. */
1341 if (!arm_selected_cpu->name)
1342 arm_selected_cpu = &all_cores[arm6];
1343 }
1344
1345 sel = arm_selected_cpu;
1346 insn_flags = sel->flags;
1347
1348 /* Now check to see if the user has specified some command line
1349 switches that require certain abilities from the cpu. */
1350 sought = 0;
1351
1352 if (TARGET_INTERWORK || TARGET_THUMB)
1353 {
1354 sought |= (FL_THUMB | FL_MODE32);
1355
1356 /* There are no ARM processors that support both APCS-26 and
1357 interworking. Therefore we force FL_MODE26 to be removed
1358 from insn_flags here (if it was set), so that the search
1359 below will always be able to find a compatible processor. */
1360 insn_flags &= ~FL_MODE26;
1361 }
1362
1363 if (sought != 0 && ((sought & insn_flags) != sought))
1364 {
1365 /* Try to locate a CPU type that supports all of the abilities
1366 of the default CPU, plus the extra abilities requested by
1367 the user. */
1368 for (sel = all_cores; sel->name != NULL; sel++)
1369 if ((sel->flags & sought) == (sought | insn_flags))
1370 break;
1371
1372 if (sel->name == NULL)
1373 {
1374 unsigned current_bit_count = 0;
1375 const struct processors * best_fit = NULL;
1376
1377 /* Ideally we would like to issue an error message here
1378 saying that it was not possible to find a CPU compatible
1379 with the default CPU, but which also supports the command
1380 line options specified by the programmer, and so they
1381 ought to use the -mcpu=<name> command line option to
1382 override the default CPU type.
1383
1384 If we cannot find a cpu that has both the
1385 characteristics of the default cpu and the given
1386 command line options we scan the array again looking
1387 for a best match. */
1388 for (sel = all_cores; sel->name != NULL; sel++)
1389 if ((sel->flags & sought) == sought)
1390 {
1391 unsigned count;
1392
1393 count = bit_count (sel->flags & insn_flags);
1394
1395 if (count >= current_bit_count)
1396 {
1397 best_fit = sel;
1398 current_bit_count = count;
1399 }
1400 }
1401
1402 gcc_assert (best_fit);
1403 sel = best_fit;
1404 }
1405
1406 arm_selected_cpu = sel;
1407 }
1408 }
1409
1410 gcc_assert (arm_selected_cpu);
1411 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1412 if (!arm_selected_tune)
1413 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1414
1415 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1416 insn_flags = arm_selected_cpu->flags;
1417
1418 arm_tune = arm_selected_tune->core;
1419 tune_flags = arm_selected_tune->flags;
1420 current_tune = arm_selected_tune->tune;
1421
1422 /* Make sure that the processor choice does not conflict with any of the
1423 other command line choices. */
1424 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1425 error ("target CPU does not support ARM mode");
1426
1427 /* BPABI targets use linker tricks to allow interworking on cores
1428 without thumb support. */
1429 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1430 {
1431 warning (0, "target CPU does not support interworking" );
1432 target_flags &= ~MASK_INTERWORK;
1433 }
1434
1435 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1436 {
1437 warning (0, "target CPU does not support THUMB instructions");
1438 target_flags &= ~MASK_THUMB;
1439 }
1440
1441 if (TARGET_APCS_FRAME && TARGET_THUMB)
1442 {
1443 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1444 target_flags &= ~MASK_APCS_FRAME;
1445 }
1446
1447 /* Callee super interworking implies thumb interworking. Adding
1448 this to the flags here simplifies the logic elsewhere. */
1449 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1450 target_flags |= MASK_INTERWORK;
1451
1452 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1453 from here where no function is being compiled currently. */
1454 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1455 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1456
1457 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1458 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1459
1460 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1461 {
1462 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1463 target_flags |= MASK_APCS_FRAME;
1464 }
1465
1466 if (TARGET_POKE_FUNCTION_NAME)
1467 target_flags |= MASK_APCS_FRAME;
1468
1469 if (TARGET_APCS_REENT && flag_pic)
1470 error ("-fpic and -mapcs-reent are incompatible");
1471
1472 if (TARGET_APCS_REENT)
1473 warning (0, "APCS reentrant code not supported. Ignored");
1474
1475 /* If this target is normally configured to use APCS frames, warn if they
1476 are turned off and debugging is turned on. */
1477 if (TARGET_ARM
1478 && write_symbols != NO_DEBUG
1479 && !TARGET_APCS_FRAME
1480 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1481 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1482
1483 if (TARGET_APCS_FLOAT)
1484 warning (0, "passing floating point arguments in fp regs not yet supported");
1485
1486 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1487 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1488 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1489 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1490 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1491 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1492 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1493 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1494 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1495 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1496 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1497 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1498 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1499 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1500
1501 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1502 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1503 thumb_code = TARGET_ARM == 0;
1504 thumb1_code = TARGET_THUMB1 != 0;
1505 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1506 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1507 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1508 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1509 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1510 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1511
1512 /* If we are not using the default (ARM mode) section anchor offset
1513 ranges, then set the correct ranges now. */
1514 if (TARGET_THUMB1)
1515 {
1516 /* Thumb-1 LDR instructions cannot have negative offsets.
1517 Permissible positive offset ranges are 5-bit (for byte loads),
1518 6-bit (for halfword loads), or 7-bit (for word loads).
1519 Empirical results suggest a 7-bit anchor range gives the best
1520 overall code size. */
1521 targetm.min_anchor_offset = 0;
1522 targetm.max_anchor_offset = 127;
1523 }
1524 else if (TARGET_THUMB2)
1525 {
1526 /* The minimum is set such that the total size of the block
1527 for a particular anchor is 248 + 1 + 4095 bytes, which is
1528 divisible by eight, ensuring natural spacing of anchors. */
1529 targetm.min_anchor_offset = -248;
1530 targetm.max_anchor_offset = 4095;
1531 }
1532
1533 /* V5 code we generate is completely interworking capable, so we turn off
1534 TARGET_INTERWORK here to avoid many tests later on. */
1535
1536 /* XXX However, we must pass the right pre-processor defines to CPP
1537 or GLD can get confused. This is a hack. */
1538 if (TARGET_INTERWORK)
1539 arm_cpp_interwork = 1;
1540
1541 if (arm_arch5)
1542 target_flags &= ~MASK_INTERWORK;
1543
1544 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1545 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1546
1547 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1548 error ("iwmmxt abi requires an iwmmxt capable cpu");
1549
1550 if (!global_options_set.x_arm_fpu_index)
1551 {
1552 const char *target_fpu_name;
1553 bool ok;
1554
1555 #ifdef FPUTYPE_DEFAULT
1556 target_fpu_name = FPUTYPE_DEFAULT;
1557 #else
1558 if (arm_arch_cirrus)
1559 target_fpu_name = "maverick";
1560 else
1561 target_fpu_name = "fpe2";
1562 #endif
1563
1564 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1565 CL_TARGET);
1566 gcc_assert (ok);
1567 }
1568
1569 arm_fpu_desc = &all_fpus[arm_fpu_index];
1570
1571 switch (arm_fpu_desc->model)
1572 {
1573 case ARM_FP_MODEL_FPA:
1574 if (arm_fpu_desc->rev == 2)
1575 arm_fpu_attr = FPU_FPE2;
1576 else if (arm_fpu_desc->rev == 3)
1577 arm_fpu_attr = FPU_FPE3;
1578 else
1579 arm_fpu_attr = FPU_FPA;
1580 break;
1581
1582 case ARM_FP_MODEL_MAVERICK:
1583 arm_fpu_attr = FPU_MAVERICK;
1584 break;
1585
1586 case ARM_FP_MODEL_VFP:
1587 arm_fpu_attr = FPU_VFP;
1588 break;
1589
1590 default:
1591 gcc_unreachable();
1592 }
1593
1594 if (TARGET_AAPCS_BASED
1595 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1596 error ("FPA is unsupported in the AAPCS");
1597
1598 if (TARGET_AAPCS_BASED)
1599 {
1600 if (TARGET_CALLER_INTERWORKING)
1601 error ("AAPCS does not support -mcaller-super-interworking");
1602 else
1603 if (TARGET_CALLEE_INTERWORKING)
1604 error ("AAPCS does not support -mcallee-super-interworking");
1605 }
1606
1607 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1608 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1609 will ever exist. GCC makes no attempt to support this combination. */
1610 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1611 sorry ("iWMMXt and hardware floating point");
1612
1613 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1614 if (TARGET_THUMB2 && TARGET_IWMMXT)
1615 sorry ("Thumb-2 iWMMXt");
1616
1617 /* __fp16 support currently assumes the core has ldrh. */
1618 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1619 sorry ("__fp16 and no ldrh");
1620
1621 /* If soft-float is specified then don't use FPU. */
1622 if (TARGET_SOFT_FLOAT)
1623 arm_fpu_attr = FPU_NONE;
1624
1625 if (TARGET_AAPCS_BASED)
1626 {
1627 if (arm_abi == ARM_ABI_IWMMXT)
1628 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1629 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1630 && TARGET_HARD_FLOAT
1631 && TARGET_VFP)
1632 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1633 else
1634 arm_pcs_default = ARM_PCS_AAPCS;
1635 }
1636 else
1637 {
1638 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1639 sorry ("-mfloat-abi=hard and VFP");
1640
1641 if (arm_abi == ARM_ABI_APCS)
1642 arm_pcs_default = ARM_PCS_APCS;
1643 else
1644 arm_pcs_default = ARM_PCS_ATPCS;
1645 }
1646
1647 /* For arm2/3 there is no need to do any scheduling if there is only
1648 a floating point emulator, or we are doing software floating-point. */
1649 if ((TARGET_SOFT_FLOAT
1650 || (TARGET_FPA && arm_fpu_desc->rev))
1651 && (tune_flags & FL_MODE32) == 0)
1652 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1653
1654 /* Use the cp15 method if it is available. */
1655 if (target_thread_pointer == TP_AUTO)
1656 {
1657 if (arm_arch6k && !TARGET_THUMB1)
1658 target_thread_pointer = TP_CP15;
1659 else
1660 target_thread_pointer = TP_SOFT;
1661 }
1662
1663 if (TARGET_HARD_TP && TARGET_THUMB1)
1664 error ("can not use -mtp=cp15 with 16-bit Thumb");
1665
1666 /* Override the default structure alignment for AAPCS ABI. */
1667 if (!global_options_set.x_arm_structure_size_boundary)
1668 {
1669 if (TARGET_AAPCS_BASED)
1670 arm_structure_size_boundary = 8;
1671 }
1672 else
1673 {
1674 if (arm_structure_size_boundary != 8
1675 && arm_structure_size_boundary != 32
1676 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1677 {
1678 if (ARM_DOUBLEWORD_ALIGN)
1679 warning (0,
1680 "structure size boundary can only be set to 8, 32 or 64");
1681 else
1682 warning (0, "structure size boundary can only be set to 8 or 32");
1683 arm_structure_size_boundary
1684 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1685 }
1686 }
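/* To make the option's effect concrete (sizes are for the usual 8-bit
   char): with -mstructure-size-boundary=32 a type such as
   'struct { char c; }' is padded out to 4 bytes (and to 8 with the
   doubleword value 64), whereas the AAPCS setting of 8 leaves its
   size at 1 byte.  */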
1687
1688 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1689 {
1690 error ("RTP PIC is incompatible with Thumb");
1691 flag_pic = 0;
1692 }
1693
1694 /* If stack checking is disabled, we can use r10 as the PIC register,
1695 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1696 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1697 {
1698 if (TARGET_VXWORKS_RTP)
1699 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1700 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1701 }
1702
1703 if (flag_pic && TARGET_VXWORKS_RTP)
1704 arm_pic_register = 9;
1705
1706 if (arm_pic_register_string != NULL)
1707 {
1708 int pic_register = decode_reg_name (arm_pic_register_string);
1709
1710 if (!flag_pic)
1711 warning (0, "-mpic-register= is useless without -fpic");
1712
1713 /* Prevent the user from choosing an obviously stupid PIC register. */
1714 else if (pic_register < 0 || call_used_regs[pic_register]
1715 || pic_register == HARD_FRAME_POINTER_REGNUM
1716 || pic_register == STACK_POINTER_REGNUM
1717 || pic_register >= PC_REGNUM
1718 || (TARGET_VXWORKS_RTP
1719 && (unsigned int) pic_register != arm_pic_register))
1720 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1721 else
1722 arm_pic_register = pic_register;
1723 }
1724
1725 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1726 if (fix_cm3_ldrd == 2)
1727 {
1728 if (arm_selected_cpu->core == cortexm3)
1729 fix_cm3_ldrd = 1;
1730 else
1731 fix_cm3_ldrd = 0;
1732 }
1733
1734 if (TARGET_THUMB1 && flag_schedule_insns)
1735 {
1736 /* Don't warn since it's on by default in -O2. */
1737 flag_schedule_insns = 0;
1738 }
1739
1740 if (optimize_size)
1741 {
1742 /* If optimizing for size, bump the number of instructions that we
1743 are prepared to conditionally execute (even on a StrongARM). */
1744 max_insns_skipped = 6;
1745 }
1746 else
1747 max_insns_skipped = current_tune->max_insns_skipped;
1748
1749 /* Hot/Cold partitioning is not currently supported, since we can't
1750 handle literal pool placement in that case. */
1751 if (flag_reorder_blocks_and_partition)
1752 {
1753 inform (input_location,
1754 "-freorder-blocks-and-partition not supported on this architecture");
1755 flag_reorder_blocks_and_partition = 0;
1756 flag_reorder_blocks = 1;
1757 }
1758
1759 if (flag_pic)
1760 /* Hoisting PIC address calculations more aggressively provides a small,
1761 but measurable, size reduction for PIC code. Therefore, we decrease
1762 the bar for unrestricted expression hoisting to the cost of PIC address
1763 calculation, which is 2 instructions. */
1764 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1765 global_options.x_param_values,
1766 global_options_set.x_param_values);
1767
1768 /* ARM EABI defaults to strict volatile bitfields. */
1769 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
1770 flag_strict_volatile_bitfields = 1;
1771
1772 /* Enable sw prefetching at -O3 for CPUs that have prefetch and where we have
1773 deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
1774 if (flag_prefetch_loop_arrays < 0
1775 && HAVE_prefetch
1776 && optimize >= 3
1777 && current_tune->num_prefetch_slots > 0)
1778 flag_prefetch_loop_arrays = 1;
1779
1780 /* Set up parameters to be used in the prefetching algorithm. Do not override
1781 the defaults unless we are tuning for a core for which we have researched values. */
1782 if (current_tune->num_prefetch_slots > 0)
1783 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1784 current_tune->num_prefetch_slots,
1785 global_options.x_param_values,
1786 global_options_set.x_param_values);
1787 if (current_tune->l1_cache_line_size >= 0)
1788 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1789 current_tune->l1_cache_line_size,
1790 global_options.x_param_values,
1791 global_options_set.x_param_values);
1792 if (current_tune->l1_cache_size >= 0)
1793 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1794 current_tune->l1_cache_size,
1795 global_options.x_param_values,
1796 global_options_set.x_param_values);
1797
1798 /* Register global variables with the garbage collector. */
1799 arm_add_gc_roots ();
1800 }
1801
1802 static void
1803 arm_add_gc_roots (void)
1804 {
1805 gcc_obstack_init(&minipool_obstack);
1806 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1807 }
1808 \f
1809 /* A table of known ARM exception types.
1810 For use with the interrupt function attribute. */
1811
1812 typedef struct
1813 {
1814 const char *const arg;
1815 const unsigned long return_value;
1816 }
1817 isr_attribute_arg;
1818
1819 static const isr_attribute_arg isr_attribute_args [] =
1820 {
1821 { "IRQ", ARM_FT_ISR },
1822 { "irq", ARM_FT_ISR },
1823 { "FIQ", ARM_FT_FIQ },
1824 { "fiq", ARM_FT_FIQ },
1825 { "ABORT", ARM_FT_ISR },
1826 { "abort", ARM_FT_ISR },
1827 { "ABORT", ARM_FT_ISR },
1828 { "abort", ARM_FT_ISR },
1829 { "UNDEF", ARM_FT_EXCEPTION },
1830 { "undef", ARM_FT_EXCEPTION },
1831 { "SWI", ARM_FT_EXCEPTION },
1832 { "swi", ARM_FT_EXCEPTION },
1833 { NULL, ARM_FT_NORMAL }
1834 };
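/* For illustration, a handler selecting one of these entries might be
   declared in user code along these lines (the function name is
   hypothetical):

       void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   arm_isr_value below maps the "IRQ" string to ARM_FT_ISR, which in turn
   selects the interrupt-specific prologue and epilogue sequences.  */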
1835
1836 /* Returns the (interrupt) function type of the current
1837 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1838
1839 static unsigned long
1840 arm_isr_value (tree argument)
1841 {
1842 const isr_attribute_arg * ptr;
1843 const char * arg;
1844
1845 if (!arm_arch_notm)
1846 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1847
1848 /* No argument - default to IRQ. */
1849 if (argument == NULL_TREE)
1850 return ARM_FT_ISR;
1851
1852 /* Get the value of the argument. */
1853 if (TREE_VALUE (argument) == NULL_TREE
1854 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1855 return ARM_FT_UNKNOWN;
1856
1857 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1858
1859 /* Check it against the list of known arguments. */
1860 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1861 if (streq (arg, ptr->arg))
1862 return ptr->return_value;
1863
1864 /* An unrecognized interrupt type. */
1865 return ARM_FT_UNKNOWN;
1866 }
1867
1868 /* Computes the type of the current function. */
1869
1870 static unsigned long
1871 arm_compute_func_type (void)
1872 {
1873 unsigned long type = ARM_FT_UNKNOWN;
1874 tree a;
1875 tree attr;
1876
1877 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1878
1879 /* Decide if the current function is volatile. Such functions
1880 never return, and many memory cycles can be saved by not storing
1881 register values that will never be needed again. This optimization
1882 was added to speed up context switching in a kernel application. */
1883 if (optimize > 0
1884 && (TREE_NOTHROW (current_function_decl)
1885 || !(flag_unwind_tables
1886 || (flag_exceptions
1887 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
1888 && TREE_THIS_VOLATILE (current_function_decl))
1889 type |= ARM_FT_VOLATILE;
1890
1891 if (cfun->static_chain_decl != NULL)
1892 type |= ARM_FT_NESTED;
1893
1894 attr = DECL_ATTRIBUTES (current_function_decl);
1895
1896 a = lookup_attribute ("naked", attr);
1897 if (a != NULL_TREE)
1898 type |= ARM_FT_NAKED;
1899
1900 a = lookup_attribute ("isr", attr);
1901 if (a == NULL_TREE)
1902 a = lookup_attribute ("interrupt", attr);
1903
1904 if (a == NULL_TREE)
1905 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1906 else
1907 type |= arm_isr_value (TREE_VALUE (a));
1908
1909 return type;
1910 }
1911
1912 /* Returns the type of the current function. */
1913
1914 unsigned long
1915 arm_current_func_type (void)
1916 {
1917 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1918 cfun->machine->func_type = arm_compute_func_type ();
1919
1920 return cfun->machine->func_type;
1921 }
1922
1923 bool
1924 arm_allocate_stack_slots_for_args (void)
1925 {
1926 /* Naked functions should not allocate stack slots for arguments. */
1927 return !IS_NAKED (arm_current_func_type ());
1928 }
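/* As a sketch of the case being tested here: a function declared with
   the "naked" attribute, e.g.

       void reset_entry (void) __attribute__ ((naked));

   gets no compiler-generated prologue or epilogue, so reserving stack
   slots for its incoming arguments would only waste space.  (The
   function name above is illustrative.)  */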
1929
1930 \f
1931 /* Output assembler code for a block containing the constant parts
1932 of a trampoline, leaving space for the variable parts.
1933
1934 On the ARM, (if r8 is the static chain regnum, and remembering that
1935 referencing pc adds an offset of 8) the trampoline looks like:
1936 ldr r8, [pc, #0]
1937 ldr pc, [pc]
1938 .word static chain value
1939 .word function's address
1940 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
1941
1942 static void
1943 arm_asm_trampoline_template (FILE *f)
1944 {
1945 if (TARGET_ARM)
1946 {
1947 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
1948 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
1949 }
1950 else if (TARGET_THUMB2)
1951 {
1952 /* The Thumb-2 trampoline is similar to the arm implementation.
1953 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
1954 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
1955 STATIC_CHAIN_REGNUM, PC_REGNUM);
1956 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
1957 }
1958 else
1959 {
1960 ASM_OUTPUT_ALIGN (f, 2);
1961 fprintf (f, "\t.code\t16\n");
1962 fprintf (f, ".Ltrampoline_start:\n");
1963 asm_fprintf (f, "\tpush\t{r0, r1}\n");
1964 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
1965 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
1966 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
1967 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
1968 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
1969 }
1970 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
1971 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
1972 }
1973
1974 /* Emit RTL insns to initialize the variable parts of a trampoline. */
1975
1976 static void
1977 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
1978 {
1979 rtx fnaddr, mem, a_tramp;
1980
1981 emit_block_move (m_tramp, assemble_trampoline_template (),
1982 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
1983
1984 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
1985 emit_move_insn (mem, chain_value);
1986
1987 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
1988 fnaddr = XEXP (DECL_RTL (fndecl), 0);
1989 emit_move_insn (mem, fnaddr);
1990
1991 a_tramp = XEXP (m_tramp, 0);
1992 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
1993 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
1994 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
1995 }
1996
1997 /* Thumb trampolines should be entered in thumb mode, so set
1998 the bottom bit of the address. */
1999
2000 static rtx
2001 arm_trampoline_adjust_address (rtx addr)
2002 {
2003 if (TARGET_THUMB)
2004 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2005 NULL, 0, OPTAB_LIB_WIDEN);
2006 return addr;
2007 }
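/* A quick illustration (address purely hypothetical): for a Thumb
   trampoline placed at 0x20001000, the value handed back to callers
   becomes

       0x20001000 | 1 == 0x20001001

   so that a BX/BLX to it enters the stub in Thumb state; the low bit is
   consumed by the branch and is not part of the instruction address.  */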
2008 \f
2009 /* Return 1 if it is possible to return using a single instruction.
2010 If SIBLING is non-null, this is a test for a return before a sibling
2011 call. SIBLING is the call insn, so we can examine its register usage. */
2012
2013 int
2014 use_return_insn (int iscond, rtx sibling)
2015 {
2016 int regno;
2017 unsigned int func_type;
2018 unsigned long saved_int_regs;
2019 unsigned HOST_WIDE_INT stack_adjust;
2020 arm_stack_offsets *offsets;
2021
2022 /* Never use a return instruction before reload has run. */
2023 if (!reload_completed)
2024 return 0;
2025
2026 func_type = arm_current_func_type ();
2027
2028 /* Naked, volatile and stack alignment functions need special
2029 consideration. */
2030 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2031 return 0;
2032
2033 /* So do interrupt functions that use the frame pointer and Thumb
2034 interrupt functions. */
2035 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2036 return 0;
2037
2038 offsets = arm_get_frame_offsets ();
2039 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2040
2041 /* As do variadic functions. */
2042 if (crtl->args.pretend_args_size
2043 || cfun->machine->uses_anonymous_args
2044 /* Or if the function calls __builtin_eh_return () */
2045 || crtl->calls_eh_return
2046 /* Or if the function calls alloca */
2047 || cfun->calls_alloca
2048 /* Or if there is a stack adjustment. However, if the stack pointer
2049 is saved on the stack, we can use a pre-incrementing stack load. */
2050 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2051 && stack_adjust == 4)))
2052 return 0;
2053
2054 saved_int_regs = offsets->saved_regs_mask;
2055
2056 /* Unfortunately, the insn
2057
2058 ldmib sp, {..., sp, ...}
2059
2060 triggers a bug on most SA-110 based devices, such that the stack
2061 pointer won't be correctly restored if the instruction takes a
2062 page fault. We work around this problem by popping r3 along with
2063 the other registers, since that is never slower than executing
2064 another instruction.
2065
2066 We test for !arm_arch5 here, because code for any architecture
2067 less than this could potentially be run on one of the buggy
2068 chips. */
2069 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2070 {
2071 /* Validate that r3 is a call-clobbered register (always true in
2072 the default abi) ... */
2073 if (!call_used_regs[3])
2074 return 0;
2075
2076 /* ... that it isn't being used for a return value ... */
2077 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2078 return 0;
2079
2080 /* ... or for a tail-call argument ... */
2081 if (sibling)
2082 {
2083 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2084
2085 if (find_regno_fusage (sibling, USE, 3))
2086 return 0;
2087 }
2088
2089 /* ... and that there are no call-saved registers in r0-r2
2090 (always true in the default ABI). */
2091 if (saved_int_regs & 0x7)
2092 return 0;
2093 }
2094
2095 /* Can't be done if interworking with Thumb, and any registers have been
2096 stacked. */
2097 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2098 return 0;
2099
2100 /* On StrongARM, conditional returns are expensive if they aren't
2101 taken and multiple registers have been stacked. */
2102 if (iscond && arm_tune_strongarm)
2103 {
2104 /* Conditional return when just the LR is stored is a simple
2105 conditional-load instruction, that's not expensive. */
2106 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2107 return 0;
2108
2109 if (flag_pic
2110 && arm_pic_register != INVALID_REGNUM
2111 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2112 return 0;
2113 }
2114
2115 /* If there are saved registers but the LR isn't saved, then we need
2116 two instructions for the return. */
2117 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2118 return 0;
2119
2120 /* Can't be done if any of the FPA regs are pushed,
2121 since this also requires an insn. */
2122 if (TARGET_HARD_FLOAT && TARGET_FPA)
2123 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2124 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2125 return 0;
2126
2127 /* Likewise VFP regs. */
2128 if (TARGET_HARD_FLOAT && TARGET_VFP)
2129 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2130 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2131 return 0;
2132
2133 if (TARGET_REALLY_IWMMXT)
2134 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2135 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2136 return 0;
2137
2138 return 1;
2139 }
2140
2141 /* Return TRUE if int I is a valid immediate ARM constant. */
2142
2143 int
2144 const_ok_for_arm (HOST_WIDE_INT i)
2145 {
2146 int lowbit;
2147
2148 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2149 be all zero, or all one. */
2150 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2151 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2152 != ((~(unsigned HOST_WIDE_INT) 0)
2153 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2154 return FALSE;
2155
2156 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2157
2158 /* Fast return for 0 and small values. We must do this for zero, since
2159 the code below can't handle that one case. */
2160 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2161 return TRUE;
2162
2163 /* Get the number of trailing zeros. */
2164 lowbit = ffs((int) i) - 1;
2165
2166 /* Only even shifts are allowed in ARM mode so round down to the
2167 nearest even number. */
2168 if (TARGET_ARM)
2169 lowbit &= ~1;
2170
2171 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2172 return TRUE;
2173
2174 if (TARGET_ARM)
2175 {
2176 /* Allow rotated constants in ARM mode. */
2177 if (lowbit <= 4
2178 && ((i & ~0xc000003f) == 0
2179 || (i & ~0xf000000f) == 0
2180 || (i & ~0xfc000003) == 0))
2181 return TRUE;
2182 }
2183 else
2184 {
2185 HOST_WIDE_INT v;
2186
2187 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2188 v = i & 0xff;
2189 v |= v << 16;
2190 if (i == v || i == (v | (v << 8)))
2191 return TRUE;
2192
2193 /* Allow repeated pattern 0xXY00XY00. */
2194 v = i & 0xff00;
2195 v |= v << 16;
2196 if (i == v)
2197 return TRUE;
2198 }
2199
2200 return FALSE;
2201 }
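/* A few illustrative inputs for the test above (ARM mode): 0xFF,
   0xFF000000 and 0x104 (0x41 << 2) can each be expressed as an 8-bit
   value rotated right by an even amount, so they return TRUE; 0x101
   needs a 9-bit window and 0x1FE needs an odd rotation, so both return
   FALSE in ARM mode (0x1FE is accepted for Thumb-2, where an 8-bit
   value may be shifted by any amount).  */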
2202
2203 /* Return true if I is a valid constant for the operation CODE. */
2204 static int
2205 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2206 {
2207 if (const_ok_for_arm (i))
2208 return 1;
2209
2210 switch (code)
2211 {
2212 case SET:
2213 /* See if we can use movw. */
2214 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2215 return 1;
2216 else
2217 /* Otherwise, try mvn. */
2218 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2219
2220 case PLUS:
2221 case COMPARE:
2222 case EQ:
2223 case NE:
2224 case GT:
2225 case LE:
2226 case LT:
2227 case GE:
2228 case GEU:
2229 case LTU:
2230 case GTU:
2231 case LEU:
2232 case UNORDERED:
2233 case ORDERED:
2234 case UNEQ:
2235 case UNGE:
2236 case UNLT:
2237 case UNGT:
2238 case UNLE:
2239 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2240
2241 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2242 case XOR:
2243 return 0;
2244
2245 case IOR:
2246 if (TARGET_THUMB2)
2247 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2248 return 0;
2249
2250 case AND:
2251 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2252
2253 default:
2254 gcc_unreachable ();
2255 }
2256 }
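/* A couple of worked cases for the rules above: for SET with
   i == 0xFFFFFF00, i itself is not a valid immediate but ~i == 0xFF is,
   so a single MVN suffices; for PLUS with i == -255, -i is valid, so the
   addition can be emitted as a SUB of 255; for AND with i == 0xFFFFFF00,
   ~i == 0xFF is valid, so the operation maps onto BIC.  */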
2257
2258 /* Emit a sequence of insns to handle a large constant.
2259 CODE is the code of the operation required, it can be any of SET, PLUS,
2260 IOR, AND, XOR, MINUS;
2261 MODE is the mode in which the operation is being performed;
2262 VAL is the integer to operate on;
2263 SOURCE is the other operand (a register, or a null-pointer for SET);
2264 SUBTARGETS means it is safe to create scratch registers if that will
2265 either produce a simpler sequence, or we will want to cse the values.
2266 Return value is the number of insns emitted. */
2267
2268 /* ??? Tweak this for thumb2. */
2269 int
2270 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2271 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2272 {
2273 rtx cond;
2274
2275 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2276 cond = COND_EXEC_TEST (PATTERN (insn));
2277 else
2278 cond = NULL_RTX;
2279
2280 if (subtargets || code == SET
2281 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2282 && REGNO (target) != REGNO (source)))
2283 {
2284 /* After arm_reorg has been called, we can't fix up expensive
2285 constants by pushing them into memory so we must synthesize
2286 them in-line, regardless of the cost. This is only likely to
2287 be more costly on chips that have load delay slots and we are
2288 compiling without running the scheduler (so no splitting
2289 occurred before the final instruction emission).
2290
2291 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2292 */
2293 if (!after_arm_reorg
2294 && !cond
2295 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2296 1, 0)
2297 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2298 + (code != SET))))
2299 {
2300 if (code == SET)
2301 {
2302 /* Currently SET is the only monadic value for CODE; all
2303 the rest are dyadic. */
2304 if (TARGET_USE_MOVT)
2305 arm_emit_movpair (target, GEN_INT (val));
2306 else
2307 emit_set_insn (target, GEN_INT (val));
2308
2309 return 1;
2310 }
2311 else
2312 {
2313 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2314
2315 if (TARGET_USE_MOVT)
2316 arm_emit_movpair (temp, GEN_INT (val));
2317 else
2318 emit_set_insn (temp, GEN_INT (val));
2319
2320 /* For MINUS, the value is subtracted from, since we never
2321 have subtraction of a constant. */
2322 if (code == MINUS)
2323 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2324 else
2325 emit_set_insn (target,
2326 gen_rtx_fmt_ee (code, mode, source, temp));
2327 return 2;
2328 }
2329 }
2330 }
2331
2332 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2333 1);
2334 }
2335
2336 /* Return the number of instructions required to synthesize the given
2337 constant, if we start emitting them from bit-position I. */
2338 static int
2339 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2340 {
2341 HOST_WIDE_INT temp1;
2342 int step_size = TARGET_ARM ? 2 : 1;
2343 int num_insns = 0;
2344
2345 gcc_assert (TARGET_ARM || i == 0);
2346
2347 do
2348 {
2349 int end;
2350
2351 if (i <= 0)
2352 i += 32;
2353 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2354 {
2355 end = i - 8;
2356 if (end < 0)
2357 end += 32;
2358 temp1 = remainder & ((0x0ff << end)
2359 | ((i < end) ? (0xff >> (32 - end)) : 0));
2360 remainder &= ~temp1;
2361 num_insns++;
2362 i -= 8 - step_size;
2363 }
2364 i -= step_size;
2365 } while (remainder);
2366 return num_insns;
2367 }
2368
2369 static int
2370 find_best_start (unsigned HOST_WIDE_INT remainder)
2371 {
2372 int best_consecutive_zeros = 0;
2373 int i;
2374 int best_start = 0;
2375
2376 /* If we aren't targeting ARM, the best place to start is always at
2377 the bottom. */
2378 if (! TARGET_ARM)
2379 return 0;
2380
2381 for (i = 0; i < 32; i += 2)
2382 {
2383 int consecutive_zeros = 0;
2384
2385 if (!(remainder & (3 << i)))
2386 {
2387 while ((i < 32) && !(remainder & (3 << i)))
2388 {
2389 consecutive_zeros += 2;
2390 i += 2;
2391 }
2392 if (consecutive_zeros > best_consecutive_zeros)
2393 {
2394 best_consecutive_zeros = consecutive_zeros;
2395 best_start = i - consecutive_zeros;
2396 }
2397 i -= 2;
2398 }
2399 }
2400
2401 /* So long as it won't require any more insns to do so, it's
2402 desirable to emit a small constant (in bits 0...9) in the last
2403 insn. This way there is more chance that it can be combined with
2404 a later addressing insn to form a pre-indexed load or store
2405 operation. Consider:
2406
2407 *((volatile int *)0xe0000100) = 1;
2408 *((volatile int *)0xe0000110) = 2;
2409
2410 We want this to wind up as:
2411
2412 mov rA, #0xe0000000
2413 mov rB, #1
2414 str rB, [rA, #0x100]
2415 mov rB, #2
2416 str rB, [rA, #0x110]
2417
2418 rather than having to synthesize both large constants from scratch.
2419
2420 Therefore, we calculate how many insns would be required to emit
2421 the constant starting from `best_start', and also starting from
2422 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2423 yield a shorter sequence, we may as well use zero. */
2424 if (best_start != 0
2425 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2426 && (count_insns_for_constant (remainder, 0) <=
2427 count_insns_for_constant (remainder, best_start)))
2428 best_start = 0;
2429
2430 return best_start;
2431 }
2432
2433 /* Emit an instruction with the indicated PATTERN. If COND is
2434 non-NULL, conditionalize the execution of the instruction on COND
2435 being true. */
2436
2437 static void
2438 emit_constant_insn (rtx cond, rtx pattern)
2439 {
2440 if (cond)
2441 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2442 emit_insn (pattern);
2443 }
2444
2445 /* As above, but extra parameter GENERATE which, if clear, suppresses
2446 RTL generation. */
2447 /* ??? This needs more work for thumb2. */
2448
2449 static int
2450 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2451 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2452 int generate)
2453 {
2454 int can_invert = 0;
2455 int can_negate = 0;
2456 int final_invert = 0;
2457 int i;
2458 int num_bits_set = 0;
2459 int set_sign_bit_copies = 0;
2460 int clear_sign_bit_copies = 0;
2461 int clear_zero_bit_copies = 0;
2462 int set_zero_bit_copies = 0;
2463 int insns = 0;
2464 unsigned HOST_WIDE_INT temp1, temp2;
2465 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2466 int step_size = TARGET_ARM ? 2 : 1;
2467
2468 /* Find out which operations are safe for a given CODE. Also do a quick
2469 check for degenerate cases; these can occur when DImode operations
2470 are split. */
2471 switch (code)
2472 {
2473 case SET:
2474 can_invert = 1;
2475 can_negate = 1;
2476 break;
2477
2478 case PLUS:
2479 can_negate = 1;
2480 break;
2481
2482 case IOR:
2483 if (remainder == 0xffffffff)
2484 {
2485 if (generate)
2486 emit_constant_insn (cond,
2487 gen_rtx_SET (VOIDmode, target,
2488 GEN_INT (ARM_SIGN_EXTEND (val))));
2489 return 1;
2490 }
2491
2492 if (remainder == 0)
2493 {
2494 if (reload_completed && rtx_equal_p (target, source))
2495 return 0;
2496
2497 if (generate)
2498 emit_constant_insn (cond,
2499 gen_rtx_SET (VOIDmode, target, source));
2500 return 1;
2501 }
2502 break;
2503
2504 case AND:
2505 if (remainder == 0)
2506 {
2507 if (generate)
2508 emit_constant_insn (cond,
2509 gen_rtx_SET (VOIDmode, target, const0_rtx));
2510 return 1;
2511 }
2512 if (remainder == 0xffffffff)
2513 {
2514 if (reload_completed && rtx_equal_p (target, source))
2515 return 0;
2516 if (generate)
2517 emit_constant_insn (cond,
2518 gen_rtx_SET (VOIDmode, target, source));
2519 return 1;
2520 }
2521 can_invert = 1;
2522 break;
2523
2524 case XOR:
2525 if (remainder == 0)
2526 {
2527 if (reload_completed && rtx_equal_p (target, source))
2528 return 0;
2529 if (generate)
2530 emit_constant_insn (cond,
2531 gen_rtx_SET (VOIDmode, target, source));
2532 return 1;
2533 }
2534
2535 if (remainder == 0xffffffff)
2536 {
2537 if (generate)
2538 emit_constant_insn (cond,
2539 gen_rtx_SET (VOIDmode, target,
2540 gen_rtx_NOT (mode, source)));
2541 return 1;
2542 }
2543 break;
2544
2545 case MINUS:
2546 /* We treat MINUS as (val - source), since (source - val) is always
2547 passed as (source + (-val)). */
2548 if (remainder == 0)
2549 {
2550 if (generate)
2551 emit_constant_insn (cond,
2552 gen_rtx_SET (VOIDmode, target,
2553 gen_rtx_NEG (mode, source)));
2554 return 1;
2555 }
2556 if (const_ok_for_arm (val))
2557 {
2558 if (generate)
2559 emit_constant_insn (cond,
2560 gen_rtx_SET (VOIDmode, target,
2561 gen_rtx_MINUS (mode, GEN_INT (val),
2562 source)));
2563 return 1;
2564 }
2565 can_negate = 1;
2566
2567 break;
2568
2569 default:
2570 gcc_unreachable ();
2571 }
2572
2573 /* If we can do it in one insn get out quickly. */
2574 if (const_ok_for_op (val, code))
2575 {
2576 if (generate)
2577 emit_constant_insn (cond,
2578 gen_rtx_SET (VOIDmode, target,
2579 (source
2580 ? gen_rtx_fmt_ee (code, mode, source,
2581 GEN_INT (val))
2582 : GEN_INT (val))));
2583 return 1;
2584 }
2585
2586 /* Calculate a few attributes that may be useful for specific
2587 optimizations. */
2588 /* Count number of leading zeros. */
2589 for (i = 31; i >= 0; i--)
2590 {
2591 if ((remainder & (1 << i)) == 0)
2592 clear_sign_bit_copies++;
2593 else
2594 break;
2595 }
2596
2597 /* Count number of leading 1's. */
2598 for (i = 31; i >= 0; i--)
2599 {
2600 if ((remainder & (1 << i)) != 0)
2601 set_sign_bit_copies++;
2602 else
2603 break;
2604 }
2605
2606 /* Count number of trailing zeros. */
2607 for (i = 0; i <= 31; i++)
2608 {
2609 if ((remainder & (1 << i)) == 0)
2610 clear_zero_bit_copies++;
2611 else
2612 break;
2613 }
2614
2615 /* Count number of trailing 1's. */
2616 for (i = 0; i <= 31; i++)
2617 {
2618 if ((remainder & (1 << i)) != 0)
2619 set_zero_bit_copies++;
2620 else
2621 break;
2622 }
2623
2624 switch (code)
2625 {
2626 case SET:
2627 /* See if we can do this by sign_extending a constant that is known
2628 to be negative. This is a good way of doing it, since the shift
2629 may well merge into a subsequent insn. */
2630 if (set_sign_bit_copies > 1)
2631 {
2632 if (const_ok_for_arm
2633 (temp1 = ARM_SIGN_EXTEND (remainder
2634 << (set_sign_bit_copies - 1))))
2635 {
2636 if (generate)
2637 {
2638 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2639 emit_constant_insn (cond,
2640 gen_rtx_SET (VOIDmode, new_src,
2641 GEN_INT (temp1)));
2642 emit_constant_insn (cond,
2643 gen_ashrsi3 (target, new_src,
2644 GEN_INT (set_sign_bit_copies - 1)));
2645 }
2646 return 2;
2647 }
2648 /* For an inverted constant, we will need to set the low bits,
2649 these will be shifted out of harm's way. */
2650 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2651 if (const_ok_for_arm (~temp1))
2652 {
2653 if (generate)
2654 {
2655 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2656 emit_constant_insn (cond,
2657 gen_rtx_SET (VOIDmode, new_src,
2658 GEN_INT (temp1)));
2659 emit_constant_insn (cond,
2660 gen_ashrsi3 (target, new_src,
2661 GEN_INT (set_sign_bit_copies - 1)));
2662 }
2663 return 2;
2664 }
2665 }
2666
2667 /* See if we can calculate the value as the difference between two
2668 valid immediates. */
2669 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2670 {
2671 int topshift = clear_sign_bit_copies & ~1;
2672
2673 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2674 & (0xff000000 >> topshift));
2675
2676 /* If temp1 is zero, then that means the 9 most significant
2677 bits of remainder were 1 and we've caused it to overflow.
2678 When topshift is 0 we don't need to do anything since we
2679 can borrow from 'bit 32'. */
2680 if (temp1 == 0 && topshift != 0)
2681 temp1 = 0x80000000 >> (topshift - 1);
2682
2683 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2684
2685 if (const_ok_for_arm (temp2))
2686 {
2687 if (generate)
2688 {
2689 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2690 emit_constant_insn (cond,
2691 gen_rtx_SET (VOIDmode, new_src,
2692 GEN_INT (temp1)));
2693 emit_constant_insn (cond,
2694 gen_addsi3 (target, new_src,
2695 GEN_INT (-temp2)));
2696 }
2697
2698 return 2;
2699 }
2700 }
2701
2702 /* See if we can generate this by setting the bottom (or the top)
2703 16 bits, and then shifting these into the other half of the
2704 word. We only look for the simplest cases; to do more would cost
2705 too much. Be careful, however, not to generate this when the
2706 alternative would take fewer insns. */
2707 if (val & 0xffff0000)
2708 {
2709 temp1 = remainder & 0xffff0000;
2710 temp2 = remainder & 0x0000ffff;
2711
2712 /* Overlaps outside this range are best done using other methods. */
2713 for (i = 9; i < 24; i++)
2714 {
2715 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2716 && !const_ok_for_arm (temp2))
2717 {
2718 rtx new_src = (subtargets
2719 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2720 : target);
2721 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2722 source, subtargets, generate);
2723 source = new_src;
2724 if (generate)
2725 emit_constant_insn
2726 (cond,
2727 gen_rtx_SET
2728 (VOIDmode, target,
2729 gen_rtx_IOR (mode,
2730 gen_rtx_ASHIFT (mode, source,
2731 GEN_INT (i)),
2732 source)));
2733 return insns + 1;
2734 }
2735 }
2736
2737 /* Don't duplicate cases already considered. */
2738 for (i = 17; i < 24; i++)
2739 {
2740 if (((temp1 | (temp1 >> i)) == remainder)
2741 && !const_ok_for_arm (temp1))
2742 {
2743 rtx new_src = (subtargets
2744 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2745 : target);
2746 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2747 source, subtargets, generate);
2748 source = new_src;
2749 if (generate)
2750 emit_constant_insn
2751 (cond,
2752 gen_rtx_SET (VOIDmode, target,
2753 gen_rtx_IOR
2754 (mode,
2755 gen_rtx_LSHIFTRT (mode, source,
2756 GEN_INT (i)),
2757 source)));
2758 return insns + 1;
2759 }
2760 }
2761 }
2762 break;
2763
2764 case IOR:
2765 case XOR:
2766 /* If we have IOR or XOR, and the constant can be loaded in a
2767 single instruction, and we can find a temporary to put it in,
2768 then this can be done in two instructions instead of 3-4. */
2769 if (subtargets
2770 /* TARGET can't be NULL if SUBTARGETS is 0 */
2771 || (reload_completed && !reg_mentioned_p (target, source)))
2772 {
2773 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2774 {
2775 if (generate)
2776 {
2777 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2778
2779 emit_constant_insn (cond,
2780 gen_rtx_SET (VOIDmode, sub,
2781 GEN_INT (val)));
2782 emit_constant_insn (cond,
2783 gen_rtx_SET (VOIDmode, target,
2784 gen_rtx_fmt_ee (code, mode,
2785 source, sub)));
2786 }
2787 return 2;
2788 }
2789 }
2790
2791 if (code == XOR)
2792 break;
2793
2794 /* Convert:
2795 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
2796 and the remainder 0s for e.g. 0xfff00000)
2797 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2798
2799 This can be done in 2 instructions by using shifts with mov or mvn.
2800 e.g. for
2801 x = x | 0xfff00000;
2802 we generate:
2803 mvn r0, r0, asl #12
2804 mvn r0, r0, lsr #12 */
2805 if (set_sign_bit_copies > 8
2806 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2807 {
2808 if (generate)
2809 {
2810 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2811 rtx shift = GEN_INT (set_sign_bit_copies);
2812
2813 emit_constant_insn
2814 (cond,
2815 gen_rtx_SET (VOIDmode, sub,
2816 gen_rtx_NOT (mode,
2817 gen_rtx_ASHIFT (mode,
2818 source,
2819 shift))));
2820 emit_constant_insn
2821 (cond,
2822 gen_rtx_SET (VOIDmode, target,
2823 gen_rtx_NOT (mode,
2824 gen_rtx_LSHIFTRT (mode, sub,
2825 shift))));
2826 }
2827 return 2;
2828 }
2829
2830 /* Convert
2831 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2832 to
2833 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2834
2835 E.g. r0 = r0 | 0xfff
2836 mvn r0, r0, lsr #12
2837 mvn r0, r0, asl #12
2838
2839 */
2840 if (set_zero_bit_copies > 8
2841 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2842 {
2843 if (generate)
2844 {
2845 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2846 rtx shift = GEN_INT (set_zero_bit_copies);
2847
2848 emit_constant_insn
2849 (cond,
2850 gen_rtx_SET (VOIDmode, sub,
2851 gen_rtx_NOT (mode,
2852 gen_rtx_LSHIFTRT (mode,
2853 source,
2854 shift))));
2855 emit_constant_insn
2856 (cond,
2857 gen_rtx_SET (VOIDmode, target,
2858 gen_rtx_NOT (mode,
2859 gen_rtx_ASHIFT (mode, sub,
2860 shift))));
2861 }
2862 return 2;
2863 }
2864
2865 /* This will never be reached for Thumb2 because orn is a valid
2866 instruction. This is for Thumb1 and the ARM 32 bit cases.
2867
2868 x = y | constant (such that ~constant is a valid constant)
2869 Transform this to
2870 x = ~(~y & ~constant).
2871 */
2872 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2873 {
2874 if (generate)
2875 {
2876 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2877 emit_constant_insn (cond,
2878 gen_rtx_SET (VOIDmode, sub,
2879 gen_rtx_NOT (mode, source)));
2880 source = sub;
2881 if (subtargets)
2882 sub = gen_reg_rtx (mode);
2883 emit_constant_insn (cond,
2884 gen_rtx_SET (VOIDmode, sub,
2885 gen_rtx_AND (mode, source,
2886 GEN_INT (temp1))));
2887 emit_constant_insn (cond,
2888 gen_rtx_SET (VOIDmode, target,
2889 gen_rtx_NOT (mode, sub)));
2890 }
2891 return 3;
2892 }
2893 break;
2894
2895 case AND:
2896 /* See if two shifts will do 2 or more insn's worth of work. */
2897 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2898 {
2899 HOST_WIDE_INT shift_mask = ((0xffffffff
2900 << (32 - clear_sign_bit_copies))
2901 & 0xffffffff);
2902
2903 if ((remainder | shift_mask) != 0xffffffff)
2904 {
2905 if (generate)
2906 {
2907 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2908 insns = arm_gen_constant (AND, mode, cond,
2909 remainder | shift_mask,
2910 new_src, source, subtargets, 1);
2911 source = new_src;
2912 }
2913 else
2914 {
2915 rtx targ = subtargets ? NULL_RTX : target;
2916 insns = arm_gen_constant (AND, mode, cond,
2917 remainder | shift_mask,
2918 targ, source, subtargets, 0);
2919 }
2920 }
2921
2922 if (generate)
2923 {
2924 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2925 rtx shift = GEN_INT (clear_sign_bit_copies);
2926
2927 emit_insn (gen_ashlsi3 (new_src, source, shift));
2928 emit_insn (gen_lshrsi3 (target, new_src, shift));
2929 }
2930
2931 return insns + 2;
2932 }
2933
2934 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2935 {
2936 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2937
2938 if ((remainder | shift_mask) != 0xffffffff)
2939 {
2940 if (generate)
2941 {
2942 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2943
2944 insns = arm_gen_constant (AND, mode, cond,
2945 remainder | shift_mask,
2946 new_src, source, subtargets, 1);
2947 source = new_src;
2948 }
2949 else
2950 {
2951 rtx targ = subtargets ? NULL_RTX : target;
2952
2953 insns = arm_gen_constant (AND, mode, cond,
2954 remainder | shift_mask,
2955 targ, source, subtargets, 0);
2956 }
2957 }
2958
2959 if (generate)
2960 {
2961 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2962 rtx shift = GEN_INT (clear_zero_bit_copies);
2963
2964 emit_insn (gen_lshrsi3 (new_src, source, shift));
2965 emit_insn (gen_ashlsi3 (target, new_src, shift));
2966 }
2967
2968 return insns + 2;
2969 }
2970
2971 break;
2972
2973 default:
2974 break;
2975 }
2976
2977 for (i = 0; i < 32; i++)
2978 if (remainder & (1 << i))
2979 num_bits_set++;
2980
2981 if ((code == AND) || (can_invert && num_bits_set > 16))
2982 remainder ^= 0xffffffff;
2983 else if (code == PLUS && num_bits_set > 16)
2984 remainder = (-remainder) & 0xffffffff;
2985
2986 /* For XOR, if more than half the bits are set and there's a sequence
2987 of more than 8 consecutive ones in the pattern then we can XOR by the
2988 inverted constant and then invert the final result; this may save an
2989 instruction and might also lead to the final mvn being merged with
2990 some other operation. */
2991 else if (code == XOR && num_bits_set > 16
2992 && (count_insns_for_constant (remainder ^ 0xffffffff,
2993 find_best_start
2994 (remainder ^ 0xffffffff))
2995 < count_insns_for_constant (remainder,
2996 find_best_start (remainder))))
2997 {
2998 remainder ^= 0xffffffff;
2999 final_invert = 1;
3000 }
3001 else
3002 {
3003 can_invert = 0;
3004 can_negate = 0;
3005 }
3006
3007 /* Now try and find a way of doing the job in either two or three
3008 instructions.
3009 We start by looking for the largest block of zeros that are aligned on
3010 a 2-bit boundary, we then fill up the temps, wrapping around to the
3011 top of the word when we drop off the bottom.
3012 In the worst case this code should produce no more than four insns.
3013 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3014 best place to start. */
3015
3016 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3017 the same. */
3018 {
3019 /* Now start emitting the insns. */
3020 i = find_best_start (remainder);
3021 do
3022 {
3023 int end;
3024
3025 if (i <= 0)
3026 i += 32;
3027 if (remainder & (3 << (i - 2)))
3028 {
3029 end = i - 8;
3030 if (end < 0)
3031 end += 32;
3032 temp1 = remainder & ((0x0ff << end)
3033 | ((i < end) ? (0xff >> (32 - end)) : 0));
3034 remainder &= ~temp1;
3035
3036 if (generate)
3037 {
3038 rtx new_src, temp1_rtx;
3039
3040 if (code == SET || code == MINUS)
3041 {
3042 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3043 if (can_invert && code != MINUS)
3044 temp1 = ~temp1;
3045 }
3046 else
3047 {
3048 if ((final_invert || remainder) && subtargets)
3049 new_src = gen_reg_rtx (mode);
3050 else
3051 new_src = target;
3052 if (can_invert)
3053 temp1 = ~temp1;
3054 else if (can_negate)
3055 temp1 = -temp1;
3056 }
3057
3058 temp1 = trunc_int_for_mode (temp1, mode);
3059 temp1_rtx = GEN_INT (temp1);
3060
3061 if (code == SET)
3062 ;
3063 else if (code == MINUS)
3064 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3065 else
3066 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3067
3068 emit_constant_insn (cond,
3069 gen_rtx_SET (VOIDmode, new_src,
3070 temp1_rtx));
3071 source = new_src;
3072 }
3073
3074 if (code == SET)
3075 {
3076 can_invert = 0;
3077 code = PLUS;
3078 }
3079 else if (code == MINUS)
3080 code = PLUS;
3081
3082 insns++;
3083 i -= 8 - step_size;
3084 }
3085 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3086 shifts. */
3087 i -= step_size;
3088 }
3089 while (remainder);
3090 }
3091
3092 if (final_invert)
3093 {
3094 if (generate)
3095 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3096 gen_rtx_NOT (mode, source)));
3097 insns++;
3098 }
3099
3100 return insns;
3101 }
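/* A rough illustration of the chunking strategy used above: the
   constant 0x00F000FF is not a valid immediate, but it splits into two
   valid 8-bit chunks, so a SET can be synthesised along the lines of

       mov     rD, #0x000000FF
       add     rD, rD, #0x00F00000

   (register choice and the ADD/ORR selection depend on the caller and
   on find_best_start; the sequence is indicative only).  */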
3102
3103 /* Canonicalize a comparison so that we are more likely to recognize it.
3104 This can be done for a few constant compares, where we can make the
3105 immediate value easier to load. */
3106
3107 enum rtx_code
3108 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3109 {
3110 enum machine_mode mode;
3111 unsigned HOST_WIDE_INT i, maxval;
3112
3113 mode = GET_MODE (*op0);
3114 if (mode == VOIDmode)
3115 mode = GET_MODE (*op1);
3116
3117 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3118
3119 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3120 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3121 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3122 for GTU/LEU in Thumb mode. */
3123 if (mode == DImode)
3124 {
3125 rtx tem;
3126
3127 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3128 available. */
3129 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3130 return code;
3131
3132 if (code == GT || code == LE
3133 || (!TARGET_ARM && (code == GTU || code == LEU)))
3134 {
3135 /* Missing comparison. First try to use an available
3136 comparison. */
3137 if (GET_CODE (*op1) == CONST_INT)
3138 {
3139 i = INTVAL (*op1);
3140 switch (code)
3141 {
3142 case GT:
3143 case LE:
3144 if (i != maxval
3145 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3146 {
3147 *op1 = GEN_INT (i + 1);
3148 return code == GT ? GE : LT;
3149 }
3150 break;
3151 case GTU:
3152 case LEU:
3153 if (i != ~((unsigned HOST_WIDE_INT) 0)
3154 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3155 {
3156 *op1 = GEN_INT (i + 1);
3157 return code == GTU ? GEU : LTU;
3158 }
3159 break;
3160 default:
3161 gcc_unreachable ();
3162 }
3163 }
3164
3165 /* If that did not work, reverse the condition. */
3166 tem = *op0;
3167 *op0 = *op1;
3168 *op1 = tem;
3169 return swap_condition (code);
3170 }
3171
3172 return code;
3173 }
3174
3175 /* Comparisons smaller than DImode. Only adjust comparisons against
3176 an out-of-range constant. */
3177 if (GET_CODE (*op1) != CONST_INT
3178 || const_ok_for_arm (INTVAL (*op1))
3179 || const_ok_for_arm (- INTVAL (*op1)))
3180 return code;
3181
3182 i = INTVAL (*op1);
3183
3184 switch (code)
3185 {
3186 case EQ:
3187 case NE:
3188 return code;
3189
3190 case GT:
3191 case LE:
3192 if (i != maxval
3193 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3194 {
3195 *op1 = GEN_INT (i + 1);
3196 return code == GT ? GE : LT;
3197 }
3198 break;
3199
3200 case GE:
3201 case LT:
3202 if (i != ~maxval
3203 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3204 {
3205 *op1 = GEN_INT (i - 1);
3206 return code == GE ? GT : LE;
3207 }
3208 break;
3209
3210 case GTU:
3211 case LEU:
3212 if (i != ~((unsigned HOST_WIDE_INT) 0)
3213 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3214 {
3215 *op1 = GEN_INT (i + 1);
3216 return code == GTU ? GEU : LTU;
3217 }
3218 break;
3219
3220 case GEU:
3221 case LTU:
3222 if (i != 0
3223 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3224 {
3225 *op1 = GEN_INT (i - 1);
3226 return code == GEU ? GTU : LEU;
3227 }
3228 break;
3229
3230 default:
3231 gcc_unreachable ();
3232 }
3233
3234 return code;
3235 }
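/* A worked case for the adjustment above: 0xFFFFFF is not a valid ARM
   immediate (nor is its negation), but 0x1000000 is, so

       if (x > 0xFFFFFF) ...   is compared as   if (x >= 0x1000000) ...

   and the resulting cmp needs no extra constant-building instructions.  */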
3236
3237
3238 /* Define how to find the value returned by a function. */
3239
3240 static rtx
3241 arm_function_value(const_tree type, const_tree func,
3242 bool outgoing ATTRIBUTE_UNUSED)
3243 {
3244 enum machine_mode mode;
3245 int unsignedp ATTRIBUTE_UNUSED;
3246 rtx r ATTRIBUTE_UNUSED;
3247
3248 mode = TYPE_MODE (type);
3249
3250 if (TARGET_AAPCS_BASED)
3251 return aapcs_allocate_return_reg (mode, type, func);
3252
3253 /* Promote integer types. */
3254 if (INTEGRAL_TYPE_P (type))
3255 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3256
3257 /* Promote small structs returned in a register to full-word size
3258 for big-endian AAPCS. */
3259 if (arm_return_in_msb (type))
3260 {
3261 HOST_WIDE_INT size = int_size_in_bytes (type);
3262 if (size % UNITS_PER_WORD != 0)
3263 {
3264 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3265 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3266 }
3267 }
3268
3269 return LIBCALL_VALUE (mode);
3270 }
3271
3272 static int
3273 libcall_eq (const void *p1, const void *p2)
3274 {
3275 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3276 }
3277
3278 static hashval_t
3279 libcall_hash (const void *p1)
3280 {
3281 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3282 }
3283
3284 static void
3285 add_libcall (htab_t htab, rtx libcall)
3286 {
3287 *htab_find_slot (htab, libcall, INSERT) = libcall;
3288 }
3289
3290 static bool
3291 arm_libcall_uses_aapcs_base (const_rtx libcall)
3292 {
3293 static bool init_done = false;
3294 static htab_t libcall_htab;
3295
3296 if (!init_done)
3297 {
3298 init_done = true;
3299
3300 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3301 NULL);
3302 add_libcall (libcall_htab,
3303 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3304 add_libcall (libcall_htab,
3305 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3306 add_libcall (libcall_htab,
3307 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3308 add_libcall (libcall_htab,
3309 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3310
3311 add_libcall (libcall_htab,
3312 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3313 add_libcall (libcall_htab,
3314 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3315 add_libcall (libcall_htab,
3316 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3317 add_libcall (libcall_htab,
3318 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3319
3320 add_libcall (libcall_htab,
3321 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3322 add_libcall (libcall_htab,
3323 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3324 add_libcall (libcall_htab,
3325 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3326 add_libcall (libcall_htab,
3327 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3328 add_libcall (libcall_htab,
3329 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3330 add_libcall (libcall_htab,
3331 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3332
3333 /* Values from double-precision helper functions are returned in core
3334 registers if the selected core only supports single-precision
3335 arithmetic, even if we are using the hard-float ABI. The same is
3336 true for single-precision helpers, but we will never be using the
3337 hard-float ABI on a CPU which doesn't support single-precision
3338 operations in hardware. */
3339 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3340 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3341 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3342 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3343 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3344 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3345 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3346 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3347 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3348 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3349 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3350 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3351 SFmode));
3352 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3353 DFmode));
3354 }
3355
3356 return libcall && htab_find (libcall_htab, libcall) != NULL;
3357 }
3358
3359 rtx
3360 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3361 {
3362 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3363 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3364 {
3365 /* The following libcalls return their result in integer registers,
3366 even though they return a floating point value. */
3367 if (arm_libcall_uses_aapcs_base (libcall))
3368 return gen_rtx_REG (mode, ARG_REGISTER(1));
3369
3370 }
3371
3372 return LIBCALL_VALUE (mode);
3373 }
3374
3375 /* Determine the amount of memory needed to store the possible return
3376 registers of an untyped call. */
3377 int
3378 arm_apply_result_size (void)
3379 {
3380 int size = 16;
3381
3382 if (TARGET_32BIT)
3383 {
3384 if (TARGET_HARD_FLOAT_ABI)
3385 {
3386 if (TARGET_VFP)
3387 size += 32;
3388 if (TARGET_FPA)
3389 size += 12;
3390 if (TARGET_MAVERICK)
3391 size += 8;
3392 }
3393 if (TARGET_IWMMXT_ABI)
3394 size += 8;
3395 }
3396
3397 return size;
3398 }
3399
3400 /* Decide whether TYPE should be returned in memory (true)
3401 or in a register (false). FNTYPE is the type of the function making
3402 the call. */
3403 static bool
3404 arm_return_in_memory (const_tree type, const_tree fntype)
3405 {
3406 HOST_WIDE_INT size;
3407
3408 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3409
3410 if (TARGET_AAPCS_BASED)
3411 {
3412 /* Simple, non-aggregate types (i.e. not including vectors and
3413 complex) are always returned in a register (or registers).
3414 We don't care about which register here, so we can short-cut
3415 some of the detail. */
3416 if (!AGGREGATE_TYPE_P (type)
3417 && TREE_CODE (type) != VECTOR_TYPE
3418 && TREE_CODE (type) != COMPLEX_TYPE)
3419 return false;
3420
3421 /* Any return value that is no larger than one word can be
3422 returned in r0. */
3423 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3424 return false;
3425
3426 /* Check any available co-processors to see if they accept the
3427 type as a register candidate (VFP, for example, can return
3428 some aggregates in consecutive registers). These aren't
3429 available if the call is variadic. */
3430 if (aapcs_select_return_coproc (type, fntype) >= 0)
3431 return false;
3432
3433 /* Vector values should be returned using ARM registers, not
3434 memory (unless they're over 16 bytes, which will break since
3435 we only have four call-clobbered registers to play with). */
3436 if (TREE_CODE (type) == VECTOR_TYPE)
3437 return (size < 0 || size > (4 * UNITS_PER_WORD));
3438
3439 /* The rest go in memory. */
3440 return true;
3441 }
3442
3443 if (TREE_CODE (type) == VECTOR_TYPE)
3444 return (size < 0 || size > (4 * UNITS_PER_WORD));
3445
3446 if (!AGGREGATE_TYPE_P (type)
3447 && (TREE_CODE (type) != VECTOR_TYPE))
3448 /* All simple types are returned in registers. */
3449 return false;
3450
3451 if (arm_abi != ARM_ABI_APCS)
3452 {
3453 /* ATPCS and later return aggregate types in memory only if they are
3454 larger than a word (or are variable size). */
3455 return (size < 0 || size > UNITS_PER_WORD);
3456 }
3457
3458 /* For the arm-wince targets we choose to be compatible with Microsoft's
3459 ARM and Thumb compilers, which always return aggregates in memory. */
3460 #ifndef ARM_WINCE
3461 /* All structures/unions bigger than one word are returned in memory.
3462 Also catch the case where int_size_in_bytes returns -1. In this case
3463 the aggregate is either huge or of variable size, and in either case
3464 we will want to return it via memory and not in a register. */
3465 if (size < 0 || size > UNITS_PER_WORD)
3466 return true;
3467
3468 if (TREE_CODE (type) == RECORD_TYPE)
3469 {
3470 tree field;
3471
3472 /* For a struct the APCS says that we only return in a register
3473 if the type is 'integer like' and every addressable element
3474 has an offset of zero. For practical purposes this means
3475 that the structure can have at most one non bit-field element
3476 and that this element must be the first one in the structure. */
3477
3478 /* Find the first field, ignoring non FIELD_DECL things which will
3479 have been created by C++. */
3480 for (field = TYPE_FIELDS (type);
3481 field && TREE_CODE (field) != FIELD_DECL;
3482 field = DECL_CHAIN (field))
3483 continue;
3484
3485 if (field == NULL)
3486 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3487
3488 /* Check that the first field is valid for returning in a register. */
3489
3490 /* ... Floats are not allowed. */
3491 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3492 return true;
3493
3494 /* ... Aggregates that are not themselves valid for returning in
3495 a register are not allowed. */
3496 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3497 return true;
3498
3499 /* Now check the remaining fields, if any. Only bitfields are allowed,
3500 since they are not addressable. */
3501 for (field = DECL_CHAIN (field);
3502 field;
3503 field = DECL_CHAIN (field))
3504 {
3505 if (TREE_CODE (field) != FIELD_DECL)
3506 continue;
3507
3508 if (!DECL_BIT_FIELD_TYPE (field))
3509 return true;
3510 }
3511
3512 return false;
3513 }
3514
3515 if (TREE_CODE (type) == UNION_TYPE)
3516 {
3517 tree field;
3518
3519 /* Unions can be returned in registers if every element is
3520 integral, or can be returned in an integer register. */
3521 for (field = TYPE_FIELDS (type);
3522 field;
3523 field = DECL_CHAIN (field))
3524 {
3525 if (TREE_CODE (field) != FIELD_DECL)
3526 continue;
3527
3528 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3529 return true;
3530
3531 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3532 return true;
3533 }
3534
3535 return false;
3536 }
3537 #endif /* not ARM_WINCE */
3538
3539 /* Return all other types in memory. */
3540 return true;
3541 }
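
/* Editorial illustration (hypothetical types, not part of this file): how
   the AAPCS branch above classifies a few return types.

     struct small { char c; };          -- 1 byte, returned in a register
     struct pair  { int a, b; };        -- 8 bytes, not a VFP candidate,
                                           returned in memory
     struct hfa   { double x, y; };     -- homogeneous FP aggregate; under
                                           the VFP variant the co-processor
                                           check keeps it out of memory  */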
3542
3543 /* Indicate whether or not words of a double are in big-endian order. */
3544
3545 int
3546 arm_float_words_big_endian (void)
3547 {
3548 if (TARGET_MAVERICK)
3549 return 0;
3550
3551 /* For FPA, float words are always big-endian. For VFP, float words
3552 follow the memory system mode. */
3553
3554 if (TARGET_FPA)
3555 {
3556 return 1;
3557 }
3558
3559 if (TARGET_VFP)
3560 return (TARGET_BIG_END ? 1 : 0);
3561
3562 return 1;
3563 }
3564
3565 const struct pcs_attribute_arg
3566 {
3567 const char *arg;
3568 enum arm_pcs value;
3569 } pcs_attribute_args[] =
3570 {
3571 {"aapcs", ARM_PCS_AAPCS},
3572 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3573 #if 0
3574 /* We could recognize these, but changes would be needed elsewhere
3575 * to implement them. */
3576 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3577 {"atpcs", ARM_PCS_ATPCS},
3578 {"apcs", ARM_PCS_APCS},
3579 #endif
3580 {NULL, ARM_PCS_UNKNOWN}
3581 };
3582
3583 static enum arm_pcs
3584 arm_pcs_from_attribute (tree attr)
3585 {
3586 const struct pcs_attribute_arg *ptr;
3587 const char *arg;
3588
3589 /* Get the value of the argument. */
3590 if (TREE_VALUE (attr) == NULL_TREE
3591 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3592 return ARM_PCS_UNKNOWN;
3593
3594 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3595
3596 /* Check it against the list of known arguments. */
3597 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3598 if (streq (arg, ptr->arg))
3599 return ptr->value;
3600
3601 /* An unrecognized PCS variant. */
3602 return ARM_PCS_UNKNOWN;
3603 }
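
/* Editorial illustration (hypothetical declaration, not part of this file):
   the strings above correspond to the documented "pcs" function attribute,
   e.g.

     double f2d (float) __attribute__ ((pcs ("aapcs")));

   which selects the base variant even when the target default would be
   "aapcs-vfp".  */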
3604
3605 /* Get the PCS variant to use for this call. TYPE is the function's type
3606 specification, DECL is the specific declaration. DECL may be null if
3607 the call could be indirect or if this is a library call. */
3608 static enum arm_pcs
3609 arm_get_pcs_model (const_tree type, const_tree decl)
3610 {
3611 bool user_convention = false;
3612 enum arm_pcs user_pcs = arm_pcs_default;
3613 tree attr;
3614
3615 gcc_assert (type);
3616
3617 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3618 if (attr)
3619 {
3620 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3621 user_convention = true;
3622 }
3623
3624 if (TARGET_AAPCS_BASED)
3625 {
3626 /* Detect varargs functions. These always use the base rules
3627 (no argument is ever a candidate for a co-processor
3628 register). */
3629 bool base_rules = stdarg_p (type);
3630
3631 if (user_convention)
3632 {
3633 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3634 sorry ("non-AAPCS derived PCS variant");
3635 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3636 error ("variadic functions must use the base AAPCS variant");
3637 }
3638
3639 if (base_rules)
3640 return ARM_PCS_AAPCS;
3641 else if (user_convention)
3642 return user_pcs;
3643 else if (decl && flag_unit_at_a_time)
3644 {
3645 /* Local functions never leak outside this compilation unit,
3646 so we are free to use whatever conventions are
3647 appropriate. */
3648 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3649 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3650 if (i && i->local)
3651 return ARM_PCS_AAPCS_LOCAL;
3652 }
3653 }
3654 else if (user_convention && user_pcs != arm_pcs_default)
3655 sorry ("PCS variant");
3656
3657 /* For everything else we use the target's default. */
3658 return arm_pcs_default;
3659 }
3660
3661
3662 static void
3663 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum,
3664 const_tree fntype ATTRIBUTE_UNUSED,
3665 rtx libcall ATTRIBUTE_UNUSED,
3666 const_tree fndecl ATTRIBUTE_UNUSED)
3667 {
3668 /* Record the unallocated VFP registers. */
3669 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3670 pcum->aapcs_vfp_reg_alloc = 0;
3671 }
3672
3673 /* Walk down the type tree of TYPE counting consecutive base elements.
3674 If *MODEP is VOIDmode, then set it to the first valid floating point
3675 type. If a non-floating point type is found, or if a floating point
3676 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3677 otherwise return the count in the sub-tree. */
3678 static int
3679 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3680 {
3681 enum machine_mode mode;
3682 HOST_WIDE_INT size;
3683
3684 switch (TREE_CODE (type))
3685 {
3686 case REAL_TYPE:
3687 mode = TYPE_MODE (type);
3688 if (mode != DFmode && mode != SFmode)
3689 return -1;
3690
3691 if (*modep == VOIDmode)
3692 *modep = mode;
3693
3694 if (*modep == mode)
3695 return 1;
3696
3697 break;
3698
3699 case COMPLEX_TYPE:
3700 mode = TYPE_MODE (TREE_TYPE (type));
3701 if (mode != DFmode && mode != SFmode)
3702 return -1;
3703
3704 if (*modep == VOIDmode)
3705 *modep = mode;
3706
3707 if (*modep == mode)
3708 return 2;
3709
3710 break;
3711
3712 case VECTOR_TYPE:
3713 /* Use V2SImode and V4SImode as representatives of all 64-bit
3714 and 128-bit vector types, whether or not those modes are
3715 supported with the present options. */
3716 size = int_size_in_bytes (type);
3717 switch (size)
3718 {
3719 case 8:
3720 mode = V2SImode;
3721 break;
3722 case 16:
3723 mode = V4SImode;
3724 break;
3725 default:
3726 return -1;
3727 }
3728
3729 if (*modep == VOIDmode)
3730 *modep = mode;
3731
3732 /* Vector modes are considered to be opaque: two vectors are
3733 equivalent for the purposes of being homogeneous aggregates
3734 if they are the same size. */
3735 if (*modep == mode)
3736 return 1;
3737
3738 break;
3739
3740 case ARRAY_TYPE:
3741 {
3742 int count;
3743 tree index = TYPE_DOMAIN (type);
3744
3745 /* Can't handle incomplete types. */
3746 if (!COMPLETE_TYPE_P(type))
3747 return -1;
3748
3749 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3750 if (count == -1
3751 || !index
3752 || !TYPE_MAX_VALUE (index)
3753 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3754 || !TYPE_MIN_VALUE (index)
3755 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3756 || count < 0)
3757 return -1;
3758
3759 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3760 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3761
3762 /* There must be no padding. */
3763 if (!host_integerp (TYPE_SIZE (type), 1)
3764 || (tree_low_cst (TYPE_SIZE (type), 1)
3765 != count * GET_MODE_BITSIZE (*modep)))
3766 return -1;
3767
3768 return count;
3769 }
3770
3771 case RECORD_TYPE:
3772 {
3773 int count = 0;
3774 int sub_count;
3775 tree field;
3776
3777 /* Can't handle incomplete types. */
3778 if (!COMPLETE_TYPE_P(type))
3779 return -1;
3780
3781 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3782 {
3783 if (TREE_CODE (field) != FIELD_DECL)
3784 continue;
3785
3786 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3787 if (sub_count < 0)
3788 return -1;
3789 count += sub_count;
3790 }
3791
3792 /* There must be no padding. */
3793 if (!host_integerp (TYPE_SIZE (type), 1)
3794 || (tree_low_cst (TYPE_SIZE (type), 1)
3795 != count * GET_MODE_BITSIZE (*modep)))
3796 return -1;
3797
3798 return count;
3799 }
3800
3801 case UNION_TYPE:
3802 case QUAL_UNION_TYPE:
3803 {
3804 /* These aren't very interesting except in a degenerate case. */
3805 int count = 0;
3806 int sub_count;
3807 tree field;
3808
3809 /* Can't handle incomplete types. */
3810 if (!COMPLETE_TYPE_P(type))
3811 return -1;
3812
3813 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3814 {
3815 if (TREE_CODE (field) != FIELD_DECL)
3816 continue;
3817
3818 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3819 if (sub_count < 0)
3820 return -1;
3821 count = count > sub_count ? count : sub_count;
3822 }
3823
3824 /* There must be no padding. */
3825 if (!host_integerp (TYPE_SIZE (type), 1)
3826 || (tree_low_cst (TYPE_SIZE (type), 1)
3827 != count * GET_MODE_BITSIZE (*modep)))
3828 return -1;
3829
3830 return count;
3831 }
3832
3833 default:
3834 break;
3835 }
3836
3837 return -1;
3838 }
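
/* Editorial illustration (hypothetical types, not part of this file):
   results of the walk above for some candidate types.

     struct hfa2 { double x, y; };        -- returns 2, *modep == DFmode
     struct hfa3 { float a, b, c; };      -- returns 3, *modep == SFmode
     double      arr4[4];                 -- returns 4, *modep == DFmode
     struct bad  { float f; double d; };  -- returns -1, element modes differ  */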
3839
3840 /* Return true if PCS_VARIANT should use VFP registers. */
3841 static bool
3842 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3843 {
3844 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3845 {
3846 static bool seen_thumb1_vfp = false;
3847
3848 if (TARGET_THUMB1 && !seen_thumb1_vfp)
3849 {
3850 sorry ("Thumb-1 hard-float VFP ABI");
3851 /* sorry() is not immediately fatal, so only display this once. */
3852 seen_thumb1_vfp = true;
3853 }
3854
3855 return true;
3856 }
3857
3858 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3859 return false;
3860
3861 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
3862 && (TARGET_VFP_DOUBLE || !is_double));
3863 }
3864
3865 static bool
3866 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3867 enum machine_mode mode, const_tree type,
3868 enum machine_mode *base_mode, int *count)
3869 {
3870 enum machine_mode new_mode = VOIDmode;
3871
3872 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3873 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3874 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3875 {
3876 *count = 1;
3877 new_mode = mode;
3878 }
3879 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3880 {
3881 *count = 2;
3882 new_mode = (mode == DCmode ? DFmode : SFmode);
3883 }
3884 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3885 {
3886 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3887
3888 if (ag_count > 0 && ag_count <= 4)
3889 *count = ag_count;
3890 else
3891 return false;
3892 }
3893 else
3894 return false;
3895
3896
3897 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3898 return false;
3899
3900 *base_mode = new_mode;
3901 return true;
3902 }
3903
3904 static bool
3905 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3906 enum machine_mode mode, const_tree type)
3907 {
3908 int count ATTRIBUTE_UNUSED;
3909 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3910
3911 if (!use_vfp_abi (pcs_variant, false))
3912 return false;
3913 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3914 &ag_mode, &count);
3915 }
3916
3917 static bool
3918 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3919 const_tree type)
3920 {
3921 if (!use_vfp_abi (pcum->pcs_variant, false))
3922 return false;
3923
3924 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
3925 &pcum->aapcs_vfp_rmode,
3926 &pcum->aapcs_vfp_rcount);
3927 }
3928
3929 static bool
3930 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3931 const_tree type ATTRIBUTE_UNUSED)
3932 {
3933 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3934 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
3935 int regno;
3936
3937 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3938 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3939 {
3940 pcum->aapcs_vfp_reg_alloc = mask << regno;
3941 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3942 {
3943 int i;
3944 int rcount = pcum->aapcs_vfp_rcount;
3945 int rshift = shift;
3946 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
3947 rtx par;
3948 if (!TARGET_NEON)
3949 {
3950 /* Avoid using unsupported vector modes. */
3951 if (rmode == V2SImode)
3952 rmode = DImode;
3953 else if (rmode == V4SImode)
3954 {
3955 rmode = DImode;
3956 rcount *= 2;
3957 rshift /= 2;
3958 }
3959 }
3960 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
3961 for (i = 0; i < rcount; i++)
3962 {
3963 rtx tmp = gen_rtx_REG (rmode,
3964 FIRST_VFP_REGNUM + regno + i * rshift);
3965 tmp = gen_rtx_EXPR_LIST
3966 (VOIDmode, tmp,
3967 GEN_INT (i * GET_MODE_SIZE (rmode)));
3968 XVECEXP (par, 0, i) = tmp;
3969 }
3970
3971 pcum->aapcs_reg = par;
3972 }
3973 else
3974 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
3975 return true;
3976 }
3977 return false;
3978 }
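
/* Editorial illustration (editor's note, not part of this file): the mask
   arithmetic above, with the 16 VFP argument registers s0-s15 tracked as
   bits 0-15 of aapcs_vfp_regs_free.

     single double (DFmode)        : shift = 2, rcount = 1, mask = 0x3,
                                     so it lands on the first free even
                                     s-register pair (a d-register).
     HFA of three floats (SFmode)  : shift = 1, rcount = 3, mask = 0x7,
                                     so it needs three consecutive free
                                     s-registers.  */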
3979
3980 static rtx
3981 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
3982 enum machine_mode mode,
3983 const_tree type ATTRIBUTE_UNUSED)
3984 {
3985 if (!use_vfp_abi (pcs_variant, false))
3986 return NULL;
3987
3988 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3989 {
3990 int count;
3991 enum machine_mode ag_mode;
3992 int i;
3993 rtx par;
3994 int shift;
3995
3996 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3997 &ag_mode, &count);
3998
3999 if (!TARGET_NEON)
4000 {
4001 if (ag_mode == V2SImode)
4002 ag_mode = DImode;
4003 else if (ag_mode == V4SImode)
4004 {
4005 ag_mode = DImode;
4006 count *= 2;
4007 }
4008 }
4009 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4010 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4011 for (i = 0; i < count; i++)
4012 {
4013 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4014 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4015 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4016 XVECEXP (par, 0, i) = tmp;
4017 }
4018
4019 return par;
4020 }
4021
4022 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4023 }
4024
4025 static void
4026 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
4027 enum machine_mode mode ATTRIBUTE_UNUSED,
4028 const_tree type ATTRIBUTE_UNUSED)
4029 {
4030 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4031 pcum->aapcs_vfp_reg_alloc = 0;
4032 return;
4033 }
4034
4035 #define AAPCS_CP(X) \
4036 { \
4037 aapcs_ ## X ## _cum_init, \
4038 aapcs_ ## X ## _is_call_candidate, \
4039 aapcs_ ## X ## _allocate, \
4040 aapcs_ ## X ## _is_return_candidate, \
4041 aapcs_ ## X ## _allocate_return_reg, \
4042 aapcs_ ## X ## _advance \
4043 }
4044
4045 /* Table of co-processors that can be used to pass arguments in
4046 registers. Ideally no argument should be a candidate for more than
4047 one co-processor table entry, but the table is processed in order
4048 and stops after the first match. If that entry then fails to put
4049 the argument into a co-processor register, the argument will go on
4050 the stack. */
4051 static struct
4052 {
4053 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4054 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4055
4056 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4057 BLKmode) is a candidate for this co-processor's registers; this
4058 function should ignore any position-dependent state in
4059 CUMULATIVE_ARGS and only use call-type dependent information. */
4060 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4061
4062 /* Return true if the argument does get a co-processor register; it
4063 should set aapcs_reg to an RTX of the register allocated as is
4064 required for a return from FUNCTION_ARG. */
4065 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4066
4067 /* Return true if a result of mode MODE (or type TYPE if MODE is
4068 BLKmode) can be returned in this co-processor's registers. */
4069 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4070
4071 /* Allocate and return an RTX element to hold the return value of a
4072 call; this routine must not fail and will only be called if
4073 is_return_candidate returned true with the same parameters. */
4074 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4075
4076 /* Finish processing this argument and prepare to start processing
4077 the next one. */
4078 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4079 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4080 {
4081 AAPCS_CP(vfp)
4082 };
4083
4084 #undef AAPCS_CP
4085
4086 static int
4087 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4088 const_tree type)
4089 {
4090 int i;
4091
4092 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4093 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4094 return i;
4095
4096 return -1;
4097 }
4098
4099 static int
4100 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4101 {
4102 /* We aren't passed a decl, so we can't check that a call is local.
4103 However, it isn't clear that that would be a win anyway, since it
4104 might limit some tail-calling opportunities. */
4105 enum arm_pcs pcs_variant;
4106
4107 if (fntype)
4108 {
4109 const_tree fndecl = NULL_TREE;
4110
4111 if (TREE_CODE (fntype) == FUNCTION_DECL)
4112 {
4113 fndecl = fntype;
4114 fntype = TREE_TYPE (fntype);
4115 }
4116
4117 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4118 }
4119 else
4120 pcs_variant = arm_pcs_default;
4121
4122 if (pcs_variant != ARM_PCS_AAPCS)
4123 {
4124 int i;
4125
4126 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4127 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4128 TYPE_MODE (type),
4129 type))
4130 return i;
4131 }
4132 return -1;
4133 }
4134
4135 static rtx
4136 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4137 const_tree fntype)
4138 {
4139 /* We aren't passed a decl, so we can't check that a call is local.
4140 However, it isn't clear that that would be a win anyway, since it
4141 might limit some tail-calling opportunities. */
4142 enum arm_pcs pcs_variant;
4143 int unsignedp ATTRIBUTE_UNUSED;
4144
4145 if (fntype)
4146 {
4147 const_tree fndecl = NULL_TREE;
4148
4149 if (TREE_CODE (fntype) == FUNCTION_DECL)
4150 {
4151 fndecl = fntype;
4152 fntype = TREE_TYPE (fntype);
4153 }
4154
4155 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4156 }
4157 else
4158 pcs_variant = arm_pcs_default;
4159
4160 /* Promote integer types. */
4161 if (type && INTEGRAL_TYPE_P (type))
4162 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4163
4164 if (pcs_variant != ARM_PCS_AAPCS)
4165 {
4166 int i;
4167
4168 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4169 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4170 type))
4171 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4172 mode, type);
4173 }
4174
4175 /* Promote small structs returned in a register to full-word size
4176 for big-endian AAPCS. */
4177 if (type && arm_return_in_msb (type))
4178 {
4179 HOST_WIDE_INT size = int_size_in_bytes (type);
4180 if (size % UNITS_PER_WORD != 0)
4181 {
4182 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4183 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4184 }
4185 }
4186
4187 return gen_rtx_REG (mode, R0_REGNUM);
4188 }
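
/* Editorial illustration (hypothetical type, not part of this file): the
   padding step above only matters for big-endian AAPCS.  A 3-byte

     struct rgb { unsigned char r, g, b; };

   is rounded up to one word and returned in SImode, so the bytes end up in
   the most significant part of r0 as arm_return_in_msb expects.  */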
4189
4190 rtx
4191 aapcs_libcall_value (enum machine_mode mode)
4192 {
4193 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4194 }
4195
4196 /* Lay out a function argument using the AAPCS rules. The rule
4197 numbers referred to here are those in the AAPCS. */
4198 static void
4199 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4200 const_tree type, bool named)
4201 {
4202 int nregs, nregs2;
4203 int ncrn;
4204
4205 /* We only need to do this once per argument. */
4206 if (pcum->aapcs_arg_processed)
4207 return;
4208
4209 pcum->aapcs_arg_processed = true;
4210
4211 /* Special case: if named is false then we are handling an incoming
4212 anonymous argument which is on the stack. */
4213 if (!named)
4214 return;
4215
4216 /* Is this a potential co-processor register candidate? */
4217 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4218 {
4219 int slot = aapcs_select_call_coproc (pcum, mode, type);
4220 pcum->aapcs_cprc_slot = slot;
4221
4222 /* We don't have to apply any of the rules from part B of the
4223 preparation phase, these are handled elsewhere in the
4224 compiler. */
4225
4226 if (slot >= 0)
4227 {
4228 /* A Co-processor register candidate goes either in its own
4229 class of registers or on the stack. */
4230 if (!pcum->aapcs_cprc_failed[slot])
4231 {
4232 /* C1.cp - Try to allocate the argument to co-processor
4233 registers. */
4234 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4235 return;
4236
4237 /* C2.cp - Put the argument on the stack and note that we
4238 can't assign any more candidates in this slot. We also
4239 need to note that we have allocated stack space, so that
4240 we won't later try to split a non-cprc candidate between
4241 core registers and the stack. */
4242 pcum->aapcs_cprc_failed[slot] = true;
4243 pcum->can_split = false;
4244 }
4245
4246 /* We didn't get a register, so this argument goes on the
4247 stack. */
4248 gcc_assert (pcum->can_split == false);
4249 return;
4250 }
4251 }
4252
4253 /* C3 - For double-word aligned arguments, round the NCRN up to the
4254 next even number. */
4255 ncrn = pcum->aapcs_ncrn;
4256 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4257 ncrn++;
4258
4259 nregs = ARM_NUM_REGS2(mode, type);
4260
4261 /* Sigh, this test should really assert that nregs > 0, but a GCC
4262 extension allows empty structs and then gives them empty size; it
4263 then allows such a structure to be passed by value. For some of
4264 the code below we have to pretend that such an argument has
4265 non-zero size so that we 'locate' it correctly either in
4266 registers or on the stack. */
4267 gcc_assert (nregs >= 0);
4268
4269 nregs2 = nregs ? nregs : 1;
4270
4271 /* C4 - Argument fits entirely in core registers. */
4272 if (ncrn + nregs2 <= NUM_ARG_REGS)
4273 {
4274 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4275 pcum->aapcs_next_ncrn = ncrn + nregs;
4276 return;
4277 }
4278
4279 /* C5 - Some core registers left and there are no arguments already
4280 on the stack: split this argument between the remaining core
4281 registers and the stack. */
4282 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4283 {
4284 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4285 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4286 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4287 return;
4288 }
4289
4290 /* C6 - NCRN is set to 4. */
4291 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4292
4293 /* C7, C8 - argument goes on the stack. We have nothing to do here. */
4294 return;
4295 }
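
/* Editorial illustration (hypothetical prototype, not part of this file):
   rules C3-C8 above applied to a base-AAPCS call

     void f (int a, double b, int c);

   a goes in r0; b needs doubleword alignment, so C3 rounds the NCRN up past
   r1 and C4 places b in r2-r3 (r1 is left unused); c no longer fits in core
   registers and, with no registers left to split, goes on the stack
   (C6-C8).  */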
4296
4297 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4298 for a call to a function whose data type is FNTYPE.
4299 For a library call, FNTYPE is NULL. */
4300 void
4301 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4302 rtx libname,
4303 tree fndecl ATTRIBUTE_UNUSED)
4304 {
4305 /* Long call handling. */
4306 if (fntype)
4307 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4308 else
4309 pcum->pcs_variant = arm_pcs_default;
4310
4311 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4312 {
4313 if (arm_libcall_uses_aapcs_base (libname))
4314 pcum->pcs_variant = ARM_PCS_AAPCS;
4315
4316 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4317 pcum->aapcs_reg = NULL_RTX;
4318 pcum->aapcs_partial = 0;
4319 pcum->aapcs_arg_processed = false;
4320 pcum->aapcs_cprc_slot = -1;
4321 pcum->can_split = true;
4322
4323 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4324 {
4325 int i;
4326
4327 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4328 {
4329 pcum->aapcs_cprc_failed[i] = false;
4330 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4331 }
4332 }
4333 return;
4334 }
4335
4336 /* Legacy ABIs */
4337
4338 /* On the ARM, the offset starts at 0. */
4339 pcum->nregs = 0;
4340 pcum->iwmmxt_nregs = 0;
4341 pcum->can_split = true;
4342
4343 /* Varargs vectors are treated the same as long long.
4344 named_count avoids having to change the way arm handles 'named'. */
4345 pcum->named_count = 0;
4346 pcum->nargs = 0;
4347
4348 if (TARGET_REALLY_IWMMXT && fntype)
4349 {
4350 tree fn_arg;
4351
4352 for (fn_arg = TYPE_ARG_TYPES (fntype);
4353 fn_arg;
4354 fn_arg = TREE_CHAIN (fn_arg))
4355 pcum->named_count += 1;
4356
4357 if (! pcum->named_count)
4358 pcum->named_count = INT_MAX;
4359 }
4360 }
4361
4362
4363 /* Return true if mode/type need doubleword alignment. */
4364 static bool
4365 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4366 {
4367 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4368 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4369 }
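
/* Editorial illustration (editor's note, not part of this file): with
   PARM_BOUNDARY of 32 bits, 64-bit-aligned modes such as DImode and DFmode
   answer true here, as does an over-aligned type like

     struct s { int x; } __attribute__ ((aligned (8)));

   via the TYPE_ALIGN test, while SImode and SFmode arguments do not.  */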
4370
4371
4372 /* Determine where to put an argument to a function.
4373 Value is zero to push the argument on the stack,
4374 or a hard register in which to store the argument.
4375
4376 MODE is the argument's machine mode.
4377 TYPE is the data type of the argument (as a tree).
4378 This is null for libcalls where that information may
4379 not be available.
4380 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4381 the preceding args and about the function being called.
4382 NAMED is nonzero if this argument is a named parameter
4383 (otherwise it is an extra parameter matching an ellipsis).
4384
4385 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4386 other arguments are passed on the stack. If (NAMED == 0) (which happens
4387 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4388 defined), say it is passed on the stack (function_prologue will
4389 indeed make it pass on the stack if necessary). */
4390
4391 static rtx
4392 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4393 const_tree type, bool named)
4394 {
4395 int nregs;
4396
4397 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4398 a call insn (op3 of a call_value insn). */
4399 if (mode == VOIDmode)
4400 return const0_rtx;
4401
4402 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4403 {
4404 aapcs_layout_arg (pcum, mode, type, named);
4405 return pcum->aapcs_reg;
4406 }
4407
4408 /* Varargs vectors are treated the same as long long.
4409 named_count avoids having to change the way arm handles 'named'. */
4410 if (TARGET_IWMMXT_ABI
4411 && arm_vector_mode_supported_p (mode)
4412 && pcum->named_count > pcum->nargs + 1)
4413 {
4414 if (pcum->iwmmxt_nregs <= 9)
4415 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4416 else
4417 {
4418 pcum->can_split = false;
4419 return NULL_RTX;
4420 }
4421 }
4422
4423 /* Put doubleword aligned quantities in even register pairs. */
4424 if (pcum->nregs & 1
4425 && ARM_DOUBLEWORD_ALIGN
4426 && arm_needs_doubleword_align (mode, type))
4427 pcum->nregs++;
4428
4429 /* Only allow splitting an arg between regs and memory if all preceding
4430 args were allocated to regs. For args passed by reference we only count
4431 the reference pointer. */
4432 if (pcum->can_split)
4433 nregs = 1;
4434 else
4435 nregs = ARM_NUM_REGS2 (mode, type);
4436
4437 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4438 return NULL_RTX;
4439
4440 return gen_rtx_REG (mode, pcum->nregs);
4441 }
4442
4443 static unsigned int
4444 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4445 {
4446 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4447 ? DOUBLEWORD_ALIGNMENT
4448 : PARM_BOUNDARY);
4449 }
4450
4451 static int
4452 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4453 tree type, bool named)
4454 {
4455 int nregs = pcum->nregs;
4456
4457 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4458 {
4459 aapcs_layout_arg (pcum, mode, type, named);
4460 return pcum->aapcs_partial;
4461 }
4462
4463 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4464 return 0;
4465
4466 if (NUM_ARG_REGS > nregs
4467 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4468 && pcum->can_split)
4469 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4470
4471 return 0;
4472 }
4473
4474 /* Update the data in PCUM to advance over an argument
4475 of mode MODE and data type TYPE.
4476 (TYPE is null for libcalls where that information may not be available.) */
4477
4478 static void
4479 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4480 const_tree type, bool named)
4481 {
4482 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4483 {
4484 aapcs_layout_arg (pcum, mode, type, named);
4485
4486 if (pcum->aapcs_cprc_slot >= 0)
4487 {
4488 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4489 type);
4490 pcum->aapcs_cprc_slot = -1;
4491 }
4492
4493 /* Generic stuff. */
4494 pcum->aapcs_arg_processed = false;
4495 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4496 pcum->aapcs_reg = NULL_RTX;
4497 pcum->aapcs_partial = 0;
4498 }
4499 else
4500 {
4501 pcum->nargs += 1;
4502 if (arm_vector_mode_supported_p (mode)
4503 && pcum->named_count > pcum->nargs
4504 && TARGET_IWMMXT_ABI)
4505 pcum->iwmmxt_nregs += 1;
4506 else
4507 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4508 }
4509 }
4510
4511 /* Variable sized types are passed by reference. This is a GCC
4512 extension to the ARM ABI. */
4513
4514 static bool
4515 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4516 enum machine_mode mode ATTRIBUTE_UNUSED,
4517 const_tree type, bool named ATTRIBUTE_UNUSED)
4518 {
4519 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4520 }
4521 \f
4522 /* Encode the current state of the #pragma [no_]long_calls. */
4523 typedef enum
4524 {
4525 OFF, /* No #pragma [no_]long_calls is in effect. */
4526 LONG, /* #pragma long_calls is in effect. */
4527 SHORT /* #pragma no_long_calls is in effect. */
4528 } arm_pragma_enum;
4529
4530 static arm_pragma_enum arm_pragma_long_calls = OFF;
4531
4532 void
4533 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4534 {
4535 arm_pragma_long_calls = LONG;
4536 }
4537
4538 void
4539 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4540 {
4541 arm_pragma_long_calls = SHORT;
4542 }
4543
4544 void
4545 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4546 {
4547 arm_pragma_long_calls = OFF;
4548 }
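
/* Editorial illustration (hypothetical source, not part of this file): the
   handlers above implement the documented ARM pragmas, e.g.

     #pragma long_calls
     void far_away (void);      -- picks up the long_call attribute
     #pragma long_calls_off

   the attribute itself is attached in arm_set_default_type_attributes
   below.  */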
4549 \f
4550 /* Handle an attribute requiring a FUNCTION_DECL;
4551 arguments as in struct attribute_spec.handler. */
4552 static tree
4553 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4554 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4555 {
4556 if (TREE_CODE (*node) != FUNCTION_DECL)
4557 {
4558 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4559 name);
4560 *no_add_attrs = true;
4561 }
4562
4563 return NULL_TREE;
4564 }
4565
4566 /* Handle an "interrupt" or "isr" attribute;
4567 arguments as in struct attribute_spec.handler. */
4568 static tree
4569 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4570 bool *no_add_attrs)
4571 {
4572 if (DECL_P (*node))
4573 {
4574 if (TREE_CODE (*node) != FUNCTION_DECL)
4575 {
4576 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4577 name);
4578 *no_add_attrs = true;
4579 }
4580 /* FIXME: the argument if any is checked for type attributes;
4581 should it be checked for decl ones? */
4582 }
4583 else
4584 {
4585 if (TREE_CODE (*node) == FUNCTION_TYPE
4586 || TREE_CODE (*node) == METHOD_TYPE)
4587 {
4588 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4589 {
4590 warning (OPT_Wattributes, "%qE attribute ignored",
4591 name);
4592 *no_add_attrs = true;
4593 }
4594 }
4595 else if (TREE_CODE (*node) == POINTER_TYPE
4596 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4597 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4598 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4599 {
4600 *node = build_variant_type_copy (*node);
4601 TREE_TYPE (*node) = build_type_attribute_variant
4602 (TREE_TYPE (*node),
4603 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4604 *no_add_attrs = true;
4605 }
4606 else
4607 {
4608 /* Possibly pass this attribute on from the type to a decl. */
4609 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4610 | (int) ATTR_FLAG_FUNCTION_NEXT
4611 | (int) ATTR_FLAG_ARRAY_NEXT))
4612 {
4613 *no_add_attrs = true;
4614 return tree_cons (name, args, NULL_TREE);
4615 }
4616 else
4617 {
4618 warning (OPT_Wattributes, "%qE attribute ignored",
4619 name);
4620 }
4621 }
4622 }
4623
4624 return NULL_TREE;
4625 }
4626
4627 /* Handle a "pcs" attribute; arguments as in struct
4628 attribute_spec.handler. */
4629 static tree
4630 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4631 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4632 {
4633 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4634 {
4635 warning (OPT_Wattributes, "%qE attribute ignored", name);
4636 *no_add_attrs = true;
4637 }
4638 return NULL_TREE;
4639 }
4640
4641 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4642 /* Handle the "notshared" attribute. This attribute is another way of
4643 requesting hidden visibility. ARM's compiler supports
4644 "__declspec(notshared)"; we support the same thing via an
4645 attribute. */
4646
4647 static tree
4648 arm_handle_notshared_attribute (tree *node,
4649 tree name ATTRIBUTE_UNUSED,
4650 tree args ATTRIBUTE_UNUSED,
4651 int flags ATTRIBUTE_UNUSED,
4652 bool *no_add_attrs)
4653 {
4654 tree decl = TYPE_NAME (*node);
4655
4656 if (decl)
4657 {
4658 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4659 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4660 *no_add_attrs = false;
4661 }
4662 return NULL_TREE;
4663 }
4664 #endif
4665
4666 /* Return 0 if the attributes for two types are incompatible, 1 if they
4667 are compatible, and 2 if they are nearly compatible (which causes a
4668 warning to be generated). */
4669 static int
4670 arm_comp_type_attributes (const_tree type1, const_tree type2)
4671 {
4672 int l1, l2, s1, s2;
4673
4674 /* Check for mismatch of non-default calling convention. */
4675 if (TREE_CODE (type1) != FUNCTION_TYPE)
4676 return 1;
4677
4678 /* Check for mismatched call attributes. */
4679 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4680 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4681 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4682 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4683
4684 /* Only bother to check if an attribute is defined. */
4685 if (l1 | l2 | s1 | s2)
4686 {
4687 /* If one type has an attribute, the other must have the same attribute. */
4688 if ((l1 != l2) || (s1 != s2))
4689 return 0;
4690
4691 /* Disallow mixed attributes. */
4692 if ((l1 & s2) || (l2 & s1))
4693 return 0;
4694 }
4695
4696 /* Check for mismatched ISR attribute. */
4697 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4698 if (! l1)
4699 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4700 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4701 if (! l2)
4702 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4703 if (l1 != l2)
4704 return 0;
4705
4706 return 1;
4707 }
4708
4709 /* Assign default attributes to a newly defined type. This is used to
4710 set short_call/long_call attributes for function types of
4711 functions defined inside corresponding #pragma scopes. */
4712 static void
4713 arm_set_default_type_attributes (tree type)
4714 {
4715 /* Add __attribute__ ((long_call)) to all functions when inside
4716 #pragma long_calls, or __attribute__ ((short_call)) when inside
4717 #pragma no_long_calls. */
4718 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4719 {
4720 tree type_attr_list, attr_name;
4721 type_attr_list = TYPE_ATTRIBUTES (type);
4722
4723 if (arm_pragma_long_calls == LONG)
4724 attr_name = get_identifier ("long_call");
4725 else if (arm_pragma_long_calls == SHORT)
4726 attr_name = get_identifier ("short_call");
4727 else
4728 return;
4729
4730 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4731 TYPE_ATTRIBUTES (type) = type_attr_list;
4732 }
4733 }
4734 \f
4735 /* Return true if DECL is known to be linked into section SECTION. */
4736
4737 static bool
4738 arm_function_in_section_p (tree decl, section *section)
4739 {
4740 /* We can only be certain about functions defined in the same
4741 compilation unit. */
4742 if (!TREE_STATIC (decl))
4743 return false;
4744
4745 /* Make sure that SYMBOL always binds to the definition in this
4746 compilation unit. */
4747 if (!targetm.binds_local_p (decl))
4748 return false;
4749
4750 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4751 if (!DECL_SECTION_NAME (decl))
4752 {
4753 /* Make sure that we will not create a unique section for DECL. */
4754 if (flag_function_sections || DECL_ONE_ONLY (decl))
4755 return false;
4756 }
4757
4758 return function_section (decl) == section;
4759 }
4760
4761 /* Return nonzero if a 32-bit "long_call" should be generated for
4762 a call from the current function to DECL. We generate a long_call
4763 if the function:
4764
4765 a. has an __attribute__ ((long_call))
4766 or b. is within the scope of a #pragma long_calls
4767 or c. the -mlong-calls command line switch has been specified
4768
4769 However we do not generate a long call if the function:
4770
4771 d. has an __attribute__ ((short_call))
4772 or e. is inside the scope of a #pragma no_long_calls
4773 or f. is defined in the same section as the current function. */
4774
4775 bool
4776 arm_is_long_call_p (tree decl)
4777 {
4778 tree attrs;
4779
4780 if (!decl)
4781 return TARGET_LONG_CALLS;
4782
4783 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4784 if (lookup_attribute ("short_call", attrs))
4785 return false;
4786
4787 /* For "f", be conservative, and only cater for cases in which the
4788 whole of the current function is placed in the same section. */
4789 if (!flag_reorder_blocks_and_partition
4790 && TREE_CODE (decl) == FUNCTION_DECL
4791 && arm_function_in_section_p (decl, current_function_section ()))
4792 return false;
4793
4794 if (lookup_attribute ("long_call", attrs))
4795 return true;
4796
4797 return TARGET_LONG_CALLS;
4798 }
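
/* Editorial illustration (hypothetical declarations, not part of this
   file): cases (a) and (d) above correspond to the documented function
   attributes

     void remote (void) __attribute__ ((long_call));
     void nearby (void) __attribute__ ((short_call));

   while (c) and the final return cover the -mlong-calls command-line
   default.  */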
4799
4800 /* Return nonzero if it is ok to make a tail-call to DECL. */
4801 static bool
4802 arm_function_ok_for_sibcall (tree decl, tree exp)
4803 {
4804 unsigned long func_type;
4805
4806 if (cfun->machine->sibcall_blocked)
4807 return false;
4808
4809 /* Never tailcall something for which we have no decl, or if we
4810 are generating code for Thumb-1. */
4811 if (decl == NULL || TARGET_THUMB1)
4812 return false;
4813
4814 /* The PIC register is live on entry to VxWorks PLT entries, so we
4815 must make the call before restoring the PIC register. */
4816 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4817 return false;
4818
4819 /* Cannot tail-call to long calls, since these are out of range of
4820 a branch instruction. */
4821 if (arm_is_long_call_p (decl))
4822 return false;
4823
4824 /* If we are interworking and the function is not declared static
4825 then we can't tail-call it unless we know that it exists in this
4826 compilation unit (since it might be a Thumb routine). */
4827 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4828 return false;
4829
4830 func_type = arm_current_func_type ();
4831 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4832 if (IS_INTERRUPT (func_type))
4833 return false;
4834
4835 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4836 {
4837 /* Check that the return value locations are the same. For
4838 example that we aren't returning a value from the sibling in
4839 a VFP register but then need to transfer it to a core
4840 register. */
4841 rtx a, b;
4842
4843 a = arm_function_value (TREE_TYPE (exp), decl, false);
4844 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4845 cfun->decl, false);
4846 if (!rtx_equal_p (a, b))
4847 return false;
4848 }
4849
4850 /* Never tailcall if function may be called with a misaligned SP. */
4851 if (IS_STACKALIGN (func_type))
4852 return false;
4853
4854 /* Everything else is ok. */
4855 return true;
4856 }
4857
4858 \f
4859 /* Addressing mode support functions. */
4860
4861 /* Return nonzero if X is a legitimate immediate operand when compiling
4862 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4863 int
4864 legitimate_pic_operand_p (rtx x)
4865 {
4866 if (GET_CODE (x) == SYMBOL_REF
4867 || (GET_CODE (x) == CONST
4868 && GET_CODE (XEXP (x, 0)) == PLUS
4869 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4870 return 0;
4871
4872 return 1;
4873 }
4874
4875 /* Record that the current function needs a PIC register. Initialize
4876 cfun->machine->pic_reg if we have not already done so. */
4877
4878 static void
4879 require_pic_register (void)
4880 {
4881 /* A lot of the logic here is made obscure by the fact that this
4882 routine gets called as part of the rtx cost estimation process.
4883 We don't want those calls to affect any assumptions about the real
4884 function; and further, we can't call entry_of_function() until we
4885 start the real expansion process. */
4886 if (!crtl->uses_pic_offset_table)
4887 {
4888 gcc_assert (can_create_pseudo_p ());
4889 if (arm_pic_register != INVALID_REGNUM)
4890 {
4891 if (!cfun->machine->pic_reg)
4892 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4893
4894 /* Play games to avoid marking the function as needing pic
4895 if we are being called as part of the cost-estimation
4896 process. */
4897 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4898 crtl->uses_pic_offset_table = 1;
4899 }
4900 else
4901 {
4902 rtx seq, insn;
4903
4904 if (!cfun->machine->pic_reg)
4905 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4906
4907 /* Play games to avoid marking the function as needing pic
4908 if we are being called as part of the cost-estimation
4909 process. */
4910 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4911 {
4912 crtl->uses_pic_offset_table = 1;
4913 start_sequence ();
4914
4915 arm_load_pic_register (0UL);
4916
4917 seq = get_insns ();
4918 end_sequence ();
4919
4920 for (insn = seq; insn; insn = NEXT_INSN (insn))
4921 if (INSN_P (insn))
4922 INSN_LOCATOR (insn) = prologue_locator;
4923
4924 /* We can be called during expansion of PHI nodes, where
4925 we can't yet emit instructions directly in the final
4926 insn stream. Queue the insns on the entry edge, they will
4927 be committed after everything else is expanded. */
4928 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4929 }
4930 }
4931 }
4932 }
4933
4934 rtx
4935 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4936 {
4937 if (GET_CODE (orig) == SYMBOL_REF
4938 || GET_CODE (orig) == LABEL_REF)
4939 {
4940 rtx insn;
4941
4942 if (reg == 0)
4943 {
4944 gcc_assert (can_create_pseudo_p ());
4945 reg = gen_reg_rtx (Pmode);
4946 }
4947
4948 /* VxWorks does not impose a fixed gap between segments; the run-time
4949 gap can be different from the object-file gap. We therefore can't
4950 use GOTOFF unless we are absolutely sure that the symbol is in the
4951 same segment as the GOT. Unfortunately, the flexibility of linker
4952 scripts means that we can't be sure of that in general, so assume
4953 that GOTOFF is never valid on VxWorks. */
4954 if ((GET_CODE (orig) == LABEL_REF
4955 || (GET_CODE (orig) == SYMBOL_REF
4956 && SYMBOL_REF_LOCAL_P (orig)))
4957 && NEED_GOT_RELOC
4958 && !TARGET_VXWORKS_RTP)
4959 insn = arm_pic_static_addr (orig, reg);
4960 else
4961 {
4962 rtx pat;
4963 rtx mem;
4964
4965 /* If this function doesn't have a pic register, create one now. */
4966 require_pic_register ();
4967
4968 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
4969
4970 /* Make the MEM as close to a constant as possible. */
4971 mem = SET_SRC (pat);
4972 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
4973 MEM_READONLY_P (mem) = 1;
4974 MEM_NOTRAP_P (mem) = 1;
4975
4976 insn = emit_insn (pat);
4977 }
4978
4979 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4980 by loop. */
4981 set_unique_reg_note (insn, REG_EQUAL, orig);
4982
4983 return reg;
4984 }
4985 else if (GET_CODE (orig) == CONST)
4986 {
4987 rtx base, offset;
4988
4989 if (GET_CODE (XEXP (orig, 0)) == PLUS
4990 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
4991 return orig;
4992
4993 /* Handle the case where we have: const (UNSPEC_TLS). */
4994 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
4995 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
4996 return orig;
4997
4998 /* Handle the case where we have:
4999 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5000 CONST_INT. */
5001 if (GET_CODE (XEXP (orig, 0)) == PLUS
5002 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5003 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5004 {
5005 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5006 return orig;
5007 }
5008
5009 if (reg == 0)
5010 {
5011 gcc_assert (can_create_pseudo_p ());
5012 reg = gen_reg_rtx (Pmode);
5013 }
5014
5015 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5016
5017 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5018 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5019 base == reg ? 0 : reg);
5020
5021 if (GET_CODE (offset) == CONST_INT)
5022 {
5023 /* The base register doesn't really matter, we only want to
5024 test the index for the appropriate mode. */
5025 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5026 {
5027 gcc_assert (can_create_pseudo_p ());
5028 offset = force_reg (Pmode, offset);
5029 }
5030
5031 if (GET_CODE (offset) == CONST_INT)
5032 return plus_constant (base, INTVAL (offset));
5033 }
5034
5035 if (GET_MODE_SIZE (mode) > 4
5036 && (GET_MODE_CLASS (mode) == MODE_INT
5037 || TARGET_SOFT_FLOAT))
5038 {
5039 emit_insn (gen_addsi3 (reg, base, offset));
5040 return reg;
5041 }
5042
5043 return gen_rtx_PLUS (Pmode, base, offset);
5044 }
5045
5046 return orig;
5047 }
5048
5049
5050 /* Find a spare register to use during the prolog of a function. */
5051
5052 static int
5053 thumb_find_work_register (unsigned long pushed_regs_mask)
5054 {
5055 int reg;
5056
5057 /* Check the argument registers first as these are call-used. The
5058 register allocation order means that sometimes r3 might be used
5059 but earlier argument registers might not, so check them all. */
5060 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5061 if (!df_regs_ever_live_p (reg))
5062 return reg;
5063
5064 /* Before going on to check the call-saved registers we can try a couple
5065 more ways of deducing that r3 is available. The first is when we are
5066 pushing anonymous arguments onto the stack and we have less than 4
5067 registers worth of fixed arguments(*). In this case r3 will be part of
5068 the variable argument list and so we can be sure that it will be
5069 pushed right at the start of the function. Hence it will be available
5070 for the rest of the prologue.
5071 (*): ie crtl->args.pretend_args_size is greater than 0. */
5072 if (cfun->machine->uses_anonymous_args
5073 && crtl->args.pretend_args_size > 0)
5074 return LAST_ARG_REGNUM;
5075
5076 /* The other case is when we have fixed arguments but less than 4 registers
5077 worth. In this case r3 might be used in the body of the function, but
5078 it is not being used to convey an argument into the function. In theory
5079 we could just check crtl->args.size to see how many bytes are
5080 being passed in argument registers, but it seems that it is unreliable.
5081 Sometimes it will have the value 0 when in fact arguments are being
5082 passed. (See testcase execute/20021111-1.c for an example.) So we also
5083 check the args_info.nregs field. The problem with this field is
5084 that it makes no allowances for arguments that are passed to the
5085 function but which are not used. Hence we could miss an opportunity
5086 when a function has an unused argument in r3. But it is better to be
5087 safe than to be sorry. */
5088 if (! cfun->machine->uses_anonymous_args
5089 && crtl->args.size >= 0
5090 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5091 && crtl->args.info.nregs < 4)
5092 return LAST_ARG_REGNUM;
5093
5094 /* Otherwise look for a call-saved register that is going to be pushed. */
5095 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5096 if (pushed_regs_mask & (1 << reg))
5097 return reg;
5098
5099 if (TARGET_THUMB2)
5100 {
5101 /* Thumb-2 can use high regs. */
5102 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5103 if (pushed_regs_mask & (1 << reg))
5104 return reg;
5105 }
5106 /* Something went wrong - thumb_compute_save_reg_mask()
5107 should have arranged for a suitable register to be pushed. */
5108 gcc_unreachable ();
5109 }
5110
5111 static GTY(()) int pic_labelno;
5112
5113 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5114 low register. */
5115
5116 void
5117 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5118 {
5119 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5120
5121 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5122 return;
5123
5124 gcc_assert (flag_pic);
5125
5126 pic_reg = cfun->machine->pic_reg;
5127 if (TARGET_VXWORKS_RTP)
5128 {
5129 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5130 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5131 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5132
5133 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5134
5135 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5136 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5137 }
5138 else
5139 {
5140 /* We use an UNSPEC rather than a LABEL_REF because this label
5141 never appears in the code stream. */
5142
5143 labelno = GEN_INT (pic_labelno++);
5144 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5145 l1 = gen_rtx_CONST (VOIDmode, l1);
5146
5147 /* On the ARM the PC register contains 'dot + 8' at the time of the
5148 addition, on the Thumb it is 'dot + 4'. */
5149 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5150 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5151 UNSPEC_GOTSYM_OFF);
5152 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5153
5154 if (TARGET_32BIT)
5155 {
5156 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5157 if (TARGET_ARM)
5158 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5159 else
5160 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5161 }
5162 else /* TARGET_THUMB1 */
5163 {
5164 if (arm_pic_register != INVALID_REGNUM
5165 && REGNO (pic_reg) > LAST_LO_REGNUM)
5166 {
5167 /* We will have pushed the pic register, so we should always be
5168 able to find a work register. */
5169 pic_tmp = gen_rtx_REG (SImode,
5170 thumb_find_work_register (saved_regs));
5171 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5172 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5173 }
5174 else
5175 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5176 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5177 }
5178 }
5179
5180 /* Need to emit this whether or not we obey regdecls,
5181 since setjmp/longjmp can cause life info to screw up. */
5182 emit_use (pic_reg);
5183 }
5184
5185 /* Generate code to load the address of a static var when flag_pic is set. */
5186 static rtx
5187 arm_pic_static_addr (rtx orig, rtx reg)
5188 {
5189 rtx l1, labelno, offset_rtx, insn;
5190
5191 gcc_assert (flag_pic);
5192
5193 /* We use an UNSPEC rather than a LABEL_REF because this label
5194 never appears in the code stream. */
5195 labelno = GEN_INT (pic_labelno++);
5196 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5197 l1 = gen_rtx_CONST (VOIDmode, l1);
5198
5199 /* On the ARM the PC register contains 'dot + 8' at the time of the
5200 addition, on the Thumb it is 'dot + 4'. */
5201 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5202 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5203 UNSPEC_SYMBOL_OFFSET);
5204 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5205
5206 if (TARGET_32BIT)
5207 {
5208 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5209 if (TARGET_ARM)
5210 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5211 else
5212 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5213 }
5214 else /* TARGET_THUMB1 */
5215 {
5216 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5217 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5218 }
5219
5220 return insn;
5221 }
5222
5223 /* Return nonzero if X is valid as an ARM state addressing register. */
5224 static int
5225 arm_address_register_rtx_p (rtx x, int strict_p)
5226 {
5227 int regno;
5228
5229 if (GET_CODE (x) != REG)
5230 return 0;
5231
5232 regno = REGNO (x);
5233
5234 if (strict_p)
5235 return ARM_REGNO_OK_FOR_BASE_P (regno);
5236
5237 return (regno <= LAST_ARM_REGNUM
5238 || regno >= FIRST_PSEUDO_REGISTER
5239 || regno == FRAME_POINTER_REGNUM
5240 || regno == ARG_POINTER_REGNUM);
5241 }
5242
5243 /* Return TRUE if this rtx is the difference of a symbol and a label,
5244 and will reduce to a PC-relative relocation in the object file.
5245 Expressions like this can be left alone when generating PIC, rather
5246 than forced through the GOT. */
5247 static int
5248 pcrel_constant_p (rtx x)
5249 {
5250 if (GET_CODE (x) == MINUS)
5251 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5252
5253 return FALSE;
5254 }
5255
5256 /* Return true if X will surely end up in an index register after next
5257 splitting pass. */
5258 static bool
5259 will_be_in_index_register (const_rtx x)
5260 {
5261 /* arm.md: calculate_pic_address will split this into a register. */
5262 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5263 }
5264
5265 /* Return nonzero if X is a valid ARM state address operand. */
5266 int
5267 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5268 int strict_p)
5269 {
5270 bool use_ldrd;
5271 enum rtx_code code = GET_CODE (x);
5272
5273 if (arm_address_register_rtx_p (x, strict_p))
5274 return 1;
5275
5276 use_ldrd = (TARGET_LDRD
5277 && (mode == DImode
5278 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5279
5280 if (code == POST_INC || code == PRE_DEC
5281 || ((code == PRE_INC || code == POST_DEC)
5282 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5283 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5284
5285 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5286 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5287 && GET_CODE (XEXP (x, 1)) == PLUS
5288 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5289 {
5290 rtx addend = XEXP (XEXP (x, 1), 1);
5291
5292 /* Don't allow ldrd post increment by register because it's hard
5293 to fix up invalid register choices. */
5294 if (use_ldrd
5295 && GET_CODE (x) == POST_MODIFY
5296 && GET_CODE (addend) == REG)
5297 return 0;
5298
5299 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5300 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5301 }
5302
5303 /* After reload constants split into minipools will have addresses
5304 from a LABEL_REF. */
5305 else if (reload_completed
5306 && (code == LABEL_REF
5307 || (code == CONST
5308 && GET_CODE (XEXP (x, 0)) == PLUS
5309 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5310 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5311 return 1;
5312
5313 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5314 return 0;
5315
5316 else if (code == PLUS)
5317 {
5318 rtx xop0 = XEXP (x, 0);
5319 rtx xop1 = XEXP (x, 1);
5320
5321 return ((arm_address_register_rtx_p (xop0, strict_p)
5322 && ((GET_CODE(xop1) == CONST_INT
5323 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5324 || (!strict_p && will_be_in_index_register (xop1))))
5325 || (arm_address_register_rtx_p (xop1, strict_p)
5326 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5327 }
5328
5329 #if 0
5330 /* Reload currently can't handle MINUS, so disable this for now */
5331 else if (GET_CODE (x) == MINUS)
5332 {
5333 rtx xop0 = XEXP (x, 0);
5334 rtx xop1 = XEXP (x, 1);
5335
5336 return (arm_address_register_rtx_p (xop0, strict_p)
5337 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5338 }
5339 #endif
5340
5341 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5342 && code == SYMBOL_REF
5343 && CONSTANT_POOL_ADDRESS_P (x)
5344 && ! (flag_pic
5345 && symbol_mentioned_p (get_pool_constant (x))
5346 && ! pcrel_constant_p (get_pool_constant (x))))
5347 return 1;
5348
5349 return 0;
5350 }
5351
5352 /* Return nonzero if X is a valid Thumb-2 address operand. */
5353 static int
5354 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5355 {
5356 bool use_ldrd;
5357 enum rtx_code code = GET_CODE (x);
5358
5359 if (arm_address_register_rtx_p (x, strict_p))
5360 return 1;
5361
5362 use_ldrd = (TARGET_LDRD
5363 && (mode == DImode
5364 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5365
5366 if (code == POST_INC || code == PRE_DEC
5367 || ((code == PRE_INC || code == POST_DEC)
5368 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5369 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5370
5371 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5372 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5373 && GET_CODE (XEXP (x, 1)) == PLUS
5374 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5375 {
5376 /* Thumb-2 only has autoincrement by constant. */
5377 rtx addend = XEXP (XEXP (x, 1), 1);
5378 HOST_WIDE_INT offset;
5379
5380 if (GET_CODE (addend) != CONST_INT)
5381 return 0;
5382
5383 offset = INTVAL(addend);
5384 if (GET_MODE_SIZE (mode) <= 4)
5385 return (offset > -256 && offset < 256);
5386
5387 return (use_ldrd && offset > -1024 && offset < 1024
5388 && (offset & 3) == 0);
5389 }
5390
5391 /* After reload constants split into minipools will have addresses
5392 from a LABEL_REF. */
5393 else if (reload_completed
5394 && (code == LABEL_REF
5395 || (code == CONST
5396 && GET_CODE (XEXP (x, 0)) == PLUS
5397 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5398 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5399 return 1;
5400
5401 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5402 return 0;
5403
5404 else if (code == PLUS)
5405 {
5406 rtx xop0 = XEXP (x, 0);
5407 rtx xop1 = XEXP (x, 1);
5408
5409 return ((arm_address_register_rtx_p (xop0, strict_p)
5410 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5411 || (!strict_p && will_be_in_index_register (xop1))))
5412 || (arm_address_register_rtx_p (xop1, strict_p)
5413 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5414 }
5415
5416 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5417 && code == SYMBOL_REF
5418 && CONSTANT_POOL_ADDRESS_P (x)
5419 && ! (flag_pic
5420 && symbol_mentioned_p (get_pool_constant (x))
5421 && ! pcrel_constant_p (get_pool_constant (x))))
5422 return 1;
5423
5424 return 0;
5425 }
5426
5427 /* Return nonzero if INDEX is valid for an address index operand in
5428 ARM state. */
5429 static int
5430 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5431 int strict_p)
5432 {
5433 HOST_WIDE_INT range;
5434 enum rtx_code code = GET_CODE (index);
5435
5436 /* Standard coprocessor addressing modes. */
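/* These accept a word-scaled 8-bit index, i.e. word-aligned offsets in
   the range [-1020, +1020]; hence the multiple-of-four test within
   (-1024, 1024) below.  */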
5437 if (TARGET_HARD_FLOAT
5438 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5439 && (mode == SFmode || mode == DFmode
5440 || (TARGET_MAVERICK && mode == DImode)))
5441 return (code == CONST_INT && INTVAL (index) < 1024
5442 && INTVAL (index) > -1024
5443 && (INTVAL (index) & 3) == 0);
5444
5445 /* For quad modes, we restrict the constant offset to be slightly less
5446 than what the instruction format permits. We do this because for
5447 quad mode moves, we will actually decompose them into two separate
5448 double-mode reads or writes. INDEX must therefore be a valid
5449 (double-mode) offset and so should INDEX+8. */
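/* For example, the largest offset accepted below is 1012, so the second
   doubleword access at 1012 + 8 = 1020 still fits the normal
   [-1020, +1020] range.  */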
5450 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5451 return (code == CONST_INT
5452 && INTVAL (index) < 1016
5453 && INTVAL (index) > -1024
5454 && (INTVAL (index) & 3) == 0);
5455
5456 /* We have no such constraint on double mode offsets, so we permit the
5457 full range of the instruction format. */
5458 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5459 return (code == CONST_INT
5460 && INTVAL (index) < 1024
5461 && INTVAL (index) > -1024
5462 && (INTVAL (index) & 3) == 0);
5463
5464 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5465 return (code == CONST_INT
5466 && INTVAL (index) < 1024
5467 && INTVAL (index) > -1024
5468 && (INTVAL (index) & 3) == 0);
5469
5470 if (arm_address_register_rtx_p (index, strict_p)
5471 && (GET_MODE_SIZE (mode) <= 4))
5472 return 1;
5473
5474 if (mode == DImode || mode == DFmode)
5475 {
5476 if (code == CONST_INT)
5477 {
5478 HOST_WIDE_INT val = INTVAL (index);
5479
5480 if (TARGET_LDRD)
5481 return val > -256 && val < 256;
5482 else
5483 return val > -4096 && val < 4092;
5484 }
5485
5486 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5487 }
5488
5489 if (GET_MODE_SIZE (mode) <= 4
5490 && ! (arm_arch4
5491 && (mode == HImode
5492 || mode == HFmode
5493 || (mode == QImode && outer == SIGN_EXTEND))))
5494 {
5495 if (code == MULT)
5496 {
5497 rtx xiop0 = XEXP (index, 0);
5498 rtx xiop1 = XEXP (index, 1);
5499
5500 return ((arm_address_register_rtx_p (xiop0, strict_p)
5501 && power_of_two_operand (xiop1, SImode))
5502 || (arm_address_register_rtx_p (xiop1, strict_p)
5503 && power_of_two_operand (xiop0, SImode)));
5504 }
5505 else if (code == LSHIFTRT || code == ASHIFTRT
5506 || code == ASHIFT || code == ROTATERT)
5507 {
5508 rtx op = XEXP (index, 1);
5509
5510 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5511 && GET_CODE (op) == CONST_INT
5512 && INTVAL (op) > 0
5513 && INTVAL (op) <= 31);
5514 }
5515 }
5516
5517 /* For ARM v4 we may be doing a sign-extend operation during the
5518 load. */
5519 if (arm_arch4)
5520 {
5521 if (mode == HImode
5522 || mode == HFmode
5523 || (outer == SIGN_EXTEND && mode == QImode))
5524 range = 256;
5525 else
5526 range = 4096;
5527 }
5528 else
5529 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5530
5531 return (code == CONST_INT
5532 && INTVAL (index) < range
5533 && INTVAL (index) > -range);
5534 }
5535
5536 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5537 index operand, i.e. 1, 2, 4 or 8. */
5538 static bool
5539 thumb2_index_mul_operand (rtx op)
5540 {
5541 HOST_WIDE_INT val;
5542
5543 if (GET_CODE(op) != CONST_INT)
5544 return false;
5545
5546 val = INTVAL(op);
5547 return (val == 1 || val == 2 || val == 4 || val == 8);
5548 }
5549
5550 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5551 static int
5552 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5553 {
5554 enum rtx_code code = GET_CODE (index);
5555
5556 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5557 /* Standard coprocessor addressing modes. */
5558 if (TARGET_HARD_FLOAT
5559 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5560 && (mode == SFmode || mode == DFmode
5561 || (TARGET_MAVERICK && mode == DImode)))
5562 return (code == CONST_INT && INTVAL (index) < 1024
5563 /* Thumb-2 allows only > -256 index range for its core register
5564 load/stores. Since we allow SF/DF in core registers, we have
5565 to use the intersection between -256~4096 (core) and -1024~1024
5566 (coprocessor). */
5567 && INTVAL (index) > -256
5568 && (INTVAL (index) & 3) == 0);
5569
5570 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5571 {
5572 /* For DImode assume values will usually live in core regs
5573 and only allow LDRD addressing modes. */
5574 if (!TARGET_LDRD || mode != DImode)
5575 return (code == CONST_INT
5576 && INTVAL (index) < 1024
5577 && INTVAL (index) > -1024
5578 && (INTVAL (index) & 3) == 0);
5579 }
5580
5581 /* For quad modes, we restrict the constant offset to be slightly less
5582 than what the instruction format permits. We do this because for
5583 quad mode moves, we will actually decompose them into two separate
5584 double-mode reads or writes. INDEX must therefore be a valid
5585 (double-mode) offset and so should INDEX+8. */
5586 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5587 return (code == CONST_INT
5588 && INTVAL (index) < 1016
5589 && INTVAL (index) > -1024
5590 && (INTVAL (index) & 3) == 0);
5591
5592 /* We have no such constraint on double mode offsets, so we permit the
5593 full range of the instruction format. */
5594 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5595 return (code == CONST_INT
5596 && INTVAL (index) < 1024
5597 && INTVAL (index) > -1024
5598 && (INTVAL (index) & 3) == 0);
5599
5600 if (arm_address_register_rtx_p (index, strict_p)
5601 && (GET_MODE_SIZE (mode) <= 4))
5602 return 1;
5603
5604 if (mode == DImode || mode == DFmode)
5605 {
5606 if (code == CONST_INT)
5607 {
5608 HOST_WIDE_INT val = INTVAL (index);
5609 /* ??? Can we assume ldrd for thumb2? */
5610 /* Thumb-2 ldrd only has reg+const addressing modes. */
5611 /* ldrd supports offsets of +-1020.
5612 However the ldr fallback does not. */
5613 return val > -256 && val < 256 && (val & 3) == 0;
5614 }
5615 else
5616 return 0;
5617 }
5618
5619 if (code == MULT)
5620 {
5621 rtx xiop0 = XEXP (index, 0);
5622 rtx xiop1 = XEXP (index, 1);
5623
5624 return ((arm_address_register_rtx_p (xiop0, strict_p)
5625 && thumb2_index_mul_operand (xiop1))
5626 || (arm_address_register_rtx_p (xiop1, strict_p)
5627 && thumb2_index_mul_operand (xiop0)));
5628 }
5629 else if (code == ASHIFT)
5630 {
5631 rtx op = XEXP (index, 1);
5632
5633 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5634 && GET_CODE (op) == CONST_INT
5635 && INTVAL (op) > 0
5636 && INTVAL (op) <= 3);
5637 }
5638
5639 return (code == CONST_INT
5640 && INTVAL (index) < 4096
5641 && INTVAL (index) > -256);
5642 }
5643
5644 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5645 static int
5646 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5647 {
5648 int regno;
5649
5650 if (GET_CODE (x) != REG)
5651 return 0;
5652
5653 regno = REGNO (x);
5654
5655 if (strict_p)
5656 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5657
5658 return (regno <= LAST_LO_REGNUM
5659 || regno > LAST_VIRTUAL_REGISTER
5660 || regno == FRAME_POINTER_REGNUM
5661 || (GET_MODE_SIZE (mode) >= 4
5662 && (regno == STACK_POINTER_REGNUM
5663 || regno >= FIRST_PSEUDO_REGISTER
5664 || x == hard_frame_pointer_rtx
5665 || x == arg_pointer_rtx)));
5666 }
5667
5668 /* Return nonzero if x is a legitimate index register. This is the case
5669 for any base register that can access a QImode object. */
5670 inline static int
5671 thumb1_index_register_rtx_p (rtx x, int strict_p)
5672 {
5673 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5674 }
5675
5676 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5677
5678 The AP may be eliminated to either the SP or the FP, so we use the
5679 least common denominator, e.g. SImode, and offsets from 0 to 64.
5680
5681 ??? Verify whether the above is the right approach.
5682
5683 ??? Also, the FP may be eliminated to the SP, so perhaps that
5684 needs special handling also.
5685
5686 ??? Look at how the mips16 port solves this problem. It probably uses
5687 better ways to solve some of these problems.
5688
5689 Although it is not incorrect, we don't accept QImode and HImode
5690 addresses based on the frame pointer or arg pointer until the
5691 reload pass starts. This is so that eliminating such addresses
5692 into stack based ones won't produce impossible code. */
5693 int
5694 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5695 {
5696 /* ??? Not clear if this is right. Experiment. */
5697 if (GET_MODE_SIZE (mode) < 4
5698 && !(reload_in_progress || reload_completed)
5699 && (reg_mentioned_p (frame_pointer_rtx, x)
5700 || reg_mentioned_p (arg_pointer_rtx, x)
5701 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5702 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5703 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5704 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5705 return 0;
5706
5707 /* Accept any base register. SP only in SImode or larger. */
5708 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5709 return 1;
5710
5711 /* This is PC relative data before arm_reorg runs. */
5712 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5713 && GET_CODE (x) == SYMBOL_REF
5714 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5715 return 1;
5716
5717 /* This is PC relative data after arm_reorg runs. */
5718 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5719 && reload_completed
5720 && (GET_CODE (x) == LABEL_REF
5721 || (GET_CODE (x) == CONST
5722 && GET_CODE (XEXP (x, 0)) == PLUS
5723 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5724 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5725 return 1;
5726
5727 /* Post-inc indexing only supported for SImode and larger. */
5728 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5729 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5730 return 1;
5731
5732 else if (GET_CODE (x) == PLUS)
5733 {
5734 /* REG+REG address can be any two index registers. */
5735 /* We disallow FRAME+REG addressing since we know that FRAME
5736 will be replaced with STACK, and SP relative addressing only
5737 permits SP+OFFSET. */
5738 if (GET_MODE_SIZE (mode) <= 4
5739 && XEXP (x, 0) != frame_pointer_rtx
5740 && XEXP (x, 1) != frame_pointer_rtx
5741 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5742 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5743 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5744 return 1;
5745
5746 /* REG+const has 5-7 bit offset for non-SP registers. */
5747 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5748 || XEXP (x, 0) == arg_pointer_rtx)
5749 && GET_CODE (XEXP (x, 1)) == CONST_INT
5750 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5751 return 1;
5752
5753 /* REG+const has 10-bit offset for SP, but only SImode and
5754 larger is supported. */
5755 /* ??? Should probably check for DI/DFmode overflow here
5756 just like GO_IF_LEGITIMATE_OFFSET does. */
5757 else if (GET_CODE (XEXP (x, 0)) == REG
5758 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5759 && GET_MODE_SIZE (mode) >= 4
5760 && GET_CODE (XEXP (x, 1)) == CONST_INT
5761 && INTVAL (XEXP (x, 1)) >= 0
5762 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5763 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5764 return 1;
5765
5766 else if (GET_CODE (XEXP (x, 0)) == REG
5767 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5768 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5769 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5770 && REGNO (XEXP (x, 0))
5771 <= LAST_VIRTUAL_POINTER_REGISTER))
5772 && GET_MODE_SIZE (mode) >= 4
5773 && GET_CODE (XEXP (x, 1)) == CONST_INT
5774 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5775 return 1;
5776 }
5777
5778 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5779 && GET_MODE_SIZE (mode) == 4
5780 && GET_CODE (x) == SYMBOL_REF
5781 && CONSTANT_POOL_ADDRESS_P (x)
5782 && ! (flag_pic
5783 && symbol_mentioned_p (get_pool_constant (x))
5784 && ! pcrel_constant_p (get_pool_constant (x))))
5785 return 1;
5786
5787 return 0;
5788 }
5789
5790 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5791 instruction of mode MODE. */
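/* Concretely: byte offsets 0-31 for QImode, even offsets 0-62 for HImode,
   and word-aligned offsets for larger modes such that the whole access
   ends by byte 128 (e.g. 0-124 for SImode).  */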
5792 int
5793 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5794 {
5795 switch (GET_MODE_SIZE (mode))
5796 {
5797 case 1:
5798 return val >= 0 && val < 32;
5799
5800 case 2:
5801 return val >= 0 && val < 64 && (val & 1) == 0;
5802
5803 default:
5804 return (val >= 0
5805 && (val + GET_MODE_SIZE (mode)) <= 128
5806 && (val & 3) == 0);
5807 }
5808 }
5809
5810 bool
5811 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5812 {
5813 if (TARGET_ARM)
5814 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5815 else if (TARGET_THUMB2)
5816 return thumb2_legitimate_address_p (mode, x, strict_p);
5817 else /* if (TARGET_THUMB1) */
5818 return thumb1_legitimate_address_p (mode, x, strict_p);
5819 }
5820
5821 /* Build the SYMBOL_REF for __tls_get_addr. */
5822
5823 static GTY(()) rtx tls_get_addr_libfunc;
5824
5825 static rtx
5826 get_tls_get_addr (void)
5827 {
5828 if (!tls_get_addr_libfunc)
5829 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5830 return tls_get_addr_libfunc;
5831 }
5832
5833 static rtx
5834 arm_load_tp (rtx target)
5835 {
5836 if (!target)
5837 target = gen_reg_rtx (SImode);
5838
5839 if (TARGET_HARD_TP)
5840 {
5841 /* Can return in any reg. */
5842 emit_insn (gen_load_tp_hard (target));
5843 }
5844 else
5845 {
5846 /* Always returned in r0. Immediately copy the result into a pseudo,
5847 otherwise other uses of r0 (e.g. setting up function arguments) may
5848 clobber the value. */
5849
5850 rtx tmp;
5851
5852 emit_insn (gen_load_tp_soft ());
5853
5854 tmp = gen_rtx_REG (SImode, 0);
5855 emit_move_insn (target, tmp);
5856 }
5857 return target;
5858 }
5859
5860 static rtx
5861 load_tls_operand (rtx x, rtx reg)
5862 {
5863 rtx tmp;
5864
5865 if (reg == NULL_RTX)
5866 reg = gen_reg_rtx (SImode);
5867
5868 tmp = gen_rtx_CONST (SImode, x);
5869
5870 emit_move_insn (reg, tmp);
5871
5872 return reg;
5873 }
5874
5875 static rtx
5876 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5877 {
5878 rtx insns, label, labelno, sum;
5879
5880 start_sequence ();
5881
5882 labelno = GEN_INT (pic_labelno++);
5883 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5884 label = gen_rtx_CONST (VOIDmode, label);
5885
5886 sum = gen_rtx_UNSPEC (Pmode,
5887 gen_rtvec (4, x, GEN_INT (reloc), label,
5888 GEN_INT (TARGET_ARM ? 8 : 4)),
5889 UNSPEC_TLS);
5890 reg = load_tls_operand (sum, reg);
5891
5892 if (TARGET_ARM)
5893 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5894 else if (TARGET_THUMB2)
5895 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5896 else /* TARGET_THUMB1 */
5897 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5898
5899 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5900 Pmode, 1, reg, Pmode);
5901
5902 insns = get_insns ();
5903 end_sequence ();
5904
5905 return insns;
5906 }
5907
5908 rtx
5909 legitimize_tls_address (rtx x, rtx reg)
5910 {
5911 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5912 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5913
5914 switch (model)
5915 {
5916 case TLS_MODEL_GLOBAL_DYNAMIC:
5917 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5918 dest = gen_reg_rtx (Pmode);
5919 emit_libcall_block (insns, dest, ret, x);
5920 return dest;
5921
5922 case TLS_MODEL_LOCAL_DYNAMIC:
5923 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5924
5925 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5926 share the LDM result with other LD model accesses. */
5927 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5928 UNSPEC_TLS);
5929 dest = gen_reg_rtx (Pmode);
5930 emit_libcall_block (insns, dest, ret, eqv);
5931
5932 /* Load the addend. */
5933 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5934 UNSPEC_TLS);
5935 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5936 return gen_rtx_PLUS (Pmode, dest, addend);
5937
5938 case TLS_MODEL_INITIAL_EXEC:
5939 labelno = GEN_INT (pic_labelno++);
5940 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5941 label = gen_rtx_CONST (VOIDmode, label);
5942 sum = gen_rtx_UNSPEC (Pmode,
5943 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5944 GEN_INT (TARGET_ARM ? 8 : 4)),
5945 UNSPEC_TLS);
5946 reg = load_tls_operand (sum, reg);
5947
5948 if (TARGET_ARM)
5949 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5950 else if (TARGET_THUMB2)
5951 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
5952 else
5953 {
5954 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5955 emit_move_insn (reg, gen_const_mem (SImode, reg));
5956 }
5957
5958 tp = arm_load_tp (NULL_RTX);
5959
5960 return gen_rtx_PLUS (Pmode, tp, reg);
5961
5962 case TLS_MODEL_LOCAL_EXEC:
5963 tp = arm_load_tp (NULL_RTX);
5964
5965 reg = gen_rtx_UNSPEC (Pmode,
5966 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5967 UNSPEC_TLS);
5968 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5969
5970 return gen_rtx_PLUS (Pmode, tp, reg);
5971
5972 default:
5973 abort ();
5974 }
5975 }
5976
5977 /* Try machine-dependent ways of modifying an illegitimate address
5978 to be legitimate. If we find one, return the new, valid address. */
5979 rtx
5980 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5981 {
5982 if (!TARGET_ARM)
5983 {
5984 /* TODO: legitimize_address for Thumb2. */
5985 if (TARGET_THUMB2)
5986 return x;
5987 return thumb_legitimize_address (x, orig_x, mode);
5988 }
5989
5990 if (arm_tls_symbol_p (x))
5991 return legitimize_tls_address (x, NULL_RTX);
5992
5993 if (GET_CODE (x) == PLUS)
5994 {
5995 rtx xop0 = XEXP (x, 0);
5996 rtx xop1 = XEXP (x, 1);
5997
5998 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
5999 xop0 = force_reg (SImode, xop0);
6000
6001 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6002 xop1 = force_reg (SImode, xop1);
6003
6004 if (ARM_BASE_REGISTER_RTX_P (xop0)
6005 && GET_CODE (xop1) == CONST_INT)
6006 {
6007 HOST_WIDE_INT n, low_n;
6008 rtx base_reg, val;
6009 n = INTVAL (xop1);
6010
6011 /* VFP addressing modes actually allow greater offsets, but for
6012 now we just stick with the lowest common denominator. */
6013 if (mode == DImode
6014 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6015 {
6016 low_n = n & 0x0f;
6017 n &= ~0x0f;
6018 if (low_n > 4)
6019 {
6020 n += 16;
6021 low_n -= 16;
6022 }
6023 }
6024 else
6025 {
6026 low_n = ((mode) == TImode ? 0
6027 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6028 n -= low_n;
6029 }
6030
6031 base_reg = gen_reg_rtx (SImode);
6032 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6033 emit_move_insn (base_reg, val);
6034 x = plus_constant (base_reg, low_n);
6035 }
6036 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6037 x = gen_rtx_PLUS (SImode, xop0, xop1);
6038 }
6039
6040 /* XXX We don't allow MINUS any more -- see comment in
6041 arm_legitimate_address_outer_p (). */
6042 else if (GET_CODE (x) == MINUS)
6043 {
6044 rtx xop0 = XEXP (x, 0);
6045 rtx xop1 = XEXP (x, 1);
6046
6047 if (CONSTANT_P (xop0))
6048 xop0 = force_reg (SImode, xop0);
6049
6050 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6051 xop1 = force_reg (SImode, xop1);
6052
6053 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6054 x = gen_rtx_MINUS (SImode, xop0, xop1);
6055 }
6056
6057 /* Make sure to take full advantage of the pre-indexed addressing mode
6058 with absolute addresses, which often allows for the base register to
6059 be factored out across multiple adjacent memory references, and it might
6060 even allow for the minipool to be avoided entirely. */
6061 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6062 {
6063 unsigned int bits;
6064 HOST_WIDE_INT mask, base, index;
6065 rtx base_reg;
6066
6067 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6068 use an 8-bit index. So let's use a 12-bit index for SImode only and
6069 hope that arm_gen_constant will enable ldrb to use more bits. */
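/* Illustrative trace with a hypothetical address: for x = 0x12345 in
   SImode, mask = 0xfff, so base = 0x12000 and index = 0x345; 0x12000
   has only two bits set, so the positive split is kept, the base is
   loaded into a register and the access becomes [base_reg, #0x345].  */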
6070 bits = (mode == SImode) ? 12 : 8;
6071 mask = (1 << bits) - 1;
6072 base = INTVAL (x) & ~mask;
6073 index = INTVAL (x) & mask;
6074 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6075 {
6076 /* It'll most probably be more efficient to generate the base
6077 with more bits set and use a negative index instead. */
6078 base |= mask;
6079 index -= mask;
6080 }
6081 base_reg = force_reg (SImode, GEN_INT (base));
6082 x = plus_constant (base_reg, index);
6083 }
6084
6085 if (flag_pic)
6086 {
6087 /* We need to find and carefully transform any SYMBOL and LABEL
6088 references; so go back to the original address expression. */
6089 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6090
6091 if (new_x != orig_x)
6092 x = new_x;
6093 }
6094
6095 return x;
6096 }
6097
6098
6099 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6100 to be legitimate. If we find one, return the new, valid address. */
6101 rtx
6102 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6103 {
6104 if (arm_tls_symbol_p (x))
6105 return legitimize_tls_address (x, NULL_RTX);
6106
6107 if (GET_CODE (x) == PLUS
6108 && GET_CODE (XEXP (x, 1)) == CONST_INT
6109 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6110 || INTVAL (XEXP (x, 1)) < 0))
6111 {
6112 rtx xop0 = XEXP (x, 0);
6113 rtx xop1 = XEXP (x, 1);
6114 HOST_WIDE_INT offset = INTVAL (xop1);
6115
6116 /* Try and fold the offset into a biasing of the base register and
6117 then offsetting that. Don't do this when optimizing for space
6118 since it can cause too many CSEs. */
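/* For example, when this folding applies to an SImode access at offset
   300, it is rewritten as base' = base + 252 followed by [base', #48],
   keeping the residual offset within the directly encodable 0-124
   range.  */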
6119 if (optimize_size && offset >= 0
6120 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6121 {
6122 HOST_WIDE_INT delta;
6123
6124 if (offset >= 256)
6125 delta = offset - (256 - GET_MODE_SIZE (mode));
6126 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6127 delta = 31 * GET_MODE_SIZE (mode);
6128 else
6129 delta = offset & (~31 * GET_MODE_SIZE (mode));
6130
6131 xop0 = force_operand (plus_constant (xop0, offset - delta),
6132 NULL_RTX);
6133 x = plus_constant (xop0, delta);
6134 }
6135 else if (offset < 0 && offset > -256)
6136 /* Small negative offsets are best done with a subtract before the
6137 dereference; forcing these into a register normally takes two
6138 instructions. */
6139 x = force_operand (x, NULL_RTX);
6140 else
6141 {
6142 /* For the remaining cases, force the constant into a register. */
6143 xop1 = force_reg (SImode, xop1);
6144 x = gen_rtx_PLUS (SImode, xop0, xop1);
6145 }
6146 }
6147 else if (GET_CODE (x) == PLUS
6148 && s_register_operand (XEXP (x, 1), SImode)
6149 && !s_register_operand (XEXP (x, 0), SImode))
6150 {
6151 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6152
6153 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6154 }
6155
6156 if (flag_pic)
6157 {
6158 /* We need to find and carefully transform any SYMBOL and LABEL
6159 references; so go back to the original address expression. */
6160 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6161
6162 if (new_x != orig_x)
6163 x = new_x;
6164 }
6165
6166 return x;
6167 }
6168
6169 bool
6170 arm_legitimize_reload_address (rtx *p,
6171 enum machine_mode mode,
6172 int opnum, int type,
6173 int ind_levels ATTRIBUTE_UNUSED)
6174 {
6175 if (GET_CODE (*p) == PLUS
6176 && GET_CODE (XEXP (*p, 0)) == REG
6177 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6178 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6179 {
6180 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6181 HOST_WIDE_INT low, high;
6182
6183 /* Detect coprocessor load/stores. */
6184 bool coproc_p = ((TARGET_HARD_FLOAT
6185 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
6186 && (mode == SFmode || mode == DFmode
6187 || (mode == DImode && TARGET_MAVERICK)))
6188 || (TARGET_REALLY_IWMMXT
6189 && VALID_IWMMXT_REG_MODE (mode))
6190 || (TARGET_NEON
6191 && (VALID_NEON_DREG_MODE (mode)
6192 || VALID_NEON_QREG_MODE (mode))));
6193
6194 /* For some conditions, bail out when the lower two bits are nonzero (the offset is unaligned). */
6195 if ((val & 0x3) != 0
6196 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6197 && (coproc_p
6198 /* For DI, and DF under soft-float: */
6199 || ((mode == DImode || mode == DFmode)
6200 /* Without ldrd, we use stm/ldm, which does not
6201 fare well with unaligned bits. */
6202 && (! TARGET_LDRD
6203 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6204 || TARGET_THUMB2))))
6205 return false;
6206
6207 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6208 of which the (reg+high) gets turned into a reload add insn,
6209 we try to decompose the index into high/low values that can often
6210 also lead to better reload CSE.
6211 For example:
6212 ldr r0, [r2, #4100] // Offset too large
6213 ldr r1, [r2, #4104] // Offset too large
6214
6215 is best reloaded as:
6216 add t1, r2, #4096
6217 ldr r0, [t1, #4]
6218 add t2, r2, #4096
6219 ldr r1, [t2, #8]
6220
6221 which post-reload CSE can simplify in most cases to eliminate the
6222 second add instruction:
6223 add t1, r2, #4096
6224 ldr r0, [t1, #4]
6225 ldr r1, [t1, #8]
6226
6227 The idea here is that we want to split out the bits of the constant
6228 as a mask, rather than by subtracting the maximum offset that the
6229 respective type of load/store instruction can handle.
6230
6231 We can still make use of a negative low part even if the overall
6232 offset is positive; sometimes this leads to an immediate that can be
6233 constructed with fewer instructions.
6234 For example:
6235 ldr r0, [r2, #0x3FFFFC]
6236
6237 This is best reloaded as:
6238 add t1, r2, #0x400000
6239 ldr r0, [t1, #-4]
6240
6241 The trick for spotting this for a load insn with N bits of offset
6242 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6243 negative offset that is going to make bit N and all the bits below
6244 it become zero in the remainder part.
6245
6246 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6247 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6248 used in most cases of ARM load/store instructions. */
6249
6250 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6251 (((VAL) & ((1 << (N)) - 1)) \
6252 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6253 : 0)
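
  /* Worked example (illustrative only, not compiled; the local names are
     hypothetical), matching the 0x3FFFFC case above.  */
#if 0
  {
    HOST_WIDE_INT example_val = 0x3FFFFC;
    HOST_WIDE_INT example_low = SIGN_MAG_LOW_ADDR_BITS (example_val, 12);
    /* example_low is -4: val & 0xfff = 0xffc is nonzero, so the macro
       computes (0x1ffc ^ 0x1000) - 0x1000 = 0xffc - 0x1000 = -4.  */
    HOST_WIDE_INT example_high = example_val - example_low;
    /* example_high is 0x400000, a single ARM immediate, giving
       "add t1, r2, #0x400000" followed by "ldr r0, [t1, #-4]".  */
  }
#endif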
6254
6255 if (coproc_p)
6256 {
6257 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6258
6259 /* NEON quad-word load/stores are made of two double-word accesses,
6260 so the valid index range is reduced by 8. Treat as 9-bit range if
6261 we go over it. */
6262 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6263 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6264 }
6265 else if (GET_MODE_SIZE (mode) == 8)
6266 {
6267 if (TARGET_LDRD)
6268 low = (TARGET_THUMB2
6269 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6270 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6271 else
6272 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6273 to access doublewords. The supported load/store offsets are
6274 -8, -4, and 4, which we try to produce here. */
6275 low = ((val & 0xf) ^ 0x8) - 0x8;
6276 }
6277 else if (GET_MODE_SIZE (mode) < 8)
6278 {
6279 /* NEON element load/stores do not have an offset. */
6280 if (TARGET_NEON_FP16 && mode == HFmode)
6281 return false;
6282
6283 if (TARGET_THUMB2)
6284 {
6285 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6286 Try the wider 12-bit range first, and re-try if the result
6287 is out of range. */
6288 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6289 if (low < -255)
6290 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6291 }
6292 else
6293 {
6294 if (mode == HImode || mode == HFmode)
6295 {
6296 if (arm_arch4)
6297 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6298 else
6299 {
6300 /* The storehi/movhi_bytes fallbacks can use only
6301 [-4094,+4094] of the full ldrb/strb index range. */
6302 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6303 if (low == 4095 || low == -4095)
6304 return false;
6305 }
6306 }
6307 else
6308 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6309 }
6310 }
6311 else
6312 return false;
6313
6314 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6315 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6316 - (unsigned HOST_WIDE_INT) 0x80000000);
6317 /* Check for overflow or zero. */
6318 if (low == 0 || high == 0 || (high + low != val))
6319 return false;
6320
6321 /* Reload the high part into a base reg; leave the low part
6322 in the mem. */
6323 *p = gen_rtx_PLUS (GET_MODE (*p),
6324 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6325 GEN_INT (high)),
6326 GEN_INT (low));
6327 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6328 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6329 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6330 return true;
6331 }
6332
6333 return false;
6334 }
6335
6336 rtx
6337 thumb_legitimize_reload_address (rtx *x_p,
6338 enum machine_mode mode,
6339 int opnum, int type,
6340 int ind_levels ATTRIBUTE_UNUSED)
6341 {
6342 rtx x = *x_p;
6343
6344 if (GET_CODE (x) == PLUS
6345 && GET_MODE_SIZE (mode) < 4
6346 && REG_P (XEXP (x, 0))
6347 && XEXP (x, 0) == stack_pointer_rtx
6348 && GET_CODE (XEXP (x, 1)) == CONST_INT
6349 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6350 {
6351 rtx orig_x = x;
6352
6353 x = copy_rtx (x);
6354 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6355 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6356 return x;
6357 }
6358
6359 /* If both registers are hi-regs, then it's better to reload the
6360 entire expression rather than each register individually. That
6361 only requires one reload register rather than two. */
6362 if (GET_CODE (x) == PLUS
6363 && REG_P (XEXP (x, 0))
6364 && REG_P (XEXP (x, 1))
6365 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6366 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6367 {
6368 rtx orig_x = x;
6369
6370 x = copy_rtx (x);
6371 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6372 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6373 return x;
6374 }
6375
6376 return NULL;
6377 }
6378
6379 /* Test for various thread-local symbols. */
6380
6381 /* Return TRUE if X is a thread-local symbol. */
6382
6383 static bool
6384 arm_tls_symbol_p (rtx x)
6385 {
6386 if (! TARGET_HAVE_TLS)
6387 return false;
6388
6389 if (GET_CODE (x) != SYMBOL_REF)
6390 return false;
6391
6392 return SYMBOL_REF_TLS_MODEL (x) != 0;
6393 }
6394
6395 /* Helper for arm_tls_referenced_p. */
6396
6397 static int
6398 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6399 {
6400 if (GET_CODE (*x) == SYMBOL_REF)
6401 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6402
6403 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6404 TLS offsets, not real symbol references. */
6405 if (GET_CODE (*x) == UNSPEC
6406 && XINT (*x, 1) == UNSPEC_TLS)
6407 return -1;
6408
6409 return 0;
6410 }
6411
6412 /* Return TRUE if X contains any TLS symbol references. */
6413
6414 bool
6415 arm_tls_referenced_p (rtx x)
6416 {
6417 if (! TARGET_HAVE_TLS)
6418 return false;
6419
6420 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6421 }
6422
6423 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6424
6425 On the ARM, allow any integer (invalid ones are removed later by insn
6426 patterns), nice doubles and symbol_refs which refer to the function's
6427 constant pool XXX.
6428
6429 When generating pic allow anything. */
6430
6431 static bool
6432 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6433 {
6434 /* At present, we have no support for Neon structure constants, so forbid
6435 them here. It might be possible to handle simple cases like 0 and -1
6436 in future. */
6437 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6438 return false;
6439
6440 return flag_pic || !label_mentioned_p (x);
6441 }
6442
6443 static bool
6444 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6445 {
6446 return (GET_CODE (x) == CONST_INT
6447 || GET_CODE (x) == CONST_DOUBLE
6448 || CONSTANT_ADDRESS_P (x)
6449 || flag_pic);
6450 }
6451
6452 static bool
6453 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6454 {
6455 return (!arm_cannot_force_const_mem (mode, x)
6456 && (TARGET_32BIT
6457 ? arm_legitimate_constant_p_1 (mode, x)
6458 : thumb_legitimate_constant_p (mode, x)));
6459 }
6460
6461 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6462
6463 static bool
6464 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6465 {
6466 rtx base, offset;
6467
6468 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6469 {
6470 split_const (x, &base, &offset);
6471 if (GET_CODE (base) == SYMBOL_REF
6472 && !offset_within_block_p (base, INTVAL (offset)))
6473 return true;
6474 }
6475 return arm_tls_referenced_p (x);
6476 }
6477 \f
6478 #define REG_OR_SUBREG_REG(X) \
6479 (GET_CODE (X) == REG \
6480 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6481
6482 #define REG_OR_SUBREG_RTX(X) \
6483 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6484
6485 static inline int
6486 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6487 {
6488 enum machine_mode mode = GET_MODE (x);
6489 int total;
6490
6491 switch (code)
6492 {
6493 case ASHIFT:
6494 case ASHIFTRT:
6495 case LSHIFTRT:
6496 case ROTATERT:
6497 case PLUS:
6498 case MINUS:
6499 case COMPARE:
6500 case NEG:
6501 case NOT:
6502 return COSTS_N_INSNS (1);
6503
6504 case MULT:
6505 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6506 {
6507 int cycles = 0;
6508 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6509
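/* One loop iteration per two bits of the constant, so the estimate
   grows with the position of its most significant set bit.  */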
6510 while (i)
6511 {
6512 i >>= 2;
6513 cycles++;
6514 }
6515 return COSTS_N_INSNS (2) + cycles;
6516 }
6517 return COSTS_N_INSNS (1) + 16;
6518
6519 case SET:
6520 return (COSTS_N_INSNS (1)
6521 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6522 + (GET_CODE (SET_DEST (x)) == MEM)));
6523
6524 case CONST_INT:
6525 if (outer == SET)
6526 {
6527 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6528 return 0;
6529 if (thumb_shiftable_const (INTVAL (x)))
6530 return COSTS_N_INSNS (2);
6531 return COSTS_N_INSNS (3);
6532 }
6533 else if ((outer == PLUS || outer == COMPARE)
6534 && INTVAL (x) < 256 && INTVAL (x) > -256)
6535 return 0;
6536 else if ((outer == IOR || outer == XOR || outer == AND)
6537 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6538 return COSTS_N_INSNS (1);
6539 else if (outer == AND)
6540 {
6541 int i;
6542 /* This duplicates the tests in the andsi3 expander. */
6543 for (i = 9; i <= 31; i++)
6544 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6545 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6546 return COSTS_N_INSNS (2);
6547 }
6548 else if (outer == ASHIFT || outer == ASHIFTRT
6549 || outer == LSHIFTRT)
6550 return 0;
6551 return COSTS_N_INSNS (2);
6552
6553 case CONST:
6554 case CONST_DOUBLE:
6555 case LABEL_REF:
6556 case SYMBOL_REF:
6557 return COSTS_N_INSNS (3);
6558
6559 case UDIV:
6560 case UMOD:
6561 case DIV:
6562 case MOD:
6563 return 100;
6564
6565 case TRUNCATE:
6566 return 99;
6567
6568 case AND:
6569 case XOR:
6570 case IOR:
6571 /* XXX guess. */
6572 return 8;
6573
6574 case MEM:
6575 /* XXX another guess. */
6576 /* Memory costs quite a lot for the first word, but subsequent words
6577 load at the equivalent of a single insn each. */
6578 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6579 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6580 ? 4 : 0));
6581
6582 case IF_THEN_ELSE:
6583 /* XXX a guess. */
6584 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6585 return 14;
6586 return 2;
6587
6588 case SIGN_EXTEND:
6589 case ZERO_EXTEND:
6590 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6591 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6592
6593 if (mode == SImode)
6594 return total;
6595
6596 if (arm_arch6)
6597 return total + COSTS_N_INSNS (1);
6598
6599 /* Assume a two-shift sequence. Increase the cost slightly so
6600 we prefer actual shifts over an extend operation. */
6601 return total + 1 + COSTS_N_INSNS (2);
6602
6603 default:
6604 return 99;
6605 }
6606 }
6607
6608 static inline bool
6609 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6610 {
6611 enum machine_mode mode = GET_MODE (x);
6612 enum rtx_code subcode;
6613 rtx operand;
6614 enum rtx_code code = GET_CODE (x);
6615 *total = 0;
6616
6617 switch (code)
6618 {
6619 case MEM:
6620 /* Memory costs quite a lot for the first word, but subsequent words
6621 load at the equivalent of a single insn each. */
6622 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6623 return true;
6624
6625 case DIV:
6626 case MOD:
6627 case UDIV:
6628 case UMOD:
6629 if (TARGET_HARD_FLOAT && mode == SFmode)
6630 *total = COSTS_N_INSNS (2);
6631 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6632 *total = COSTS_N_INSNS (4);
6633 else
6634 *total = COSTS_N_INSNS (20);
6635 return false;
6636
6637 case ROTATE:
6638 if (GET_CODE (XEXP (x, 1)) == REG)
6639 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6640 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6641 *total = rtx_cost (XEXP (x, 1), code, speed);
6642
6643 /* Fall through */
6644 case ROTATERT:
6645 if (mode != SImode)
6646 {
6647 *total += COSTS_N_INSNS (4);
6648 return true;
6649 }
6650
6651 /* Fall through */
6652 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6653 *total += rtx_cost (XEXP (x, 0), code, speed);
6654 if (mode == DImode)
6655 {
6656 *total += COSTS_N_INSNS (3);
6657 return true;
6658 }
6659
6660 *total += COSTS_N_INSNS (1);
6661 /* Increase the cost of complex shifts because they aren't any faster,
6662 and reduce dual issue opportunities. */
6663 if (arm_tune_cortex_a9
6664 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6665 ++*total;
6666
6667 return true;
6668
6669 case MINUS:
6670 if (mode == DImode)
6671 {
6672 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6673 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6674 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6675 {
6676 *total += rtx_cost (XEXP (x, 1), code, speed);
6677 return true;
6678 }
6679
6680 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6681 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6682 {
6683 *total += rtx_cost (XEXP (x, 0), code, speed);
6684 return true;
6685 }
6686
6687 return false;
6688 }
6689
6690 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6691 {
6692 if (TARGET_HARD_FLOAT
6693 && (mode == SFmode
6694 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6695 {
6696 *total = COSTS_N_INSNS (1);
6697 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6698 && arm_const_double_rtx (XEXP (x, 0)))
6699 {
6700 *total += rtx_cost (XEXP (x, 1), code, speed);
6701 return true;
6702 }
6703
6704 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6705 && arm_const_double_rtx (XEXP (x, 1)))
6706 {
6707 *total += rtx_cost (XEXP (x, 0), code, speed);
6708 return true;
6709 }
6710
6711 return false;
6712 }
6713 *total = COSTS_N_INSNS (20);
6714 return false;
6715 }
6716
6717 *total = COSTS_N_INSNS (1);
6718 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6719 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6720 {
6721 *total += rtx_cost (XEXP (x, 1), code, speed);
6722 return true;
6723 }
6724
6725 subcode = GET_CODE (XEXP (x, 1));
6726 if (subcode == ASHIFT || subcode == ASHIFTRT
6727 || subcode == LSHIFTRT
6728 || subcode == ROTATE || subcode == ROTATERT)
6729 {
6730 *total += rtx_cost (XEXP (x, 0), code, speed);
6731 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6732 return true;
6733 }
6734
6735 /* A shift as a part of RSB costs no more than RSB itself. */
6736 if (GET_CODE (XEXP (x, 0)) == MULT
6737 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6738 {
6739 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6740 *total += rtx_cost (XEXP (x, 1), code, speed);
6741 return true;
6742 }
6743
6744 if (subcode == MULT
6745 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6746 {
6747 *total += rtx_cost (XEXP (x, 0), code, speed);
6748 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6749 return true;
6750 }
6751
6752 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6753 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6754 {
6755 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6756 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6757 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6758 *total += COSTS_N_INSNS (1);
6759
6760 return true;
6761 }
6762
6763 /* Fall through */
6764
6765 case PLUS:
6766 if (code == PLUS && arm_arch6 && mode == SImode
6767 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6768 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6769 {
6770 *total = COSTS_N_INSNS (1);
6771 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6772 speed);
6773 *total += rtx_cost (XEXP (x, 1), code, speed);
6774 return true;
6775 }
6776
6777 /* MLA: All arguments must be registers. We filter out
6778 multiplication by a power of two, so that we fall down into
6779 the code below. */
6780 if (GET_CODE (XEXP (x, 0)) == MULT
6781 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6782 {
6783 /* The cost comes from the cost of the multiply. */
6784 return false;
6785 }
6786
6787 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6788 {
6789 if (TARGET_HARD_FLOAT
6790 && (mode == SFmode
6791 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6792 {
6793 *total = COSTS_N_INSNS (1);
6794 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6795 && arm_const_double_rtx (XEXP (x, 1)))
6796 {
6797 *total += rtx_cost (XEXP (x, 0), code, speed);
6798 return true;
6799 }
6800
6801 return false;
6802 }
6803
6804 *total = COSTS_N_INSNS (20);
6805 return false;
6806 }
6807
6808 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6809 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6810 {
6811 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6812 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6813 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6814 *total += COSTS_N_INSNS (1);
6815 return true;
6816 }
6817
6818 /* Fall through */
6819
6820 case AND: case XOR: case IOR:
6821
6822 /* Normally the frame registers will be split into reg+const during
6823 reload, so it is a bad idea to combine them with other instructions,
6824 since then they might not be moved outside of loops. As a compromise
6825 we allow integration with ops that have a constant as their second
6826 operand. */
6827 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6828 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6829 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6830 *total = COSTS_N_INSNS (1);
6831
6832 if (mode == DImode)
6833 {
6834 *total += COSTS_N_INSNS (2);
6835 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6836 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6837 {
6838 *total += rtx_cost (XEXP (x, 0), code, speed);
6839 return true;
6840 }
6841
6842 return false;
6843 }
6844
6845 *total += COSTS_N_INSNS (1);
6846 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6847 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6848 {
6849 *total += rtx_cost (XEXP (x, 0), code, speed);
6850 return true;
6851 }
6852 subcode = GET_CODE (XEXP (x, 0));
6853 if (subcode == ASHIFT || subcode == ASHIFTRT
6854 || subcode == LSHIFTRT
6855 || subcode == ROTATE || subcode == ROTATERT)
6856 {
6857 *total += rtx_cost (XEXP (x, 1), code, speed);
6858 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6859 return true;
6860 }
6861
6862 if (subcode == MULT
6863 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6864 {
6865 *total += rtx_cost (XEXP (x, 1), code, speed);
6866 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6867 return true;
6868 }
6869
6870 if (subcode == UMIN || subcode == UMAX
6871 || subcode == SMIN || subcode == SMAX)
6872 {
6873 *total = COSTS_N_INSNS (3);
6874 return true;
6875 }
6876
6877 return false;
6878
6879 case MULT:
6880 /* This should have been handled by the CPU specific routines. */
6881 gcc_unreachable ();
6882
6883 case TRUNCATE:
6884 if (arm_arch3m && mode == SImode
6885 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6886 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6887 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6888 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6889 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6890 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6891 {
6892 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6893 return true;
6894 }
6895 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6896 return false;
6897
6898 case NEG:
6899 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6900 {
6901 if (TARGET_HARD_FLOAT
6902 && (mode == SFmode
6903 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6904 {
6905 *total = COSTS_N_INSNS (1);
6906 return false;
6907 }
6908 *total = COSTS_N_INSNS (2);
6909 return false;
6910 }
6911
6912 /* Fall through */
6913 case NOT:
6914 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6915 if (mode == SImode && code == NOT)
6916 {
6917 subcode = GET_CODE (XEXP (x, 0));
6918 if (subcode == ASHIFT || subcode == ASHIFTRT
6919 || subcode == LSHIFTRT
6920 || subcode == ROTATE || subcode == ROTATERT
6921 || (subcode == MULT
6922 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6923 {
6924 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6925 /* Register shifts cost an extra cycle. */
6926 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6927 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6928 subcode, speed);
6929 return true;
6930 }
6931 }
6932
6933 return false;
6934
6935 case IF_THEN_ELSE:
6936 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6937 {
6938 *total = COSTS_N_INSNS (4);
6939 return true;
6940 }
6941
6942 operand = XEXP (x, 0);
6943
6944 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6945 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6946 && GET_CODE (XEXP (operand, 0)) == REG
6947 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6948 *total += COSTS_N_INSNS (1);
6949 *total += (rtx_cost (XEXP (x, 1), code, speed)
6950 + rtx_cost (XEXP (x, 2), code, speed));
6951 return true;
6952
6953 case NE:
6954 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6955 {
6956 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6957 return true;
6958 }
6959 goto scc_insn;
6960
6961 case GE:
6962 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6963 && mode == SImode && XEXP (x, 1) == const0_rtx)
6964 {
6965 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6966 return true;
6967 }
6968 goto scc_insn;
6969
6970 case LT:
6971 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6972 && mode == SImode && XEXP (x, 1) == const0_rtx)
6973 {
6974 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6975 return true;
6976 }
6977 goto scc_insn;
6978
6979 case EQ:
6980 case GT:
6981 case LE:
6982 case GEU:
6983 case LTU:
6984 case GTU:
6985 case LEU:
6986 case UNORDERED:
6987 case ORDERED:
6988 case UNEQ:
6989 case UNGE:
6990 case UNLT:
6991 case UNGT:
6992 case UNLE:
6993 scc_insn:
6994 /* SCC insns. In the case where the comparison has already been
6995 performed, then they cost 2 instructions. Otherwise they need
6996 an additional comparison before them. */
6997 *total = COSTS_N_INSNS (2);
6998 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6999 {
7000 return true;
7001 }
7002
7003 /* Fall through */
7004 case COMPARE:
7005 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7006 {
7007 *total = 0;
7008 return true;
7009 }
7010
7011 *total += COSTS_N_INSNS (1);
7012 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7013 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7014 {
7015 *total += rtx_cost (XEXP (x, 0), code, speed);
7016 return true;
7017 }
7018
7019 subcode = GET_CODE (XEXP (x, 0));
7020 if (subcode == ASHIFT || subcode == ASHIFTRT
7021 || subcode == LSHIFTRT
7022 || subcode == ROTATE || subcode == ROTATERT)
7023 {
7024 *total += rtx_cost (XEXP (x, 1), code, speed);
7025 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7026 return true;
7027 }
7028
7029 if (subcode == MULT
7030 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7031 {
7032 *total += rtx_cost (XEXP (x, 1), code, speed);
7033 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7034 return true;
7035 }
7036
7037 return false;
7038
7039 case UMIN:
7040 case UMAX:
7041 case SMIN:
7042 case SMAX:
7043 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7044 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7045 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7046 *total += rtx_cost (XEXP (x, 1), code, speed);
7047 return true;
7048
7049 case ABS:
7050 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7051 {
7052 if (TARGET_HARD_FLOAT
7053 && (mode == SFmode
7054 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7055 {
7056 *total = COSTS_N_INSNS (1);
7057 return false;
7058 }
7059 *total = COSTS_N_INSNS (20);
7060 return false;
7061 }
7062 *total = COSTS_N_INSNS (1);
7063 if (mode == DImode)
7064 *total += COSTS_N_INSNS (3);
7065 return false;
7066
7067 case SIGN_EXTEND:
7068 case ZERO_EXTEND:
7069 *total = 0;
7070 if (GET_MODE_CLASS (mode) == MODE_INT)
7071 {
7072 rtx op = XEXP (x, 0);
7073 enum machine_mode opmode = GET_MODE (op);
7074
7075 if (mode == DImode)
7076 *total += COSTS_N_INSNS (1);
7077
7078 if (opmode != SImode)
7079 {
7080 if (MEM_P (op))
7081 {
7082 /* If !arm_arch4, we use one of the extendhisi2_mem
7083 or movhi_bytes patterns for HImode. For a QImode
7084 sign extension, we first zero-extend from memory
7085 and then perform a shift sequence. */
7086 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7087 *total += COSTS_N_INSNS (2);
7088 }
7089 else if (arm_arch6)
7090 *total += COSTS_N_INSNS (1);
7091
7092 /* We don't have the necessary insn, so we need to perform some
7093 other operation. */
7094 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7095 /* An and with constant 255. */
7096 *total += COSTS_N_INSNS (1);
7097 else
7098 /* A shift sequence. Increase costs slightly to avoid
7099 combining two shifts into an extend operation. */
7100 *total += COSTS_N_INSNS (2) + 1;
7101 }
7102
7103 return false;
7104 }
7105
7106 switch (GET_MODE (XEXP (x, 0)))
7107 {
7108 case V8QImode:
7109 case V4HImode:
7110 case V2SImode:
7111 case V4QImode:
7112 case V2HImode:
7113 *total = COSTS_N_INSNS (1);
7114 return false;
7115
7116 default:
7117 gcc_unreachable ();
7118 }
7119 gcc_unreachable ();
7120
7121 case ZERO_EXTRACT:
7122 case SIGN_EXTRACT:
7123 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7124 return true;
7125
7126 case CONST_INT:
7127 if (const_ok_for_arm (INTVAL (x))
7128 || const_ok_for_arm (~INTVAL (x)))
7129 *total = COSTS_N_INSNS (1);
7130 else
7131 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7132 INTVAL (x), NULL_RTX,
7133 NULL_RTX, 0, 0));
7134 return true;
7135
7136 case CONST:
7137 case LABEL_REF:
7138 case SYMBOL_REF:
7139 *total = COSTS_N_INSNS (3);
7140 return true;
7141
7142 case HIGH:
7143 *total = COSTS_N_INSNS (1);
7144 return true;
7145
7146 case LO_SUM:
7147 *total = COSTS_N_INSNS (1);
7148 *total += rtx_cost (XEXP (x, 0), code, speed);
7149 return true;
7150
7151 case CONST_DOUBLE:
7152 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7153 && (mode == SFmode || !TARGET_VFP_SINGLE))
7154 *total = COSTS_N_INSNS (1);
7155 else
7156 *total = COSTS_N_INSNS (4);
7157 return true;
7158
7159 default:
7160 *total = COSTS_N_INSNS (4);
7161 return false;
7162 }
7163 }
7164
7165 /* Estimates the size cost of thumb1 instructions.
7166 For now most of the code is copied from thumb1_rtx_costs. We need more
7167 fine-grained tuning when we have more related test cases. */
7168 static inline int
7169 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7170 {
7171 enum machine_mode mode = GET_MODE (x);
7172
7173 switch (code)
7174 {
7175 case ASHIFT:
7176 case ASHIFTRT:
7177 case LSHIFTRT:
7178 case ROTATERT:
7179 case PLUS:
7180 case MINUS:
7181 case COMPARE:
7182 case NEG:
7183 case NOT:
7184 return COSTS_N_INSNS (1);
7185
7186 case MULT:
7187 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7188 {
7189 /* Thumb1 mul instruction can't operate on a constant. We must load it
7190 into a register first. */
7191 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7192 return COSTS_N_INSNS (1) + const_size;
7193 }
7194 return COSTS_N_INSNS (1);
7195
7196 case SET:
7197 return (COSTS_N_INSNS (1)
7198 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7199 + GET_CODE (SET_DEST (x)) == MEM));
7200
7201 case CONST_INT:
7202 if (outer == SET)
7203 {
7204 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7205 return COSTS_N_INSNS (1);
7206 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7207 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7208 return COSTS_N_INSNS (2);
7209 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7210 if (thumb_shiftable_const (INTVAL (x)))
7211 return COSTS_N_INSNS (2);
7212 return COSTS_N_INSNS (3);
7213 }
7214 else if ((outer == PLUS || outer == COMPARE)
7215 && INTVAL (x) < 256 && INTVAL (x) > -256)
7216 return 0;
7217 else if ((outer == IOR || outer == XOR || outer == AND)
7218 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7219 return COSTS_N_INSNS (1);
7220 else if (outer == AND)
7221 {
7222 int i;
7223 /* This duplicates the tests in the andsi3 expander. */
7224 for (i = 9; i <= 31; i++)
7225 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7226 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7227 return COSTS_N_INSNS (2);
7228 }
7229 else if (outer == ASHIFT || outer == ASHIFTRT
7230 || outer == LSHIFTRT)
7231 return 0;
7232 return COSTS_N_INSNS (2);
7233
7234 case CONST:
7235 case CONST_DOUBLE:
7236 case LABEL_REF:
7237 case SYMBOL_REF:
7238 return COSTS_N_INSNS (3);
7239
7240 case UDIV:
7241 case UMOD:
7242 case DIV:
7243 case MOD:
7244 return 100;
7245
7246 case TRUNCATE:
7247 return 99;
7248
7249 case AND:
7250 case XOR:
7251 case IOR:
7252 /* XXX guess. */
7253 return 8;
7254
7255 case MEM:
7256 /* XXX another guess. */
7257 /* Memory costs quite a lot for the first word, but subsequent words
7258 load at the equivalent of a single insn each. */
7259 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7260 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7261 ? 4 : 0));
7262
7263 case IF_THEN_ELSE:
7264 /* XXX a guess. */
7265 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7266 return 14;
7267 return 2;
7268
7269 case ZERO_EXTEND:
7270 /* XXX still guessing. */
7271 switch (GET_MODE (XEXP (x, 0)))
7272 {
7273 case QImode:
7274 return (1 + (mode == DImode ? 4 : 0)
7275 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7276
7277 case HImode:
7278 return (4 + (mode == DImode ? 4 : 0)
7279 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7280
7281 case SImode:
7282 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7283
7284 default:
7285 return 99;
7286 }
7287
7288 default:
7289 return 99;
7290 }
7291 }
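
/* Worked example: for (mult (reg:SI r) (const_int 10)) the constant cannot
   be used directly by the Thumb-1 mul, so the size cost is COSTS_N_INSNS (1)
   for the multiply plus COSTS_N_INSNS (1) to materialize the constant
   (10 < 256, so a single move suffices), i.e. 8 with
   COSTS_N_INSNS (N) == N * 4.  */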
7292
7293 /* RTX costs when optimizing for size. */
7294 static bool
7295 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7296 int *total)
7297 {
7298 enum machine_mode mode = GET_MODE (x);
7299 if (TARGET_THUMB1)
7300 {
7301 *total = thumb1_size_rtx_costs (x, code, outer_code);
7302 return true;
7303 }
7304
7305 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7306 switch (code)
7307 {
7308 case MEM:
7309 /* A memory access costs 1 insn if the mode is small, or the address is
7310 a single register; otherwise it costs one insn per word. */
7311 if (REG_P (XEXP (x, 0)))
7312 *total = COSTS_N_INSNS (1);
7313 else if (flag_pic
7314 && GET_CODE (XEXP (x, 0)) == PLUS
7315 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7316 /* This will be split into two instructions.
7317 See arm.md:calculate_pic_address. */
7318 *total = COSTS_N_INSNS (2);
7319 else
7320 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7321 return true;
7322
7323 case DIV:
7324 case MOD:
7325 case UDIV:
7326 case UMOD:
7327 /* Needs a libcall, so it costs about this. */
7328 *total = COSTS_N_INSNS (2);
7329 return false;
7330
7331 case ROTATE:
7332 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7333 {
7334 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7335 return true;
7336 }
7337 /* Fall through */
7338 case ROTATERT:
7339 case ASHIFT:
7340 case LSHIFTRT:
7341 case ASHIFTRT:
7342 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7343 {
7344 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7345 return true;
7346 }
7347 else if (mode == SImode)
7348 {
7349 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7350 /* Slightly disparage register shifts, but not by much. */
7351 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7352 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7353 return true;
7354 }
7355
7356 /* Needs a libcall. */
7357 *total = COSTS_N_INSNS (2);
7358 return false;
7359
7360 case MINUS:
7361 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7362 && (mode == SFmode || !TARGET_VFP_SINGLE))
7363 {
7364 *total = COSTS_N_INSNS (1);
7365 return false;
7366 }
7367
7368 if (mode == SImode)
7369 {
7370 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7371 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7372
7373 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7374 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7375 || subcode1 == ROTATE || subcode1 == ROTATERT
7376 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7377 || subcode1 == ASHIFTRT)
7378 {
7379 /* It's just the cost of the two operands. */
7380 *total = 0;
7381 return false;
7382 }
7383
7384 *total = COSTS_N_INSNS (1);
7385 return false;
7386 }
7387
7388 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7389 return false;
7390
7391 case PLUS:
7392 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7393 && (mode == SFmode || !TARGET_VFP_SINGLE))
7394 {
7395 *total = COSTS_N_INSNS (1);
7396 return false;
7397 }
7398
7399 /* A shift as a part of ADD costs nothing. */
7400 if (GET_CODE (XEXP (x, 0)) == MULT
7401 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7402 {
7403 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7404 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7405 *total += rtx_cost (XEXP (x, 1), code, false);
7406 return true;
7407 }
7408
7409 /* Fall through */
7410 case AND: case XOR: case IOR:
7411 if (mode == SImode)
7412 {
7413 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7414
7415 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7416 || subcode == LSHIFTRT || subcode == ASHIFTRT
7417 || (code == AND && subcode == NOT))
7418 {
7419 /* It's just the cost of the two operands. */
7420 *total = 0;
7421 return false;
7422 }
7423 }
7424
7425 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7426 return false;
7427
7428 case MULT:
7429 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7430 return false;
7431
7432 case NEG:
7433 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7434 && (mode == SFmode || !TARGET_VFP_SINGLE))
7435 {
7436 *total = COSTS_N_INSNS (1);
7437 return false;
7438 }
7439
7440 /* Fall through */
7441 case NOT:
7442 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7443
7444 return false;
7445
7446 case IF_THEN_ELSE:
7447 *total = 0;
7448 return false;
7449
7450 case COMPARE:
7451 if (cc_register (XEXP (x, 0), VOIDmode))
7452 *total = 0;
7453 else
7454 *total = COSTS_N_INSNS (1);
7455 return false;
7456
7457 case ABS:
7458 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7459 && (mode == SFmode || !TARGET_VFP_SINGLE))
7460 *total = COSTS_N_INSNS (1);
7461 else
7462 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7463 return false;
7464
7465 case SIGN_EXTEND:
7466 case ZERO_EXTEND:
7467 return arm_rtx_costs_1 (x, outer_code, total, 0);
7468
7469 case CONST_INT:
7470 if (const_ok_for_arm (INTVAL (x)))
7471 /* A multiplication by a constant requires another instruction
7472 to load the constant to a register. */
7473 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7474 ? 1 : 0);
7475 else if (const_ok_for_arm (~INTVAL (x)))
7476 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7477 else if (const_ok_for_arm (-INTVAL (x)))
7478 {
7479 if (outer_code == COMPARE || outer_code == PLUS
7480 || outer_code == MINUS)
7481 *total = 0;
7482 else
7483 *total = COSTS_N_INSNS (1);
7484 }
7485 else
7486 *total = COSTS_N_INSNS (2);
7487 return true;
7488
7489 case CONST:
7490 case LABEL_REF:
7491 case SYMBOL_REF:
7492 *total = COSTS_N_INSNS (2);
7493 return true;
7494
7495 case CONST_DOUBLE:
7496 *total = COSTS_N_INSNS (4);
7497 return true;
7498
7499 case HIGH:
7500 case LO_SUM:
7501 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7502 cost of these slightly. */
7503 *total = COSTS_N_INSNS (1) + 1;
7504 return true;
7505
7506 default:
7507 if (mode != VOIDmode)
7508 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7509 else
7510 *total = COSTS_N_INSNS (4); /* Who knows? */
7511 return false;
7512 }
7513 }
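
/* Illustration of the MEM case above: a DImode load through a non-trivial
   address is costed per word, i.e. COSTS_N_INSNS (ARM_NUM_REGS (DImode))
   = COSTS_N_INSNS (2), whereas the same load through a bare register
   address costs COSTS_N_INSNS (1).  */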
7514
7515 /* RTX costs. Dispatch to the size costs or the per-core speed costs. */
7516 static bool
7517 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7518 bool speed)
7519 {
7520 if (!speed)
7521 return arm_size_rtx_costs (x, (enum rtx_code) code,
7522 (enum rtx_code) outer_code, total);
7523 else
7524 return current_tune->rtx_costs (x, (enum rtx_code) code,
7525 (enum rtx_code) outer_code,
7526 total, speed);
7527 }
7528
7529 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7530 supported on any "slowmul" cores, so it can be ignored. */
7531
7532 static bool
7533 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7534 int *total, bool speed)
7535 {
7536 enum machine_mode mode = GET_MODE (x);
7537
7538 if (TARGET_THUMB)
7539 {
7540 *total = thumb1_rtx_costs (x, code, outer_code);
7541 return true;
7542 }
7543
7544 switch (code)
7545 {
7546 case MULT:
7547 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7548 || mode == DImode)
7549 {
7550 *total = COSTS_N_INSNS (20);
7551 return false;
7552 }
7553
7554 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7555 {
7556 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7557 & (unsigned HOST_WIDE_INT) 0xffffffff);
7558 int cost, const_ok = const_ok_for_arm (i);
7559 int j, booth_unit_size;
7560
7561 /* Tune as appropriate. */
7562 cost = const_ok ? 4 : 8;
7563 booth_unit_size = 2;
7564 for (j = 0; i && j < 32; j += booth_unit_size)
7565 {
7566 i >>= booth_unit_size;
7567 cost++;
7568 }
7569
7570 *total = COSTS_N_INSNS (cost);
7571 *total += rtx_cost (XEXP (x, 0), code, speed);
7572 return true;
7573 }
7574
7575 *total = COSTS_N_INSNS (20);
7576 return false;
7577
7578 default:
7579 return arm_rtx_costs_1 (x, outer_code, total, speed);
7580 }
7581 }
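
/* Example of the Booth-style costing above: multiplying by 0xff (a valid
   ARM immediate, so the base cost is 4) consumes 2 bits of the constant per
   iteration, giving 4 extra steps and a total of COSTS_N_INSNS (8) plus the
   cost of the other operand.  */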
7582
7583
7584 /* RTX cost for cores with a fast multiply unit (M variants). */
7585
7586 static bool
7587 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7588 int *total, bool speed)
7589 {
7590 enum machine_mode mode = GET_MODE (x);
7591
7592 if (TARGET_THUMB1)
7593 {
7594 *total = thumb1_rtx_costs (x, code, outer_code);
7595 return true;
7596 }
7597
7598 /* ??? Should Thumb-2 use different costs? */
7599 switch (code)
7600 {
7601 case MULT:
7602 /* There is no point basing this on the tuning, since it is always the
7603 fast variant if it exists at all. */
7604 if (mode == DImode
7605 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7606 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7607 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7608 {
7609 *total = COSTS_N_INSNS (2);
7610 return false;
7611 }
7612
7613
7614 if (mode == DImode)
7615 {
7616 *total = COSTS_N_INSNS (5);
7617 return false;
7618 }
7619
7620 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7621 {
7622 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7623 & (unsigned HOST_WIDE_INT) 0xffffffff);
7624 int cost, const_ok = const_ok_for_arm (i);
7625 int j, booth_unit_size;
7626
7627 /* Tune as appropriate. */
7628 cost = const_ok ? 4 : 8;
7629 booth_unit_size = 8;
7630 for (j = 0; i && j < 32; j += booth_unit_size)
7631 {
7632 i >>= booth_unit_size;
7633 cost++;
7634 }
7635
7636 *total = COSTS_N_INSNS (cost);
7637 return false;
7638 }
7639
7640 if (mode == SImode)
7641 {
7642 *total = COSTS_N_INSNS (4);
7643 return false;
7644 }
7645
7646 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7647 {
7648 if (TARGET_HARD_FLOAT
7649 && (mode == SFmode
7650 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7651 {
7652 *total = COSTS_N_INSNS (1);
7653 return false;
7654 }
7655 }
7656
7657 /* Requires a libcall. */
7658 *total = COSTS_N_INSNS (20);
7659 return false;
7660
7661 default:
7662 return arm_rtx_costs_1 (x, outer_code, total, speed);
7663 }
7664 }
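
/* With the wider booth_unit_size of 8 used here, the same 0xff constant as
   in the slow-multiply example is consumed in a single iteration, so the
   cost is COSTS_N_INSNS (5) rather than COSTS_N_INSNS (8).  */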
7665
7666
7667 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7668 so it can be ignored. */
7669
7670 static bool
7671 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7672 int *total, bool speed)
7673 {
7674 enum machine_mode mode = GET_MODE (x);
7675
7676 if (TARGET_THUMB)
7677 {
7678 *total = thumb1_rtx_costs (x, code, outer_code);
7679 return true;
7680 }
7681
7682 switch (code)
7683 {
7684 case COMPARE:
7685 if (GET_CODE (XEXP (x, 0)) != MULT)
7686 return arm_rtx_costs_1 (x, outer_code, total, speed);
7687
7688 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7689 will stall until the multiplication is complete. */
7690 *total = COSTS_N_INSNS (3);
7691 return false;
7692
7693 case MULT:
7694 /* There is no point basing this on the tuning, since it is always the
7695 fast variant if it exists at all. */
7696 if (mode == DImode
7697 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7698 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7699 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7700 {
7701 *total = COSTS_N_INSNS (2);
7702 return false;
7703 }
7704
7705
7706 if (mode == DImode)
7707 {
7708 *total = COSTS_N_INSNS (5);
7709 return false;
7710 }
7711
7712 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7713 {
7714 /* If operand 1 is a constant we can more accurately
7715 calculate the cost of the multiply. The multiplier can
7716 retire 15 bits on the first cycle and a further 12 on the
7717 second. We do, of course, have to load the constant into
7718 a register first. */
7719 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7720 /* There's a general overhead of one cycle. */
7721 int cost = 1;
7722 unsigned HOST_WIDE_INT masked_const;
7723
7724 if (i & 0x80000000)
7725 i = ~i;
7726
7727 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7728
7729 masked_const = i & 0xffff8000;
7730 if (masked_const != 0)
7731 {
7732 cost++;
7733 masked_const = i & 0xf8000000;
7734 if (masked_const != 0)
7735 cost++;
7736 }
7737 *total = COSTS_N_INSNS (cost);
7738 return false;
7739 }
7740
7741 if (mode == SImode)
7742 {
7743 *total = COSTS_N_INSNS (3);
7744 return false;
7745 }
7746
7747 /* Requires a libcall. */
7748 *total = COSTS_N_INSNS (20);
7749 return false;
7750
7751 default:
7752 return arm_rtx_costs_1 (x, outer_code, total, speed);
7753 }
7754 }
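
/* Example of the constant-multiply costing above: 0x100 has no bits set at
   or above bit 15, so the cost stays at COSTS_N_INSNS (1); 0x12345678 has
   bits set at or above bit 15 and also at or above bit 27, so two extra
   cycles are added, giving COSTS_N_INSNS (3).  */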
7755
7756
7757 /* RTX costs for 9e (and later) cores. */
7758
7759 static bool
7760 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7761 int *total, bool speed)
7762 {
7763 enum machine_mode mode = GET_MODE (x);
7764
7765 if (TARGET_THUMB1)
7766 {
7767 switch (code)
7768 {
7769 case MULT:
7770 *total = COSTS_N_INSNS (3);
7771 return true;
7772
7773 default:
7774 *total = thumb1_rtx_costs (x, code, outer_code);
7775 return true;
7776 }
7777 }
7778
7779 switch (code)
7780 {
7781 case MULT:
7782 /* There is no point basing this on the tuning, since it is always the
7783 fast variant if it exists at all. */
7784 if (mode == DImode
7785 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7786 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7787 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7788 {
7789 *total = COSTS_N_INSNS (2);
7790 return false;
7791 }
7792
7793
7794 if (mode == DImode)
7795 {
7796 *total = COSTS_N_INSNS (5);
7797 return false;
7798 }
7799
7800 if (mode == SImode)
7801 {
7802 *total = COSTS_N_INSNS (2);
7803 return false;
7804 }
7805
7806 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7807 {
7808 if (TARGET_HARD_FLOAT
7809 && (mode == SFmode
7810 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7811 {
7812 *total = COSTS_N_INSNS (1);
7813 return false;
7814 }
7815 }
7816
7817 *total = COSTS_N_INSNS (20);
7818 return false;
7819
7820 default:
7821 return arm_rtx_costs_1 (x, outer_code, total, speed);
7822 }
7823 }
7824 /* All address computations that can be done are free, but rtx cost returns
7825 the same for practically all of them. So we weight the different types
7826 of address here in the order (most preferred first):
7827 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7828 static inline int
7829 arm_arm_address_cost (rtx x)
7830 {
7831 enum rtx_code c = GET_CODE (x);
7832
7833 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7834 return 0;
7835 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7836 return 10;
7837
7838 if (c == PLUS)
7839 {
7840 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7841 return 2;
7842
7843 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7844 return 3;
7845
7846 return 4;
7847 }
7848
7849 return 6;
7850 }
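
/* For instance, (post_inc (reg)) scores 0, (plus (reg) (const_int 8))
   scores 2, (plus (reg) (mult (reg) (const_int 4))) scores 3, a plain
   (plus (reg) (reg)) scores 4, a bare (reg) scores 6, and label or symbol
   references score 10.  */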
7851
7852 static inline int
7853 arm_thumb_address_cost (rtx x)
7854 {
7855 enum rtx_code c = GET_CODE (x);
7856
7857 if (c == REG)
7858 return 1;
7859 if (c == PLUS
7860 && GET_CODE (XEXP (x, 0)) == REG
7861 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7862 return 1;
7863
7864 return 2;
7865 }
7866
7867 static int
7868 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7869 {
7870 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7871 }
7872
7873 /* Adjust cost hook for XScale. */
7874 static bool
7875 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7876 {
7877 /* Some true dependencies can have a higher cost depending
7878 on precisely how certain input operands are used. */
7879 if (REG_NOTE_KIND (link) == 0
7880 && recog_memoized (insn) >= 0
7881 && recog_memoized (dep) >= 0)
7882 {
7883 int shift_opnum = get_attr_shift (insn);
7884 enum attr_type attr_type = get_attr_type (dep);
7885
7886 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7887 operand for INSN. If we have a shifted input operand and the
7888 instruction we depend on is another ALU instruction, then we may
7889 have to account for an additional stall. */
7890 if (shift_opnum != 0
7891 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7892 {
7893 rtx shifted_operand;
7894 int opno;
7895
7896 /* Get the shifted operand. */
7897 extract_insn (insn);
7898 shifted_operand = recog_data.operand[shift_opnum];
7899
7900 /* Iterate over all the operands in DEP. If we write an operand
7901 that overlaps with SHIFTED_OPERAND, then we have to increase the
7902 cost of this dependency. */
7903 extract_insn (dep);
7904 preprocess_constraints ();
7905 for (opno = 0; opno < recog_data.n_operands; opno++)
7906 {
7907 /* We can ignore strict inputs. */
7908 if (recog_data.operand_type[opno] == OP_IN)
7909 continue;
7910
7911 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7912 shifted_operand))
7913 {
7914 *cost = 2;
7915 return false;
7916 }
7917 }
7918 }
7919 }
7920 return true;
7921 }
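
/* For instance, if INSN is "add r0, r1, r2, lsl #1" and DEP is a shifted
   ALU instruction that writes r2, the shifted input is not ready early, so
   the dependence cost is overridden to 2 cycles and returned as final; in
   all other cases the hook returns true and the generic costing is used.  */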
7922
7923 /* Adjust cost hook for Cortex A9. */
7924 static bool
7925 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7926 {
7927 switch (REG_NOTE_KIND (link))
7928 {
7929 case REG_DEP_ANTI:
7930 *cost = 0;
7931 return false;
7932
7933 case REG_DEP_TRUE:
7934 case REG_DEP_OUTPUT:
7935 if (recog_memoized (insn) >= 0
7936 && recog_memoized (dep) >= 0)
7937 {
7938 if (GET_CODE (PATTERN (insn)) == SET)
7939 {
7940 if (GET_MODE_CLASS
7941 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
7942 || GET_MODE_CLASS
7943 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
7944 {
7945 enum attr_type attr_type_insn = get_attr_type (insn);
7946 enum attr_type attr_type_dep = get_attr_type (dep);
7947
7948 /* By default all dependencies of the form
7949 s0 = s0 <op> s1
7950 s0 = s0 <op> s2
7951 have an extra latency of 1 cycle because
7952 of the input and output dependency in this
7953 case. However this gets modeled as a true
7954 dependency and hence all these checks. */
7955 if (REG_P (SET_DEST (PATTERN (insn)))
7956 && REG_P (SET_DEST (PATTERN (dep)))
7957 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
7958 SET_DEST (PATTERN (dep))))
7959 {
7960 /* FMACS is a special case where the dependent
7961 instruction can be issued 3 cycles before
7962 the normal latency in case of an output
7963 dependency. */
7964 if ((attr_type_insn == TYPE_FMACS
7965 || attr_type_insn == TYPE_FMACD)
7966 && (attr_type_dep == TYPE_FMACS
7967 || attr_type_dep == TYPE_FMACD))
7968 {
7969 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7970 *cost = insn_default_latency (dep) - 3;
7971 else
7972 *cost = insn_default_latency (dep);
7973 return false;
7974 }
7975 else
7976 {
7977 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7978 *cost = insn_default_latency (dep) + 1;
7979 else
7980 *cost = insn_default_latency (dep);
7981 }
7982 return false;
7983 }
7984 }
7985 }
7986 }
7987 break;
7988
7989 default:
7990 gcc_unreachable ();
7991 }
7992
7993 return true;
7994 }
7995
7996 /* Adjust cost hook for FA726TE. */
7997 static bool
7998 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7999 {
8000 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
8001 has a penalty of 3. */
8002 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8003 && recog_memoized (insn) >= 0
8004 && recog_memoized (dep) >= 0
8005 && get_attr_conds (dep) == CONDS_SET)
8006 {
8007 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8008 if (get_attr_conds (insn) == CONDS_USE
8009 && get_attr_type (insn) != TYPE_BRANCH)
8010 {
8011 *cost = 3;
8012 return false;
8013 }
8014
8015 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8016 || get_attr_conds (insn) == CONDS_USE)
8017 {
8018 *cost = 0;
8019 return false;
8020 }
8021 }
8022
8023 return true;
8024 }
8025
8026 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8027 It corrects the value of COST based on the relationship between
8028 INSN and DEP through the dependence LINK. It returns the new
8029 value. There is a per-core adjust_cost hook to adjust scheduler costs
8030 and the per-core hook can choose to completely override the generic
8031 adjust_cost function. Only put bits of code into arm_adjust_cost that
8032 are common across all cores. */
8033 static int
8034 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8035 {
8036 rtx i_pat, d_pat;
8037
8038 /* When generating Thumb-1 code, we want to place flag-setting operations
8039 close to a conditional branch which depends on them, so that we can
8040 omit the comparison. */
8041 if (TARGET_THUMB1
8042 && REG_NOTE_KIND (link) == 0
8043 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8044 && recog_memoized (dep) >= 0
8045 && get_attr_conds (dep) == CONDS_SET)
8046 return 0;
8047
8048 if (current_tune->sched_adjust_cost != NULL)
8049 {
8050 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8051 return cost;
8052 }
8053
8054 /* XXX This is not strictly true for the FPA. */
8055 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8056 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8057 return 0;
8058
8059 /* Call insns don't incur a stall, even if they follow a load. */
8060 if (REG_NOTE_KIND (link) == 0
8061 && GET_CODE (insn) == CALL_INSN)
8062 return 1;
8063
8064 if ((i_pat = single_set (insn)) != NULL
8065 && GET_CODE (SET_SRC (i_pat)) == MEM
8066 && (d_pat = single_set (dep)) != NULL
8067 && GET_CODE (SET_DEST (d_pat)) == MEM)
8068 {
8069 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8070 /* This is a load after a store; there is no conflict if the load reads
8071 from a cached area. Assume that loads from the stack, and from the
8072 constant pool are cached, and that others will miss. This is a
8073 hack. */
8074
8075 if ((GET_CODE (src_mem) == SYMBOL_REF
8076 && CONSTANT_POOL_ADDRESS_P (src_mem))
8077 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8078 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8079 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8080 return 1;
8081 }
8082
8083 return cost;
8084 }
8085
8086 static int
8087 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8088 {
8089 if (TARGET_32BIT)
8090 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8091 else
8092 return (optimize > 0) ? 2 : 0;
8093 }
8094
8095 static int
8096 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8097 {
8098 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8099 }
8100
8101 static int fp_consts_inited = 0;
8102
8103 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8104 static const char * const strings_fp[8] =
8105 {
8106 "0", "1", "2", "3",
8107 "4", "5", "0.5", "10"
8108 };
8109
8110 static REAL_VALUE_TYPE values_fp[8];
8111
8112 static void
8113 init_fp_table (void)
8114 {
8115 int i;
8116 REAL_VALUE_TYPE r;
8117
8118 if (TARGET_VFP)
8119 fp_consts_inited = 1;
8120 else
8121 fp_consts_inited = 8;
8122
8123 for (i = 0; i < fp_consts_inited; i++)
8124 {
8125 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8126 values_fp[i] = r;
8127 }
8128 }
8129
8130 /* Return TRUE if rtx X is a valid immediate FP constant. */
8131 int
8132 arm_const_double_rtx (rtx x)
8133 {
8134 REAL_VALUE_TYPE r;
8135 int i;
8136
8137 if (!fp_consts_inited)
8138 init_fp_table ();
8139
8140 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8141 if (REAL_VALUE_MINUS_ZERO (r))
8142 return 0;
8143
8144 for (i = 0; i < fp_consts_inited; i++)
8145 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8146 return 1;
8147
8148 return 0;
8149 }
8150
8151 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8152 int
8153 neg_const_double_rtx_ok_for_fpa (rtx x)
8154 {
8155 REAL_VALUE_TYPE r;
8156 int i;
8157
8158 if (!fp_consts_inited)
8159 init_fp_table ();
8160
8161 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8162 r = real_value_negate (&r);
8163 if (REAL_VALUE_MINUS_ZERO (r))
8164 return 0;
8165
8166 for (i = 0; i < 8; i++)
8167 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8168 return 1;
8169
8170 return 0;
8171 }
8172
8173
8174 /* VFPv3 has a fairly wide range of representable immediates, formed from
8175 "quarter-precision" floating-point values. These can be evaluated using this
8176 formula (with ^ for exponentiation):
8177
8178 (-1)^s * n * 2^(-r)
8179
8180 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8181 16 <= n <= 31 and 0 <= r <= 7.
8182
8183 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8184
8185 - A (most-significant) is the sign bit.
8186 - BCD are the exponent (encoded as r XOR 3).
8187 - EFGH are the mantissa (encoded as n - 16).
8188 */
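
/* For example, 1.0 = 16 * 2^(-4), i.e. s = 0, n = 16, r = 4, which encodes
   as A = 0, BCD = 4 XOR 3 = 0b111, EFGH = 16 - 16 = 0b0000: the immediate
   byte 0x70.  */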
8189
8190 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8191 fconst[sd] instruction, or -1 if X isn't suitable. */
8192 static int
8193 vfp3_const_double_index (rtx x)
8194 {
8195 REAL_VALUE_TYPE r, m;
8196 int sign, exponent;
8197 unsigned HOST_WIDE_INT mantissa, mant_hi;
8198 unsigned HOST_WIDE_INT mask;
8199 HOST_WIDE_INT m1, m2;
8200 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8201
8202 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8203 return -1;
8204
8205 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8206
8207 /* We can't represent these things, so detect them first. */
8208 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8209 return -1;
8210
8211 /* Extract sign, exponent and mantissa. */
8212 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8213 r = real_value_abs (&r);
8214 exponent = REAL_EXP (&r);
8215 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8216 highest (sign) bit, with a fixed binary point at bit point_pos.
8217 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8218 bits for the mantissa, this may fail (low bits would be lost). */
8219 real_ldexp (&m, &r, point_pos - exponent);
8220 REAL_VALUE_TO_INT (&m1, &m2, m);
8221 mantissa = m1;
8222 mant_hi = m2;
8223
8224 /* If there are bits set in the low part of the mantissa, we can't
8225 represent this value. */
8226 if (mantissa != 0)
8227 return -1;
8228
8229 /* Now make it so that mantissa contains the most-significant bits, and move
8230 the point_pos to indicate that the least-significant bits have been
8231 discarded. */
8232 point_pos -= HOST_BITS_PER_WIDE_INT;
8233 mantissa = mant_hi;
8234
8235 /* We can permit four significant bits of mantissa only, plus a high bit
8236 which is always 1. */
8237 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8238 if ((mantissa & mask) != 0)
8239 return -1;
8240
8241 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8242 mantissa >>= point_pos - 5;
8243
8244 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8245 floating-point immediate zero with Neon using an integer-zero load, but
8246 that case is handled elsewhere.) */
8247 if (mantissa == 0)
8248 return -1;
8249
8250 gcc_assert (mantissa >= 16 && mantissa <= 31);
8251
8252 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8253 normalized significands are in the range [1, 2). (Our mantissa is shifted
8254 left 4 places at this point relative to normalized IEEE754 values). GCC
8255 internally uses [0.5, 1) (see real.c), so the exponent returned from
8256 REAL_EXP must be altered. */
8257 exponent = 5 - exponent;
8258
8259 if (exponent < 0 || exponent > 7)
8260 return -1;
8261
8262 /* Sign, mantissa and exponent are now in the correct form to plug into the
8263 formula described in the comment above. */
8264 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8265 }
8266
8267 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8268 int
8269 vfp3_const_double_rtx (rtx x)
8270 {
8271 if (!TARGET_VFP3)
8272 return 0;
8273
8274 return vfp3_const_double_index (x) != -1;
8275 }
8276
8277 /* Recognize immediates which can be used in various Neon instructions. Legal
8278 immediates are described by the following table (for VMVN variants, the
8279 bitwise inverse of the constant shown is recognized. In either case, VMOV
8280 is output and the correct instruction to use for a given constant is chosen
8281 by the assembler). The constant shown is replicated across all elements of
8282 the destination vector.
8283
8284 insn elems variant constant (binary)
8285 ---- ----- ------- -----------------
8286 vmov i32 0 00000000 00000000 00000000 abcdefgh
8287 vmov i32 1 00000000 00000000 abcdefgh 00000000
8288 vmov i32 2 00000000 abcdefgh 00000000 00000000
8289 vmov i32 3 abcdefgh 00000000 00000000 00000000
8290 vmov i16 4 00000000 abcdefgh
8291 vmov i16 5 abcdefgh 00000000
8292 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8293 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8294 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8295 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8296 vmvn i16 10 00000000 abcdefgh
8297 vmvn i16 11 abcdefgh 00000000
8298 vmov i32 12 00000000 00000000 abcdefgh 11111111
8299 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8300 vmov i32 14 00000000 abcdefgh 11111111 11111111
8301 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8302 vmov i8 16 abcdefgh
8303 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8304 eeeeeeee ffffffff gggggggg hhhhhhhh
8305 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8306
8307 For case 18, B = !b. Representable values are exactly those accepted by
8308 vfp3_const_double_index, but are output as floating-point numbers rather
8309 than indices.
8310
8311 Variants 0-5 (inclusive) may also be used as immediates for the second
8312 operand of VORR/VBIC instructions.
8313
8314 The INVERSE argument causes the bitwise inverse of the given operand to be
8315 recognized instead (used for recognizing legal immediates for the VAND/VORN
8316 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8317 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8318 output, rather than the real insns vbic/vorr).
8319
8320 INVERSE makes no difference to the recognition of float vectors.
8321
8322 The return value is the variant of immediate as shown in the above table, or
8323 -1 if the given value doesn't match any of the listed patterns.
8324 */
8325 static int
8326 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8327 rtx *modconst, int *elementwidth)
8328 {
8329 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8330 matches = 1; \
8331 for (i = 0; i < idx; i += (STRIDE)) \
8332 if (!(TEST)) \
8333 matches = 0; \
8334 if (matches) \
8335 { \
8336 immtype = (CLASS); \
8337 elsize = (ELSIZE); \
8338 break; \
8339 }
8340
8341 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8342 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8343 unsigned char bytes[16];
8344 int immtype = -1, matches;
8345 unsigned int invmask = inverse ? 0xff : 0;
8346
8347 /* Vectors of float constants. */
8348 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8349 {
8350 rtx el0 = CONST_VECTOR_ELT (op, 0);
8351 REAL_VALUE_TYPE r0;
8352
8353 if (!vfp3_const_double_rtx (el0))
8354 return -1;
8355
8356 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8357
8358 for (i = 1; i < n_elts; i++)
8359 {
8360 rtx elt = CONST_VECTOR_ELT (op, i);
8361 REAL_VALUE_TYPE re;
8362
8363 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8364
8365 if (!REAL_VALUES_EQUAL (r0, re))
8366 return -1;
8367 }
8368
8369 if (modconst)
8370 *modconst = CONST_VECTOR_ELT (op, 0);
8371
8372 if (elementwidth)
8373 *elementwidth = 0;
8374
8375 return 18;
8376 }
8377
8378 /* Splat vector constant out into a byte vector. */
8379 for (i = 0; i < n_elts; i++)
8380 {
8381 rtx el = CONST_VECTOR_ELT (op, i);
8382 unsigned HOST_WIDE_INT elpart;
8383 unsigned int part, parts;
8384
8385 if (GET_CODE (el) == CONST_INT)
8386 {
8387 elpart = INTVAL (el);
8388 parts = 1;
8389 }
8390 else if (GET_CODE (el) == CONST_DOUBLE)
8391 {
8392 elpart = CONST_DOUBLE_LOW (el);
8393 parts = 2;
8394 }
8395 else
8396 gcc_unreachable ();
8397
8398 for (part = 0; part < parts; part++)
8399 {
8400 unsigned int byte;
8401 for (byte = 0; byte < innersize; byte++)
8402 {
8403 bytes[idx++] = (elpart & 0xff) ^ invmask;
8404 elpart >>= BITS_PER_UNIT;
8405 }
8406 if (GET_CODE (el) == CONST_DOUBLE)
8407 elpart = CONST_DOUBLE_HIGH (el);
8408 }
8409 }
8410
8411 /* Sanity check. */
8412 gcc_assert (idx == GET_MODE_SIZE (mode));
8413
8414 do
8415 {
8416 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8417 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8418
8419 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8420 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8421
8422 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8423 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8424
8425 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8426 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8427
8428 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8429
8430 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8431
8432 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8433 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8434
8435 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8436 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8437
8438 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8439 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8440
8441 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8442 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8443
8444 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8445
8446 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8447
8448 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8449 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8450
8451 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8452 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8453
8454 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8455 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8456
8457 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8458 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8459
8460 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8461
8462 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8463 && bytes[i] == bytes[(i + 8) % idx]);
8464 }
8465 while (0);
8466
8467 if (immtype == -1)
8468 return -1;
8469
8470 if (elementwidth)
8471 *elementwidth = elsize;
8472
8473 if (modconst)
8474 {
8475 unsigned HOST_WIDE_INT imm = 0;
8476
8477 /* Un-invert bytes of recognized vector, if necessary. */
8478 if (invmask != 0)
8479 for (i = 0; i < idx; i++)
8480 bytes[i] ^= invmask;
8481
8482 if (immtype == 17)
8483 {
8484 /* FIXME: Broken on 32-bit H_W_I hosts. */
8485 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8486
8487 for (i = 0; i < 8; i++)
8488 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8489 << (i * BITS_PER_UNIT);
8490
8491 *modconst = GEN_INT (imm);
8492 }
8493 else
8494 {
8495 unsigned HOST_WIDE_INT imm = 0;
8496
8497 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8498 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8499
8500 *modconst = GEN_INT (imm);
8501 }
8502 }
8503
8504 return immtype;
8505 #undef CHECK
8506 }
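
/* As an example, a V4SImode CONST_VECTOR whose elements are all 0x000000ab
   splats to the byte pattern ab 00 00 00 repeated four times, which matches
   variant 0 in the table above: the function returns 0 with *ELEMENTWIDTH
   set to 32 and *MODCONST set to (const_int 0xab).  */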
8507
8508 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8509 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8510 float elements), and a modified constant (whatever should be output for a
8511 VMOV) in *MODCONST. */
8512
8513 int
8514 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8515 rtx *modconst, int *elementwidth)
8516 {
8517 rtx tmpconst;
8518 int tmpwidth;
8519 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8520
8521 if (retval == -1)
8522 return 0;
8523
8524 if (modconst)
8525 *modconst = tmpconst;
8526
8527 if (elementwidth)
8528 *elementwidth = tmpwidth;
8529
8530 return 1;
8531 }
8532
8533 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8534 the immediate is valid, write a constant suitable for using as an operand
8535 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8536 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8537
8538 int
8539 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8540 rtx *modconst, int *elementwidth)
8541 {
8542 rtx tmpconst;
8543 int tmpwidth;
8544 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8545
8546 if (retval < 0 || retval > 5)
8547 return 0;
8548
8549 if (modconst)
8550 *modconst = tmpconst;
8551
8552 if (elementwidth)
8553 *elementwidth = tmpwidth;
8554
8555 return 1;
8556 }
8557
8558 /* Return a string suitable for output of Neon immediate logic operation
8559 MNEM. */
8560
8561 char *
8562 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8563 int inverse, int quad)
8564 {
8565 int width, is_valid;
8566 static char templ[40];
8567
8568 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8569
8570 gcc_assert (is_valid != 0);
8571
8572 if (quad)
8573 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8574 else
8575 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8576
8577 return templ;
8578 }
8579
8580 /* Output a sequence of pairwise operations to implement a reduction.
8581 NOTE: We do "too much work" here, because pairwise operations work on two
8582 registers' worth of operands in one go. Unfortunately we don't think those
8583 extra calculations can be exploited to do the full operation in fewer steps.
8584 Although all vector elements of the result but the first are ignored, we
8585 actually calculate the same result in each of the elements. An alternative
8586 such as initially loading a vector with zero to use as each of the second
8587 operands would use up an additional register and take an extra instruction,
8588 for no particular gain. */
8589
8590 void
8591 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8592 rtx (*reduc) (rtx, rtx, rtx))
8593 {
8594 enum machine_mode inner = GET_MODE_INNER (mode);
8595 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8596 rtx tmpsum = op1;
8597
8598 for (i = parts / 2; i >= 1; i /= 2)
8599 {
8600 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8601 emit_insn (reduc (dest, tmpsum, tmpsum));
8602 tmpsum = dest;
8603 }
8604 }
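
/* For a V4SImode reduction (four 32-bit elements) this emits two pairwise
   operations: the first with I == 2 into a fresh scratch register, the
   second with I == 1 directly into OP0.  */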
8605
8606 /* If VALS is a vector constant that can be loaded into a register
8607 using VDUP, generate instructions to do so and return an RTX to
8608 assign to the register. Otherwise return NULL_RTX. */
8609
8610 static rtx
8611 neon_vdup_constant (rtx vals)
8612 {
8613 enum machine_mode mode = GET_MODE (vals);
8614 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8615 int n_elts = GET_MODE_NUNITS (mode);
8616 bool all_same = true;
8617 rtx x;
8618 int i;
8619
8620 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8621 return NULL_RTX;
8622
8623 for (i = 0; i < n_elts; ++i)
8624 {
8625 x = XVECEXP (vals, 0, i);
8626 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8627 all_same = false;
8628 }
8629
8630 if (!all_same)
8631 /* The elements are not all the same. We could handle repeating
8632 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8633 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8634 vdup.i16). */
8635 return NULL_RTX;
8636
8637 /* We can load this constant by using VDUP and a constant in a
8638 single ARM register. This will be cheaper than a vector
8639 load. */
8640
8641 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8642 return gen_rtx_VEC_DUPLICATE (mode, x);
8643 }
8644
8645 /* Generate code to load VALS, which is a PARALLEL containing only
8646 constants (for vec_init) or CONST_VECTOR, efficiently into a
8647 register. Returns an RTX to copy into the register, or NULL_RTX
8648 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
8649
8650 rtx
8651 neon_make_constant (rtx vals)
8652 {
8653 enum machine_mode mode = GET_MODE (vals);
8654 rtx target;
8655 rtx const_vec = NULL_RTX;
8656 int n_elts = GET_MODE_NUNITS (mode);
8657 int n_const = 0;
8658 int i;
8659
8660 if (GET_CODE (vals) == CONST_VECTOR)
8661 const_vec = vals;
8662 else if (GET_CODE (vals) == PARALLEL)
8663 {
8664 /* A CONST_VECTOR must contain only CONST_INTs and
8665 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8666 Only store valid constants in a CONST_VECTOR. */
8667 for (i = 0; i < n_elts; ++i)
8668 {
8669 rtx x = XVECEXP (vals, 0, i);
8670 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8671 n_const++;
8672 }
8673 if (n_const == n_elts)
8674 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8675 }
8676 else
8677 gcc_unreachable ();
8678
8679 if (const_vec != NULL
8680 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8681 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8682 return const_vec;
8683 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8684 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8685 pipeline cycle; creating the constant takes one or two ARM
8686 pipeline cycles. */
8687 return target;
8688 else if (const_vec != NULL_RTX)
8689 /* Load from constant pool. On Cortex-A8 this takes two cycles
8690 (for either double or quad vectors). We cannot take advantage
8691 of single-cycle VLD1 because we need a PC-relative addressing
8692 mode. */
8693 return const_vec;
8694 else
8695 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8696 We cannot construct an initializer. */
8697 return NULL_RTX;
8698 }
8699
8700 /* Initialize vector TARGET to VALS. */
8701
8702 void
8703 neon_expand_vector_init (rtx target, rtx vals)
8704 {
8705 enum machine_mode mode = GET_MODE (target);
8706 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8707 int n_elts = GET_MODE_NUNITS (mode);
8708 int n_var = 0, one_var = -1;
8709 bool all_same = true;
8710 rtx x, mem;
8711 int i;
8712
8713 for (i = 0; i < n_elts; ++i)
8714 {
8715 x = XVECEXP (vals, 0, i);
8716 if (!CONSTANT_P (x))
8717 ++n_var, one_var = i;
8718
8719 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8720 all_same = false;
8721 }
8722
8723 if (n_var == 0)
8724 {
8725 rtx constant = neon_make_constant (vals);
8726 if (constant != NULL_RTX)
8727 {
8728 emit_move_insn (target, constant);
8729 return;
8730 }
8731 }
8732
8733 /* Splat a single non-constant element if we can. */
8734 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8735 {
8736 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8737 emit_insn (gen_rtx_SET (VOIDmode, target,
8738 gen_rtx_VEC_DUPLICATE (mode, x)));
8739 return;
8740 }
8741
8742 /* One field is non-constant. Load constant then overwrite varying
8743 field. This is more efficient than using the stack. */
8744 if (n_var == 1)
8745 {
8746 rtx copy = copy_rtx (vals);
8747 rtx index = GEN_INT (one_var);
8748
8749 /* Load constant part of vector, substitute neighboring value for
8750 varying element. */
8751 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8752 neon_expand_vector_init (target, copy);
8753
8754 /* Insert variable. */
8755 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8756 switch (mode)
8757 {
8758 case V8QImode:
8759 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8760 break;
8761 case V16QImode:
8762 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8763 break;
8764 case V4HImode:
8765 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8766 break;
8767 case V8HImode:
8768 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8769 break;
8770 case V2SImode:
8771 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8772 break;
8773 case V4SImode:
8774 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8775 break;
8776 case V2SFmode:
8777 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8778 break;
8779 case V4SFmode:
8780 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8781 break;
8782 case V2DImode:
8783 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8784 break;
8785 default:
8786 gcc_unreachable ();
8787 }
8788 return;
8789 }
8790
8791 /* Construct the vector in memory one field at a time
8792 and load the whole vector. */
8793 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8794 for (i = 0; i < n_elts; i++)
8795 emit_move_insn (adjust_address_nv (mem, inner_mode,
8796 i * GET_MODE_SIZE (inner_mode)),
8797 XVECEXP (vals, 0, i));
8798 emit_move_insn (target, mem);
8799 }
8800
8801 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8802 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8803 reported source locations are bogus. */
8804
8805 static void
8806 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8807 const char *err)
8808 {
8809 HOST_WIDE_INT lane;
8810
8811 gcc_assert (GET_CODE (operand) == CONST_INT);
8812
8813 lane = INTVAL (operand);
8814
8815 if (lane < low || lane >= high)
8816 error (err);
8817 }
8818
8819 /* Bounds-check lanes. */
8820
8821 void
8822 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8823 {
8824 bounds_check (operand, low, high, "lane out of range");
8825 }
8826
8827 /* Bounds-check constants. */
8828
8829 void
8830 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8831 {
8832 bounds_check (operand, low, high, "constant out of range");
8833 }
8834
8835 HOST_WIDE_INT
8836 neon_element_bits (enum machine_mode mode)
8837 {
8838 if (mode == DImode)
8839 return GET_MODE_BITSIZE (mode);
8840 else
8841 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8842 }
8843
8844 \f
8845 /* Predicates for `match_operand' and `match_operator'. */
8846
8847 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8848 int
8849 cirrus_memory_offset (rtx op)
8850 {
8851 /* Reject eliminable registers. */
8852 if (! (reload_in_progress || reload_completed)
8853 && ( reg_mentioned_p (frame_pointer_rtx, op)
8854 || reg_mentioned_p (arg_pointer_rtx, op)
8855 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8856 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8857 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8858 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8859 return 0;
8860
8861 if (GET_CODE (op) == MEM)
8862 {
8863 rtx ind;
8864
8865 ind = XEXP (op, 0);
8866
8867 /* Match: (mem (reg)). */
8868 if (GET_CODE (ind) == REG)
8869 return 1;
8870
8871 /* Match:
8872 (mem (plus (reg)
8873 (const))). */
8874 if (GET_CODE (ind) == PLUS
8875 && GET_CODE (XEXP (ind, 0)) == REG
8876 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8877 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8878 return 1;
8879 }
8880
8881 return 0;
8882 }
8883
8884 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8885 WB is true if full writeback address modes are allowed and is false
8886 if limited writeback address modes (POST_INC and PRE_DEC) are
8887 allowed. */
8888
8889 int
8890 arm_coproc_mem_operand (rtx op, bool wb)
8891 {
8892 rtx ind;
8893
8894 /* Reject eliminable registers. */
8895 if (! (reload_in_progress || reload_completed)
8896 && ( reg_mentioned_p (frame_pointer_rtx, op)
8897 || reg_mentioned_p (arg_pointer_rtx, op)
8898 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8899 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8900 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8901 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8902 return FALSE;
8903
8904 /* Constants are converted into offsets from labels. */
8905 if (GET_CODE (op) != MEM)
8906 return FALSE;
8907
8908 ind = XEXP (op, 0);
8909
8910 if (reload_completed
8911 && (GET_CODE (ind) == LABEL_REF
8912 || (GET_CODE (ind) == CONST
8913 && GET_CODE (XEXP (ind, 0)) == PLUS
8914 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8915 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8916 return TRUE;
8917
8918 /* Match: (mem (reg)). */
8919 if (GET_CODE (ind) == REG)
8920 return arm_address_register_rtx_p (ind, 0);
8921
8922 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8923 acceptable in any case (subject to verification by
8924 arm_address_register_rtx_p). We need WB to be true to accept
8925 PRE_INC and POST_DEC. */
8926 if (GET_CODE (ind) == POST_INC
8927 || GET_CODE (ind) == PRE_DEC
8928 || (wb
8929 && (GET_CODE (ind) == PRE_INC
8930 || GET_CODE (ind) == POST_DEC)))
8931 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8932
8933 if (wb
8934 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8935 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8936 && GET_CODE (XEXP (ind, 1)) == PLUS
8937 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8938 ind = XEXP (ind, 1);
8939
8940 /* Match:
8941 (plus (reg)
8942 (const)). */
8943 if (GET_CODE (ind) == PLUS
8944 && GET_CODE (XEXP (ind, 0)) == REG
8945 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8946 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8947 && INTVAL (XEXP (ind, 1)) > -1024
8948 && INTVAL (XEXP (ind, 1)) < 1024
8949 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8950 return TRUE;
8951
8952 return FALSE;
8953 }
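
/* Example of the offset form accepted above: (mem (plus (reg) (const_int 8)))
   is valid because 8 lies strictly between -1024 and 1024 and is a multiple
   of 4, whereas offsets of 6 or 1024 would be rejected.  */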
8954
8955 /* Return TRUE if OP is a memory operand which we can load or store a vector
8956 to/from. TYPE is one of the following values:
8957 0 - Vector load/store (vldr)
8958 1 - Core registers (ldm)
8959 2 - Element/structure loads (vld1)
8960 */
8961 int
8962 neon_vector_mem_operand (rtx op, int type)
8963 {
8964 rtx ind;
8965
8966 /* Reject eliminable registers. */
8967 if (! (reload_in_progress || reload_completed)
8968 && ( reg_mentioned_p (frame_pointer_rtx, op)
8969 || reg_mentioned_p (arg_pointer_rtx, op)
8970 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8971 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8972 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8973 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8974 return FALSE;
8975
8976 /* Constants are converted into offsets from labels. */
8977 if (GET_CODE (op) != MEM)
8978 return FALSE;
8979
8980 ind = XEXP (op, 0);
8981
8982 if (reload_completed
8983 && (GET_CODE (ind) == LABEL_REF
8984 || (GET_CODE (ind) == CONST
8985 && GET_CODE (XEXP (ind, 0)) == PLUS
8986 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8987 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8988 return TRUE;
8989
8990 /* Match: (mem (reg)). */
8991 if (GET_CODE (ind) == REG)
8992 return arm_address_register_rtx_p (ind, 0);
8993
8994 /* Allow post-increment with Neon registers. */
8995 if ((type != 1 && GET_CODE (ind) == POST_INC)
8996 || (type == 0 && GET_CODE (ind) == PRE_DEC))
8997 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8998
8999 /* FIXME: vld1 allows register post-modify. */
9000
9001 /* Match:
9002 (plus (reg)
9003 (const)). */
9004 if (type == 0
9005 && GET_CODE (ind) == PLUS
9006 && GET_CODE (XEXP (ind, 0)) == REG
9007 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9008 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9009 && INTVAL (XEXP (ind, 1)) > -1024
9010 && INTVAL (XEXP (ind, 1)) < 1016
9011 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9012 return TRUE;
9013
9014 return FALSE;
9015 }
9016
9017 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9018 type. */
9019 int
9020 neon_struct_mem_operand (rtx op)
9021 {
9022 rtx ind;
9023
9024 /* Reject eliminable registers. */
9025 if (! (reload_in_progress || reload_completed)
9026 && ( reg_mentioned_p (frame_pointer_rtx, op)
9027 || reg_mentioned_p (arg_pointer_rtx, op)
9028 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9029 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9030 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9031 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9032 return FALSE;
9033
9034 /* Constants are converted into offsets from labels. */
9035 if (GET_CODE (op) != MEM)
9036 return FALSE;
9037
9038 ind = XEXP (op, 0);
9039
9040 if (reload_completed
9041 && (GET_CODE (ind) == LABEL_REF
9042 || (GET_CODE (ind) == CONST
9043 && GET_CODE (XEXP (ind, 0)) == PLUS
9044 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9045 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9046 return TRUE;
9047
9048 /* Match: (mem (reg)). */
9049 if (GET_CODE (ind) == REG)
9050 return arm_address_register_rtx_p (ind, 0);
9051
9052 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9053 if (GET_CODE (ind) == POST_INC
9054 || GET_CODE (ind) == PRE_DEC)
9055 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9056
9057 return FALSE;
9058 }
9059
9060 /* Return true if X is a register that will be eliminated later on. */
9061 int
9062 arm_eliminable_register (rtx x)
9063 {
9064 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9065 || REGNO (x) == ARG_POINTER_REGNUM
9066 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9067 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9068 }
9069
9070 /* Return GENERAL_REGS if a scratch register required to reload x to/from
9071 coprocessor registers. Otherwise return NO_REGS. */
9072
9073 enum reg_class
9074 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9075 {
9076 if (mode == HFmode)
9077 {
9078 if (!TARGET_NEON_FP16)
9079 return GENERAL_REGS;
9080 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9081 return NO_REGS;
9082 return GENERAL_REGS;
9083 }
9084
9085 /* The neon move patterns handle all legitimate vector and struct
9086 addresses. */
9087 if (TARGET_NEON
9088 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9089 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9090 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9091 || VALID_NEON_STRUCT_MODE (mode)))
9092 return NO_REGS;
9093
9094 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9095 return NO_REGS;
9096
9097 return GENERAL_REGS;
9098 }
9099
9100 /* Values which must be returned in the most-significant end of the return
9101 register. */
9102
9103 static bool
9104 arm_return_in_msb (const_tree valtype)
9105 {
9106 return (TARGET_AAPCS_BASED
9107 && BYTES_BIG_ENDIAN
9108 && (AGGREGATE_TYPE_P (valtype)
9109 || TREE_CODE (valtype) == COMPLEX_TYPE));
9110 }
9111
9112 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9113 Used by the Cirrus Maverick code which has to work around
9114 a hardware bug triggered by such instructions. */
9115 static bool
9116 arm_memory_load_p (rtx insn)
9117 {
9118 rtx body, lhs, rhs;
9119
9120 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9121 return false;
9122
9123 body = PATTERN (insn);
9124
9125 if (GET_CODE (body) != SET)
9126 return false;
9127
9128 lhs = XEXP (body, 0);
9129 rhs = XEXP (body, 1);
9130
9131 lhs = REG_OR_SUBREG_RTX (lhs);
9132
9133 /* If the destination is not a general purpose
9134 register we do not have to worry. */
9135 if (GET_CODE (lhs) != REG
9136 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9137 return false;
9138
9139 /* As well as loads from memory we also have to react
9140 to loads of invalid constants which will be turned
9141 into loads from the minipool. */
9142 return (GET_CODE (rhs) == MEM
9143 || GET_CODE (rhs) == SYMBOL_REF
9144 || note_invalid_constants (insn, -1, false));
9145 }
9146
9147 /* Return TRUE if INSN is a Cirrus instruction. */
9148 static bool
9149 arm_cirrus_insn_p (rtx insn)
9150 {
9151 enum attr_cirrus attr;
9152
9153 /* get_attr cannot accept USE or CLOBBER. */
9154 if (!insn
9155 || GET_CODE (insn) != INSN
9156 || GET_CODE (PATTERN (insn)) == USE
9157 || GET_CODE (PATTERN (insn)) == CLOBBER)
9158 return 0;
9159
9160 attr = get_attr_cirrus (insn);
9161
9162 return attr != CIRRUS_NOT;
9163 }
9164
9165 /* Cirrus reorg for invalid instruction combinations. */
9166 static void
9167 cirrus_reorg (rtx first)
9168 {
9169 enum attr_cirrus attr;
9170 rtx body = PATTERN (first);
9171 rtx t;
9172 int nops;
9173
9174 /* Any branch must be followed by 2 non-Cirrus instructions. */
9175 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9176 {
9177 nops = 0;
9178 t = next_nonnote_insn (first);
9179
9180 if (arm_cirrus_insn_p (t))
9181 ++ nops;
9182
9183 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9184 ++ nops;
9185
9186 while (nops --)
9187 emit_insn_after (gen_nop (), first);
9188
9189 return;
9190 }
9191
9192 /* (float (blah)) is in parallel with a clobber. */
9193 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9194 body = XVECEXP (body, 0, 0);
9195
9196 if (GET_CODE (body) == SET)
9197 {
9198 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9199
9200 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9201 be followed by a non-Cirrus insn. */
9202 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9203 {
9204 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9205 emit_insn_after (gen_nop (), first);
9206
9207 return;
9208 }
9209 else if (arm_memory_load_p (first))
9210 {
9211 unsigned int arm_regno;
9212
9213 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9214 ldr/cfmv64hr combination where the Rd field is the same
9215 in both instructions must be split with a non-Cirrus
9216 insn. Example:
9217
9218 ldr r0, blah
9219 nop
9220 cfmvsr mvf0, r0. */
9221
9222 /* Get Arm register number for ldr insn. */
9223 if (GET_CODE (lhs) == REG)
9224 arm_regno = REGNO (lhs);
9225 else
9226 {
9227 gcc_assert (GET_CODE (rhs) == REG);
9228 arm_regno = REGNO (rhs);
9229 }
9230
9231 /* Next insn. */
9232 first = next_nonnote_insn (first);
9233
9234 if (! arm_cirrus_insn_p (first))
9235 return;
9236
9237 body = PATTERN (first);
9238
9239 /* (float (blah)) is in parallel with a clobber. */
9240 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9241 body = XVECEXP (body, 0, 0);
9242
9243 if (GET_CODE (body) == FLOAT)
9244 body = XEXP (body, 0);
9245
9246 if (get_attr_cirrus (first) == CIRRUS_MOVE
9247 && GET_CODE (XEXP (body, 1)) == REG
9248 && arm_regno == REGNO (XEXP (body, 1)))
9249 emit_insn_after (gen_nop (), first);
9250
9251 return;
9252 }
9253 }
9254
9255 /* get_attr cannot accept USE or CLOBBER. */
9256 if (!first
9257 || GET_CODE (first) != INSN
9258 || GET_CODE (PATTERN (first)) == USE
9259 || GET_CODE (PATTERN (first)) == CLOBBER)
9260 return;
9261
9262 attr = get_attr_cirrus (first);
9263
9264 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9265 must be followed by a non-coprocessor instruction. */
9266 if (attr == CIRRUS_COMPARE)
9267 {
9268 nops = 0;
9269
9270 t = next_nonnote_insn (first);
9271
9272 if (arm_cirrus_insn_p (t))
9273 ++ nops;
9274
9275 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9276 ++ nops;
9277
9278 while (nops --)
9279 emit_insn_after (gen_nop (), first);
9280
9281 return;
9282 }
9283 }
9284
9285 /* Return TRUE if X references a SYMBOL_REF. */
9286 int
9287 symbol_mentioned_p (rtx x)
9288 {
9289 const char * fmt;
9290 int i;
9291
9292 if (GET_CODE (x) == SYMBOL_REF)
9293 return 1;
9294
9295 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9296 are constant offsets, not symbols. */
9297 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9298 return 0;
9299
9300 fmt = GET_RTX_FORMAT (GET_CODE (x));
9301
9302 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9303 {
9304 if (fmt[i] == 'E')
9305 {
9306 int j;
9307
9308 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9309 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9310 return 1;
9311 }
9312 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9313 return 1;
9314 }
9315
9316 return 0;
9317 }
9318
9319 /* Return TRUE if X references a LABEL_REF. */
9320 int
9321 label_mentioned_p (rtx x)
9322 {
9323 const char * fmt;
9324 int i;
9325
9326 if (GET_CODE (x) == LABEL_REF)
9327 return 1;
9328
9329 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9330 instruction, but they are constant offsets, not symbols. */
9331 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9332 return 0;
9333
9334 fmt = GET_RTX_FORMAT (GET_CODE (x));
9335 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9336 {
9337 if (fmt[i] == 'E')
9338 {
9339 int j;
9340
9341 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9342 if (label_mentioned_p (XVECEXP (x, i, j)))
9343 return 1;
9344 }
9345 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9346 return 1;
9347 }
9348
9349 return 0;
9350 }
9351
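/* Return TRUE if X contains a TLS (UNSPEC_TLS) reference, possibly
   wrapped in a CONST. */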
9352 int
9353 tls_mentioned_p (rtx x)
9354 {
9355 switch (GET_CODE (x))
9356 {
9357 case CONST:
9358 return tls_mentioned_p (XEXP (x, 0));
9359
9360 case UNSPEC:
9361 if (XINT (x, 1) == UNSPEC_TLS)
9362 return 1;
9363 /* Fall through. */
9364 default:
9365 return 0;
9366 }
9367 }
9368
9369 /* Must not copy any rtx that uses a pc-relative address. */
9370
9371 static int
9372 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9373 {
9374 if (GET_CODE (*x) == UNSPEC
9375 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9376 return 1;
9377 return 0;
9378 }
9379
9380 static bool
9381 arm_cannot_copy_insn_p (rtx insn)
9382 {
9383 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9384 }
9385
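/* Map a MIN or MAX rtx code to the comparison under which its first
   operand is selected: SMAX -> GE, SMIN -> LE, UMIN -> LEU, UMAX -> GEU. */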
9386 enum rtx_code
9387 minmax_code (rtx x)
9388 {
9389 enum rtx_code code = GET_CODE (x);
9390
9391 switch (code)
9392 {
9393 case SMAX:
9394 return GE;
9395 case SMIN:
9396 return LE;
9397 case UMIN:
9398 return LEU;
9399 case UMAX:
9400 return GEU;
9401 default:
9402 gcc_unreachable ();
9403 }
9404 }
9405
9406 /* Return 1 if memory locations are adjacent. */
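/* The accepted case is two SImode references off the same base register whose
   constant offsets differ by exactly 4, e.g. (mem:SI (reg R)) and
   (mem:SI (plus (reg R) (const_int 4))). On load-scheduling cores such a
   pair is only accepted when optimizing for size. */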
9407 int
9408 adjacent_mem_locations (rtx a, rtx b)
9409 {
9410 /* We don't guarantee to preserve the order of these memory refs. */
9411 if (volatile_refs_p (a) || volatile_refs_p (b))
9412 return 0;
9413
9414 if ((GET_CODE (XEXP (a, 0)) == REG
9415 || (GET_CODE (XEXP (a, 0)) == PLUS
9416 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9417 && (GET_CODE (XEXP (b, 0)) == REG
9418 || (GET_CODE (XEXP (b, 0)) == PLUS
9419 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9420 {
9421 HOST_WIDE_INT val0 = 0, val1 = 0;
9422 rtx reg0, reg1;
9423 int val_diff;
9424
9425 if (GET_CODE (XEXP (a, 0)) == PLUS)
9426 {
9427 reg0 = XEXP (XEXP (a, 0), 0);
9428 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9429 }
9430 else
9431 reg0 = XEXP (a, 0);
9432
9433 if (GET_CODE (XEXP (b, 0)) == PLUS)
9434 {
9435 reg1 = XEXP (XEXP (b, 0), 0);
9436 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9437 }
9438 else
9439 reg1 = XEXP (b, 0);
9440
9441 /* Don't accept any offset that will require multiple
9442 instructions to handle, since this would cause the
9443 arith_adjacentmem pattern to output an overlong sequence. */
9444 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9445 return 0;
9446
9447 /* Don't allow an eliminable register: register elimination can make
9448 the offset too large. */
9449 if (arm_eliminable_register (reg0))
9450 return 0;
9451
9452 val_diff = val1 - val0;
9453
9454 if (arm_ld_sched)
9455 {
9456 /* If the target has load delay slots, then there's no benefit
9457 to using an ldm instruction unless the offset is zero and
9458 we are optimizing for size. */
9459 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9460 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9461 && (val_diff == 4 || val_diff == -4));
9462 }
9463
9464 return ((REGNO (reg0) == REGNO (reg1))
9465 && (val_diff == 4 || val_diff == -4));
9466 }
9467
9468 return 0;
9469 }
9470
9471 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9472 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9473 instruction. ADD_OFFSET is nonzero if the base address register needs
9474 to be modified with an add instruction before we can use it. */
9475
9476 static bool
9477 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9478 int nops, HOST_WIDE_INT add_offset)
9479 {
9480 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9481 if the offset isn't small enough. The reason 2 ldrs are faster
9482 is because these ARMs are able to do more than one cache access
9483 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9484 whilst the ARM8 has a double bandwidth cache. This means that
9485 these cores can do both an instruction fetch and a data fetch in
9486 a single cycle, so the trick of calculating the address into a
9487 scratch register (one of the result regs) and then doing a load
9488 multiple actually becomes slower (and no smaller in code size).
9489 That is the transformation
9490
9491 ldr rd1, [rbase + offset]
9492 ldr rd2, [rbase + offset + 4]
9493
9494 to
9495
9496 add rd1, rbase, offset
9497 ldmia rd1, {rd1, rd2}
9498
9499 produces worse code -- '3 cycles + any stalls on rd2' instead of
9500 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9501 access per cycle, the first sequence could never complete in less
9502 than 6 cycles, whereas the ldm sequence would only take 5 and
9503 would make better use of sequential accesses if not hitting the
9504 cache.
9505
9506 We cheat here and test 'arm_ld_sched' which we currently know to
9507 only be true for the ARM8, ARM9 and StrongARM. If this ever
9508 changes, then the test below needs to be reworked. */
9509 if (nops == 2 && arm_ld_sched && add_offset != 0)
9510 return false;
9511
9512 /* XScale has load-store double instructions, but they have stricter
9513 alignment requirements than load-store multiple, so we cannot
9514 use them.
9515
9516 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9517 the pipeline until completion.
9518
9519 NREGS CYCLES
9520 1 3
9521 2 4
9522 3 5
9523 4 6
9524
9525 An ldr instruction takes 1-3 cycles, but does not block the
9526 pipeline.
9527
9528 NREGS CYCLES
9529 1 1-3
9530 2 2-6
9531 3 3-9
9532 4 4-12
9533
9534 Best case ldr will always win. However, the more ldr instructions
9535 we issue, the less likely we are to be able to schedule them well.
9536 Using ldr instructions also increases code size.
9537
9538 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9539 for counts of 3 or 4 regs. */
9540 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9541 return false;
9542 return true;
9543 }
9544
9545 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9546 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9547 an array ORDER which describes the sequence to use when accessing the
9548 offsets that produces an ascending order. In this sequence, each
9549 offset must be larger by exactly 4 than the previous one. ORDER[0]
9550 must have been filled in with the lowest offset by the caller.
9551 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9552 we use to verify that ORDER produces an ascending order of registers.
9553 Return true if it was possible to construct such an order, false if
9554 not. */
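/* For instance, with UNSORTED_OFFSETS {8, 0, 4, 12} and ORDER[0] = 1 (the
   index of the lowest offset), the computed order is {1, 2, 0, 3}. */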
9555
9556 static bool
9557 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9558 int *unsorted_regs)
9559 {
9560 int i;
9561 for (i = 1; i < nops; i++)
9562 {
9563 int j;
9564
9565 order[i] = order[i - 1];
9566 for (j = 0; j < nops; j++)
9567 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9568 {
9569 /* We must find exactly one offset that is higher than the
9570 previous one by 4. */
9571 if (order[i] != order[i - 1])
9572 return false;
9573 order[i] = j;
9574 }
9575 if (order[i] == order[i - 1])
9576 return false;
9577 /* The register numbers must be ascending. */
9578 if (unsorted_regs != NULL
9579 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
9580 return false;
9581 }
9582 return true;
9583 }
9584
9585 /* Used to determine in a peephole whether a sequence of load
9586 instructions can be changed into a load-multiple instruction.
9587 NOPS is the number of separate load instructions we are examining. The
9588 first NOPS entries in OPERANDS are the destination registers, the
9589 next NOPS entries are memory operands. If this function is
9590 successful, *BASE is set to the common base register of the memory
9591 accesses; *LOAD_OFFSET is set to the first memory location's offset
9592 from that base register.
9593 REGS is an array filled in with the destination register numbers.
9594 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
9595 insn numbers to an ascending order of loads. If CHECK_REGS is true,
9596 the sequence of registers in REGS matches the loads from ascending memory
9597 locations, and the function verifies that the register numbers are
9598 themselves ascending. If CHECK_REGS is false, the register numbers
9599 are stored in the order they are found in the operands. */
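/* For example, the pair

        ldr     r1, [r4]
        ldr     r2, [r4, #4]

   is recognized and can later be emitted as a single "ldmia r4, {r1, r2}"
   (return value 1, i.e. the ldmia case). */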
9600 static int
9601 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9602 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9603 {
9604 int unsorted_regs[MAX_LDM_STM_OPS];
9605 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9606 int order[MAX_LDM_STM_OPS];
9607 rtx base_reg_rtx = NULL;
9608 int base_reg = -1;
9609 int i, ldm_case;
9610
9611 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9612 easily extended if required. */
9613 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9614
9615 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9616
9617 /* Loop over the operands and check that the memory references are
9618 suitable (i.e. immediate offsets from the same base register). At
9619 the same time, extract the target register, and the memory
9620 offsets. */
9621 for (i = 0; i < nops; i++)
9622 {
9623 rtx reg;
9624 rtx offset;
9625
9626 /* Convert a subreg of a mem into the mem itself. */
9627 if (GET_CODE (operands[nops + i]) == SUBREG)
9628 operands[nops + i] = alter_subreg (operands + (nops + i));
9629
9630 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9631
9632 /* Don't reorder volatile memory references; it doesn't seem worth
9633 looking for the case where the order is ok anyway. */
9634 if (MEM_VOLATILE_P (operands[nops + i]))
9635 return 0;
9636
9637 offset = const0_rtx;
9638
9639 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9640 || (GET_CODE (reg) == SUBREG
9641 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9642 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9643 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9644 == REG)
9645 || (GET_CODE (reg) == SUBREG
9646 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9647 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9648 == CONST_INT)))
9649 {
9650 if (i == 0)
9651 {
9652 base_reg = REGNO (reg);
9653 base_reg_rtx = reg;
9654 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9655 return 0;
9656 }
9657 else if (base_reg != (int) REGNO (reg))
9658 /* Not addressed from the same base register. */
9659 return 0;
9660
9661 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9662 ? REGNO (operands[i])
9663 : REGNO (SUBREG_REG (operands[i])));
9664
9665 /* If it isn't an integer register, or if it overwrites the
9666 base register but isn't the last insn in the list, then
9667 we can't do this. */
9668 if (unsorted_regs[i] < 0
9669 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9670 || unsorted_regs[i] > 14
9671 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9672 return 0;
9673
9674 unsorted_offsets[i] = INTVAL (offset);
9675 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9676 order[0] = i;
9677 }
9678 else
9679 /* Not a suitable memory address. */
9680 return 0;
9681 }
9682
9683 /* All the useful information has now been extracted from the
9684 operands into unsorted_regs and unsorted_offsets; additionally,
9685 order[0] has been set to the lowest offset in the list. Sort
9686 the offsets into order, verifying that they are adjacent, and
9687 check that the register numbers are ascending. */
9688 if (!compute_offset_order (nops, unsorted_offsets, order,
9689 check_regs ? unsorted_regs : NULL))
9690 return 0;
9691
9692 if (saved_order)
9693 memcpy (saved_order, order, sizeof order);
9694
9695 if (base)
9696 {
9697 *base = base_reg;
9698
9699 for (i = 0; i < nops; i++)
9700 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9701
9702 *load_offset = unsorted_offsets[order[0]];
9703 }
9704
9705 if (TARGET_THUMB1
9706 && !peep2_reg_dead_p (nops, base_reg_rtx))
9707 return 0;
9708
9709 if (unsorted_offsets[order[0]] == 0)
9710 ldm_case = 1; /* ldmia */
9711 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9712 ldm_case = 2; /* ldmib */
9713 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9714 ldm_case = 3; /* ldmda */
9715 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9716 ldm_case = 4; /* ldmdb */
9717 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9718 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9719 ldm_case = 5;
9720 else
9721 return 0;
9722
9723 if (!multiple_operation_profitable_p (false, nops,
9724 ldm_case == 5
9725 ? unsorted_offsets[order[0]] : 0))
9726 return 0;
9727
9728 return ldm_case;
9729 }
9730
9731 /* Used to determine in a peephole whether a sequence of store instructions can
9732 be changed into a store-multiple instruction.
9733 NOPS is the number of separate store instructions we are examining.
9734 NOPS_TOTAL is the total number of instructions recognized by the peephole
9735 pattern.
9736 The first NOPS entries in OPERANDS are the source registers, the next
9737 NOPS entries are memory operands. If this function is successful, *BASE is
9738 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9739 to the first memory location's offset from that base register. REGS is an
9740 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9741 likewise filled with the corresponding rtx's.
9742 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
9743 numbers to an ascending order of stores.
9744 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9745 from ascending memory locations, and the function verifies that the register
9746 numbers are themselves ascending. If CHECK_REGS is false, the register
9747 numbers are stored in the order they are found in the operands. */
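/* For example, the pair

        str     r1, [r4]
        str     r2, [r4, #4]

   is recognized and can later be emitted as a single "stmia r4, {r1, r2}"
   (return value 1, i.e. the stmia case). */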
9748 static int
9749 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9750 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9751 HOST_WIDE_INT *load_offset, bool check_regs)
9752 {
9753 int unsorted_regs[MAX_LDM_STM_OPS];
9754 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9755 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9756 int order[MAX_LDM_STM_OPS];
9757 int base_reg = -1;
9758 rtx base_reg_rtx = NULL;
9759 int i, stm_case;
9760
9761 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9762 easily extended if required. */
9763 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9764
9765 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9766
9767 /* Loop over the operands and check that the memory references are
9768 suitable (i.e. immediate offsets from the same base register). At
9769 the same time, extract the target register, and the memory
9770 offsets. */
9771 for (i = 0; i < nops; i++)
9772 {
9773 rtx reg;
9774 rtx offset;
9775
9776 /* Convert a subreg of a mem into the mem itself. */
9777 if (GET_CODE (operands[nops + i]) == SUBREG)
9778 operands[nops + i] = alter_subreg (operands + (nops + i));
9779
9780 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9781
9782 /* Don't reorder volatile memory references; it doesn't seem worth
9783 looking for the case where the order is ok anyway. */
9784 if (MEM_VOLATILE_P (operands[nops + i]))
9785 return 0;
9786
9787 offset = const0_rtx;
9788
9789 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9790 || (GET_CODE (reg) == SUBREG
9791 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9792 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9793 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9794 == REG)
9795 || (GET_CODE (reg) == SUBREG
9796 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9797 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9798 == CONST_INT)))
9799 {
9800 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9801 ? operands[i] : SUBREG_REG (operands[i]));
9802 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9803
9804 if (i == 0)
9805 {
9806 base_reg = REGNO (reg);
9807 base_reg_rtx = reg;
9808 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9809 return 0;
9810 }
9811 else if (base_reg != (int) REGNO (reg))
9812 /* Not addressed from the same base register. */
9813 return 0;
9814
9815 /* If it isn't an integer register, then we can't do this. */
9816 if (unsorted_regs[i] < 0
9817 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9818 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9819 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9820 || unsorted_regs[i] > 14)
9821 return 0;
9822
9823 unsorted_offsets[i] = INTVAL (offset);
9824 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9825 order[0] = i;
9826 }
9827 else
9828 /* Not a suitable memory address. */
9829 return 0;
9830 }
9831
9832 /* All the useful information has now been extracted from the
9833 operands into unsorted_regs and unsorted_offsets; additionally,
9834 order[0] has been set to the lowest offset in the list. Sort
9835 the offsets into order, verifying that they are adjacent, and
9836 check that the register numbers are ascending. */
9837 if (!compute_offset_order (nops, unsorted_offsets, order,
9838 check_regs ? unsorted_regs : NULL))
9839 return 0;
9840
9841 if (saved_order)
9842 memcpy (saved_order, order, sizeof order);
9843
9844 if (base)
9845 {
9846 *base = base_reg;
9847
9848 for (i = 0; i < nops; i++)
9849 {
9850 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9851 if (reg_rtxs)
9852 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9853 }
9854
9855 *load_offset = unsorted_offsets[order[0]];
9856 }
9857
9858 if (TARGET_THUMB1
9859 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9860 return 0;
9861
9862 if (unsorted_offsets[order[0]] == 0)
9863 stm_case = 1; /* stmia */
9864 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9865 stm_case = 2; /* stmib */
9866 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9867 stm_case = 3; /* stmda */
9868 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9869 stm_case = 4; /* stmdb */
9870 else
9871 return 0;
9872
9873 if (!multiple_operation_profitable_p (false, nops, 0))
9874 return 0;
9875
9876 return stm_case;
9877 }
9878 \f
9879 /* Routines for use in generating RTL. */
9880
9881 /* Generate a load-multiple instruction. COUNT is the number of loads in
9882 the instruction; REGS and MEMS are arrays containing the operands.
9883 BASEREG is the base register to be used in addressing the memory operands.
9884 If WBACK_OFFSET is nonzero, the instruction also updates the base
9885 register by that amount. */
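/* When a load-multiple is profitable the result is a PARALLEL of the form

        (parallel [(set (reg r_0) (mem addr_0))
                   ...
                   (set (reg r_{n-1}) (mem addr_{n-1}))])

   with an extra leading SET that updates BASEREG when WBACK_OFFSET is
   nonzero; otherwise a sequence of single loads (plus the base update,
   if any) is returned. */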
9886
9887 static rtx
9888 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9889 HOST_WIDE_INT wback_offset)
9890 {
9891 int i = 0, j;
9892 rtx result;
9893
9894 if (!multiple_operation_profitable_p (false, count, 0))
9895 {
9896 rtx seq;
9897
9898 start_sequence ();
9899
9900 for (i = 0; i < count; i++)
9901 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9902
9903 if (wback_offset != 0)
9904 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9905
9906 seq = get_insns ();
9907 end_sequence ();
9908
9909 return seq;
9910 }
9911
9912 result = gen_rtx_PARALLEL (VOIDmode,
9913 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9914 if (wback_offset != 0)
9915 {
9916 XVECEXP (result, 0, 0)
9917 = gen_rtx_SET (VOIDmode, basereg,
9918 plus_constant (basereg, wback_offset));
9919 i = 1;
9920 count++;
9921 }
9922
9923 for (j = 0; i < count; i++, j++)
9924 XVECEXP (result, 0, i)
9925 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
9926
9927 return result;
9928 }
9929
9930 /* Generate a store-multiple instruction. COUNT is the number of stores in
9931 the instruction; REGS and MEMS are arrays containing the operands.
9932 BASEREG is the base register to be used in addressing the memory operands.
9933 If WBACK_OFFSET is nonzero, the instruction also updates the base
9934 register by that amount. */
9935
9936 static rtx
9937 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9938 HOST_WIDE_INT wback_offset)
9939 {
9940 int i = 0, j;
9941 rtx result;
9942
9943 if (GET_CODE (basereg) == PLUS)
9944 basereg = XEXP (basereg, 0);
9945
9946 if (!multiple_operation_profitable_p (false, count, 0))
9947 {
9948 rtx seq;
9949
9950 start_sequence ();
9951
9952 for (i = 0; i < count; i++)
9953 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
9954
9955 if (wback_offset != 0)
9956 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9957
9958 seq = get_insns ();
9959 end_sequence ();
9960
9961 return seq;
9962 }
9963
9964 result = gen_rtx_PARALLEL (VOIDmode,
9965 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9966 if (wback_offset != 0)
9967 {
9968 XVECEXP (result, 0, 0)
9969 = gen_rtx_SET (VOIDmode, basereg,
9970 plus_constant (basereg, wback_offset));
9971 i = 1;
9972 count++;
9973 }
9974
9975 for (j = 0; i < count; i++, j++)
9976 XVECEXP (result, 0, i)
9977 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
9978
9979 return result;
9980 }
9981
9982 /* Generate either a load-multiple or a store-multiple instruction. This
9983 function can be used in situations where we can start with a single MEM
9984 rtx and adjust its address upwards.
9985 COUNT is the number of operations in the instruction, not counting a
9986 possible update of the base register. REGS is an array containing the
9987 register operands.
9988 BASEREG is the base register to be used in addressing the memory operands,
9989 which are constructed from BASEMEM.
9990 WRITE_BACK specifies whether the generated instruction should include an
9991 update of the base register.
9992 OFFSETP is used to pass an offset to and from this function; this offset
9993 is not used when constructing the address (instead BASEMEM should have an
9994 appropriate offset in its address), it is used only for setting
9995 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
9996
9997 static rtx
9998 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
9999 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10000 {
10001 rtx mems[MAX_LDM_STM_OPS];
10002 HOST_WIDE_INT offset = *offsetp;
10003 int i;
10004
10005 gcc_assert (count <= MAX_LDM_STM_OPS);
10006
10007 if (GET_CODE (basereg) == PLUS)
10008 basereg = XEXP (basereg, 0);
10009
10010 for (i = 0; i < count; i++)
10011 {
10012 rtx addr = plus_constant (basereg, i * 4);
10013 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10014 offset += 4;
10015 }
10016
10017 if (write_back)
10018 *offsetp = offset;
10019
10020 if (is_load)
10021 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10022 write_back ? 4 * count : 0);
10023 else
10024 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10025 write_back ? 4 * count : 0);
10026 }
10027
10028 rtx
10029 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10030 rtx basemem, HOST_WIDE_INT *offsetp)
10031 {
10032 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10033 offsetp);
10034 }
10035
10036 rtx
10037 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10038 rtx basemem, HOST_WIDE_INT *offsetp)
10039 {
10040 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10041 offsetp);
10042 }
10043
10044 /* Called from a peephole2 expander to turn a sequence of loads into an
10045 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10046 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10047 is true if we can reorder the registers because they are used commutatively
10048 subsequently.
10049 Returns true iff we could generate a new instruction. */
10050
10051 bool
10052 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10053 {
10054 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10055 rtx mems[MAX_LDM_STM_OPS];
10056 int i, j, base_reg;
10057 rtx base_reg_rtx;
10058 HOST_WIDE_INT offset;
10059 int write_back = FALSE;
10060 int ldm_case;
10061 rtx addr;
10062
10063 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10064 &base_reg, &offset, !sort_regs);
10065
10066 if (ldm_case == 0)
10067 return false;
10068
10069 if (sort_regs)
10070 for (i = 0; i < nops - 1; i++)
10071 for (j = i + 1; j < nops; j++)
10072 if (regs[i] > regs[j])
10073 {
10074 int t = regs[i];
10075 regs[i] = regs[j];
10076 regs[j] = t;
10077 }
10078 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10079
10080 if (TARGET_THUMB1)
10081 {
10082 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10083 gcc_assert (ldm_case == 1 || ldm_case == 5);
10084 write_back = TRUE;
10085 }
10086
10087 if (ldm_case == 5)
10088 {
10089 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10090 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10091 offset = 0;
10092 if (!TARGET_THUMB1)
10093 {
10094 base_reg = regs[0];
10095 base_reg_rtx = newbase;
10096 }
10097 }
10098
10099 for (i = 0; i < nops; i++)
10100 {
10101 addr = plus_constant (base_reg_rtx, offset + i * 4);
10102 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10103 SImode, addr, 0);
10104 }
10105 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10106 write_back ? offset + i * 4 : 0));
10107 return true;
10108 }
10109
10110 /* Called from a peephole2 expander to turn a sequence of stores into an
10111 STM instruction. OPERANDS are the operands found by the peephole matcher;
10112 NOPS indicates how many separate stores we are trying to combine.
10113 Returns true iff we could generate a new instruction. */
10114
10115 bool
10116 gen_stm_seq (rtx *operands, int nops)
10117 {
10118 int i;
10119 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10120 rtx mems[MAX_LDM_STM_OPS];
10121 int base_reg;
10122 rtx base_reg_rtx;
10123 HOST_WIDE_INT offset;
10124 int write_back = FALSE;
10125 int stm_case;
10126 rtx addr;
10127 bool base_reg_dies;
10128
10129 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10130 mem_order, &base_reg, &offset, true);
10131
10132 if (stm_case == 0)
10133 return false;
10134
10135 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10136
10137 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10138 if (TARGET_THUMB1)
10139 {
10140 gcc_assert (base_reg_dies);
10141 write_back = TRUE;
10142 }
10143
10144 if (stm_case == 5)
10145 {
10146 gcc_assert (base_reg_dies);
10147 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10148 offset = 0;
10149 }
10150
10151 addr = plus_constant (base_reg_rtx, offset);
10152
10153 for (i = 0; i < nops; i++)
10154 {
10155 addr = plus_constant (base_reg_rtx, offset + i * 4);
10156 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10157 SImode, addr, 0);
10158 }
10159 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10160 write_back ? offset + i * 4 : 0));
10161 return true;
10162 }
10163
10164 /* Called from a peephole2 expander to turn a sequence of stores that are
10165 preceded by constant loads into an STM instruction. OPERANDS are the
10166 operands found by the peephole matcher; NOPS indicates how many
10167 separate stores we are trying to combine; there are 2 * NOPS
10168 instructions in the peephole.
10169 Returns true iff we could generate a new instruction. */
10170
10171 bool
10172 gen_const_stm_seq (rtx *operands, int nops)
10173 {
10174 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10175 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10176 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10177 rtx mems[MAX_LDM_STM_OPS];
10178 int base_reg;
10179 rtx base_reg_rtx;
10180 HOST_WIDE_INT offset;
10181 int write_back = FALSE;
10182 int stm_case;
10183 rtx addr;
10184 bool base_reg_dies;
10185 int i, j;
10186 HARD_REG_SET allocated;
10187
10188 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10189 mem_order, &base_reg, &offset, false);
10190
10191 if (stm_case == 0)
10192 return false;
10193
10194 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10195
10196 /* If the same register is used more than once, try to find a free
10197 register. */
10198 CLEAR_HARD_REG_SET (allocated);
10199 for (i = 0; i < nops; i++)
10200 {
10201 for (j = i + 1; j < nops; j++)
10202 if (regs[i] == regs[j])
10203 {
10204 rtx t = peep2_find_free_register (0, nops * 2,
10205 TARGET_THUMB1 ? "l" : "r",
10206 SImode, &allocated);
10207 if (t == NULL_RTX)
10208 return false;
10209 reg_rtxs[i] = t;
10210 regs[i] = REGNO (t);
10211 }
10212 }
10213
10214 /* Compute an ordering that maps the register numbers to an ascending
10215 sequence. */
10216 reg_order[0] = 0;
10217 for (i = 0; i < nops; i++)
10218 if (regs[i] < regs[reg_order[0]])
10219 reg_order[0] = i;
10220
10221 for (i = 1; i < nops; i++)
10222 {
10223 int this_order = reg_order[i - 1];
10224 for (j = 0; j < nops; j++)
10225 if (regs[j] > regs[reg_order[i - 1]]
10226 && (this_order == reg_order[i - 1]
10227 || regs[j] < regs[this_order]))
10228 this_order = j;
10229 reg_order[i] = this_order;
10230 }
10231
10232 /* Ensure that registers that must be live after the instruction end
10233 up with the correct value. */
10234 for (i = 0; i < nops; i++)
10235 {
10236 int this_order = reg_order[i];
10237 if ((this_order != mem_order[i]
10238 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10239 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10240 return false;
10241 }
10242
10243 /* Load the constants. */
10244 for (i = 0; i < nops; i++)
10245 {
10246 rtx op = operands[2 * nops + mem_order[i]];
10247 sorted_regs[i] = regs[reg_order[i]];
10248 emit_move_insn (reg_rtxs[reg_order[i]], op);
10249 }
10250
10251 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10252
10253 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10254 if (TARGET_THUMB1)
10255 {
10256 gcc_assert (base_reg_dies);
10257 write_back = TRUE;
10258 }
10259
10260 if (stm_case == 5)
10261 {
10262 gcc_assert (base_reg_dies);
10263 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10264 offset = 0;
10265 }
10266
10267 addr = plus_constant (base_reg_rtx, offset);
10268
10269 for (i = 0; i < nops; i++)
10270 {
10271 addr = plus_constant (base_reg_rtx, offset + i * 4);
10272 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10273 SImode, addr, 0);
10274 }
10275 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10276 write_back ? offset + i * 4 : 0));
10277 return true;
10278 }
10279
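/* Expand a block move (the movmemqi pattern). OPERANDS[0] and OPERANDS[1]
   are the destination and source memory references, OPERANDS[2] is the byte
   count and OPERANDS[3] the alignment. Return nonzero on success; zero
   lets the caller fall back to the generic block-move code. */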
10280 int
10281 arm_gen_movmemqi (rtx *operands)
10282 {
10283 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10284 HOST_WIDE_INT srcoffset, dstoffset;
10285 int i;
10286 rtx src, dst, srcbase, dstbase;
10287 rtx part_bytes_reg = NULL;
10288 rtx mem;
10289
10290 if (GET_CODE (operands[2]) != CONST_INT
10291 || GET_CODE (operands[3]) != CONST_INT
10292 || INTVAL (operands[2]) > 64
10293 || INTVAL (operands[3]) & 3)
10294 return 0;
10295
10296 dstbase = operands[0];
10297 srcbase = operands[1];
10298
10299 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10300 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10301
10302 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10303 out_words_to_go = INTVAL (operands[2]) / 4;
10304 last_bytes = INTVAL (operands[2]) & 3;
10305 dstoffset = srcoffset = 0;
10306
10307 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10308 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10309
10310 for (i = 0; in_words_to_go >= 2; i+=4)
10311 {
10312 if (in_words_to_go > 4)
10313 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10314 TRUE, srcbase, &srcoffset));
10315 else
10316 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10317 src, FALSE, srcbase,
10318 &srcoffset));
10319
10320 if (out_words_to_go)
10321 {
10322 if (out_words_to_go > 4)
10323 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10324 TRUE, dstbase, &dstoffset));
10325 else if (out_words_to_go != 1)
10326 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10327 out_words_to_go, dst,
10328 (last_bytes == 0
10329 ? FALSE : TRUE),
10330 dstbase, &dstoffset));
10331 else
10332 {
10333 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10334 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10335 if (last_bytes != 0)
10336 {
10337 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10338 dstoffset += 4;
10339 }
10340 }
10341 }
10342
10343 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10344 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10345 }
10346
10347 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10348 if (out_words_to_go)
10349 {
10350 rtx sreg;
10351
10352 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10353 sreg = copy_to_reg (mem);
10354
10355 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10356 emit_move_insn (mem, sreg);
10357 in_words_to_go--;
10358
10359 gcc_assert (!in_words_to_go); /* Sanity check */
10360 }
10361
10362 if (in_words_to_go)
10363 {
10364 gcc_assert (in_words_to_go > 0);
10365
10366 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10367 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10368 }
10369
10370 gcc_assert (!last_bytes || part_bytes_reg);
10371
10372 if (BYTES_BIG_ENDIAN && last_bytes)
10373 {
10374 rtx tmp = gen_reg_rtx (SImode);
10375
10376 /* The bytes we want are in the top end of the word. */
10377 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10378 GEN_INT (8 * (4 - last_bytes))));
10379 part_bytes_reg = tmp;
10380
10381 while (last_bytes)
10382 {
10383 mem = adjust_automodify_address (dstbase, QImode,
10384 plus_constant (dst, last_bytes - 1),
10385 dstoffset + last_bytes - 1);
10386 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10387
10388 if (--last_bytes)
10389 {
10390 tmp = gen_reg_rtx (SImode);
10391 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10392 part_bytes_reg = tmp;
10393 }
10394 }
10395
10396 }
10397 else
10398 {
10399 if (last_bytes > 1)
10400 {
10401 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10402 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10403 last_bytes -= 2;
10404 if (last_bytes)
10405 {
10406 rtx tmp = gen_reg_rtx (SImode);
10407 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10408 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10409 part_bytes_reg = tmp;
10410 dstoffset += 2;
10411 }
10412 }
10413
10414 if (last_bytes)
10415 {
10416 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10417 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10418 }
10419 }
10420
10421 return 1;
10422 }
10423
10424 /* Select a dominance comparison mode if possible for a test of the general
10425 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10426 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10427 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10428 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10429 In all cases OP will be either EQ or NE, but we don't need to know which
10430 here. If we are unable to support a dominance comparison we return
10431 CCmode. This will then fail to match for the RTL expressions that
10432 generate this call. */
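/* For instance, with X = (eq a 0), Y = (eq b 0) and COND_OR == DOM_CC_X_OR_Y
   this returns CC_DEQmode, so "(a == 0) || (b == 0)" can be tested with a
   compare followed by a conditionally executed compare. */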
10433 enum machine_mode
10434 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10435 {
10436 enum rtx_code cond1, cond2;
10437 int swapped = 0;
10438
10439 /* Currently we will probably get the wrong result if the individual
10440 comparisons are not simple. This also ensures that it is safe to
10441 reverse a comparison if necessary. */
10442 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10443 != CCmode)
10444 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10445 != CCmode))
10446 return CCmode;
10447
10448 /* The if_then_else variant of this tests the second condition if the
10449 first passes, but is true if the first fails. Reverse the first
10450 condition to get a true "inclusive-or" expression. */
10451 if (cond_or == DOM_CC_NX_OR_Y)
10452 cond1 = reverse_condition (cond1);
10453
10454 /* If the comparisons are not equal, and one doesn't dominate the other,
10455 then we can't do this. */
10456 if (cond1 != cond2
10457 && !comparison_dominates_p (cond1, cond2)
10458 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10459 return CCmode;
10460
10461 if (swapped)
10462 {
10463 enum rtx_code temp = cond1;
10464 cond1 = cond2;
10465 cond2 = temp;
10466 }
10467
10468 switch (cond1)
10469 {
10470 case EQ:
10471 if (cond_or == DOM_CC_X_AND_Y)
10472 return CC_DEQmode;
10473
10474 switch (cond2)
10475 {
10476 case EQ: return CC_DEQmode;
10477 case LE: return CC_DLEmode;
10478 case LEU: return CC_DLEUmode;
10479 case GE: return CC_DGEmode;
10480 case GEU: return CC_DGEUmode;
10481 default: gcc_unreachable ();
10482 }
10483
10484 case LT:
10485 if (cond_or == DOM_CC_X_AND_Y)
10486 return CC_DLTmode;
10487
10488 switch (cond2)
10489 {
10490 case LT:
10491 return CC_DLTmode;
10492 case LE:
10493 return CC_DLEmode;
10494 case NE:
10495 return CC_DNEmode;
10496 default:
10497 gcc_unreachable ();
10498 }
10499
10500 case GT:
10501 if (cond_or == DOM_CC_X_AND_Y)
10502 return CC_DGTmode;
10503
10504 switch (cond2)
10505 {
10506 case GT:
10507 return CC_DGTmode;
10508 case GE:
10509 return CC_DGEmode;
10510 case NE:
10511 return CC_DNEmode;
10512 default:
10513 gcc_unreachable ();
10514 }
10515
10516 case LTU:
10517 if (cond_or == DOM_CC_X_AND_Y)
10518 return CC_DLTUmode;
10519
10520 switch (cond2)
10521 {
10522 case LTU:
10523 return CC_DLTUmode;
10524 case LEU:
10525 return CC_DLEUmode;
10526 case NE:
10527 return CC_DNEmode;
10528 default:
10529 gcc_unreachable ();
10530 }
10531
10532 case GTU:
10533 if (cond_or == DOM_CC_X_AND_Y)
10534 return CC_DGTUmode;
10535
10536 switch (cond2)
10537 {
10538 case GTU:
10539 return CC_DGTUmode;
10540 case GEU:
10541 return CC_DGEUmode;
10542 case NE:
10543 return CC_DNEmode;
10544 default:
10545 gcc_unreachable ();
10546 }
10547
10548 /* The remaining cases only occur when both comparisons are the
10549 same. */
10550 case NE:
10551 gcc_assert (cond1 == cond2);
10552 return CC_DNEmode;
10553
10554 case LE:
10555 gcc_assert (cond1 == cond2);
10556 return CC_DLEmode;
10557
10558 case GE:
10559 gcc_assert (cond1 == cond2);
10560 return CC_DGEmode;
10561
10562 case LEU:
10563 gcc_assert (cond1 == cond2);
10564 return CC_DLEUmode;
10565
10566 case GEU:
10567 gcc_assert (cond1 == cond2);
10568 return CC_DGEUmode;
10569
10570 default:
10571 gcc_unreachable ();
10572 }
10573 }
10574
10575 enum machine_mode
10576 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10577 {
10578 /* All floating point compares return CCFP if it is an equality
10579 comparison, and CCFPE otherwise. */
10580 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10581 {
10582 switch (op)
10583 {
10584 case EQ:
10585 case NE:
10586 case UNORDERED:
10587 case ORDERED:
10588 case UNLT:
10589 case UNLE:
10590 case UNGT:
10591 case UNGE:
10592 case UNEQ:
10593 case LTGT:
10594 return CCFPmode;
10595
10596 case LT:
10597 case LE:
10598 case GT:
10599 case GE:
10600 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10601 return CCFPmode;
10602 return CCFPEmode;
10603
10604 default:
10605 gcc_unreachable ();
10606 }
10607 }
10608
10609 /* A compare with a shifted operand. Because of canonicalization, the
10610 comparison will have to be swapped when we emit the assembler. */
10611 if (GET_MODE (y) == SImode
10612 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10613 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10614 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10615 || GET_CODE (x) == ROTATERT))
10616 return CC_SWPmode;
10617
10618 /* This operation is performed swapped, but since we only rely on the Z
10619 flag we don't need an additional mode. */
10620 if (GET_MODE (y) == SImode
10621 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10622 && GET_CODE (x) == NEG
10623 && (op == EQ || op == NE))
10624 return CC_Zmode;
10625
10626 /* This is a special case that is used by combine to allow a
10627 comparison of a shifted byte load to be split into a zero-extend
10628 followed by a comparison of the shifted integer (only valid for
10629 equalities and unsigned inequalities). */
10630 if (GET_MODE (x) == SImode
10631 && GET_CODE (x) == ASHIFT
10632 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10633 && GET_CODE (XEXP (x, 0)) == SUBREG
10634 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10635 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10636 && (op == EQ || op == NE
10637 || op == GEU || op == GTU || op == LTU || op == LEU)
10638 && GET_CODE (y) == CONST_INT)
10639 return CC_Zmode;
10640
10641 /* A construct for a conditional compare, if the false arm contains
10642 0, then both conditions must be true, otherwise either condition
10643 must be true. Not all conditions are possible, so CCmode is
10644 returned if it can't be done. */
10645 if (GET_CODE (x) == IF_THEN_ELSE
10646 && (XEXP (x, 2) == const0_rtx
10647 || XEXP (x, 2) == const1_rtx)
10648 && COMPARISON_P (XEXP (x, 0))
10649 && COMPARISON_P (XEXP (x, 1)))
10650 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10651 INTVAL (XEXP (x, 2)));
10652
10653 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10654 if (GET_CODE (x) == AND
10655 && (op == EQ || op == NE)
10656 && COMPARISON_P (XEXP (x, 0))
10657 && COMPARISON_P (XEXP (x, 1)))
10658 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10659 DOM_CC_X_AND_Y);
10660
10661 if (GET_CODE (x) == IOR
10662 && (op == EQ || op == NE)
10663 && COMPARISON_P (XEXP (x, 0))
10664 && COMPARISON_P (XEXP (x, 1)))
10665 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10666 DOM_CC_X_OR_Y);
10667
10668 /* An operation (on Thumb) where we want to test for a single bit.
10669 This is done by shifting that bit up into the top bit of a
10670 scratch register; we can then branch on the sign bit. */
10671 if (TARGET_THUMB1
10672 && GET_MODE (x) == SImode
10673 && (op == EQ || op == NE)
10674 && GET_CODE (x) == ZERO_EXTRACT
10675 && XEXP (x, 1) == const1_rtx)
10676 return CC_Nmode;
10677
10678 /* For an operation that sets the condition codes as a side-effect, the
10679 V flag is not set correctly, so we can only use comparisons where
10680 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10681 instead.) */
10682 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10683 if (GET_MODE (x) == SImode
10684 && y == const0_rtx
10685 && (op == EQ || op == NE || op == LT || op == GE)
10686 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10687 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10688 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10689 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10690 || GET_CODE (x) == LSHIFTRT
10691 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10692 || GET_CODE (x) == ROTATERT
10693 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10694 return CC_NOOVmode;
10695
10696 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10697 return CC_Zmode;
10698
10699 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10700 && GET_CODE (x) == PLUS
10701 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10702 return CC_Cmode;
10703
10704 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10705 {
10706 /* To keep things simple, always use the Cirrus cfcmp64 if it is
10707 available. */
10708 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10709 return CCmode;
10710
10711 switch (op)
10712 {
10713 case EQ:
10714 case NE:
10715 /* A DImode comparison against zero can be implemented by
10716 or'ing the two halves together. */
10717 if (y == const0_rtx)
10718 return CC_Zmode;
10719
10720 /* We can do an equality test in three Thumb instructions. */
10721 if (!TARGET_ARM)
10722 return CC_Zmode;
10723
10724 /* FALLTHROUGH */
10725
10726 case LTU:
10727 case LEU:
10728 case GTU:
10729 case GEU:
10730 /* DImode unsigned comparisons can be implemented by cmp +
10731 cmpeq without a scratch register. Not worth doing in
10732 Thumb-2. */
10733 if (TARGET_ARM)
10734 return CC_CZmode;
10735
10736 /* FALLTHROUGH */
10737
10738 case LT:
10739 case LE:
10740 case GT:
10741 case GE:
10742 /* DImode signed and unsigned comparisons can be implemented
10743 by cmp + sbcs with a scratch register, but that does not
10744 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10745 gcc_assert (op != EQ && op != NE);
10746 return CC_NCVmode;
10747
10748 default:
10749 gcc_unreachable ();
10750 }
10751 }
10752
10753 return CCmode;
10754 }
10755
10756 /* X and Y are two things to compare using CODE. Emit the compare insn and
10757 return the rtx for the CC register in the proper mode. */
10759 rtx
10760 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10761 {
10762 enum machine_mode mode;
10763 rtx cc_reg;
10764 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10765
10766 /* We might have X as a constant, Y as a register because of the predicates
10767 used for cmpdi. If so, force X to a register here. */
10768 if (dimode_comparison && !REG_P (x))
10769 x = force_reg (DImode, x);
10770
10771 mode = SELECT_CC_MODE (code, x, y);
10772 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10773
10774 if (dimode_comparison
10775 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10776 && mode != CC_CZmode)
10777 {
10778 rtx clobber, set;
10779
10780 /* To compare two non-zero values for equality, XOR them and
10781 then compare against zero. Not used for ARM mode; there
10782 CC_CZmode is cheaper. */
10783 if (mode == CC_Zmode && y != const0_rtx)
10784 {
10785 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10786 y = const0_rtx;
10787 }
10788 /* A scratch register is required. */
10789 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10790 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10791 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10792 }
10793 else
10794 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10795
10796 return cc_reg;
10797 }
10798
10799 /* Generate a sequence of insns that will generate the correct return
10800 address mask depending on the physical architecture that the program
10801 is running on. */
10802 rtx
10803 arm_gen_return_addr_mask (void)
10804 {
10805 rtx reg = gen_reg_rtx (Pmode);
10806
10807 emit_insn (gen_return_addr_mask (reg));
10808 return reg;
10809 }
10810
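/* Handle loading a half-word from memory during reload by synthesizing it
   from two byte loads. OPERANDS[0] is the destination, OPERANDS[1] the
   half-word memory reference and OPERANDS[2] a DImode scratch register. */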
10811 void
10812 arm_reload_in_hi (rtx *operands)
10813 {
10814 rtx ref = operands[1];
10815 rtx base, scratch;
10816 HOST_WIDE_INT offset = 0;
10817
10818 if (GET_CODE (ref) == SUBREG)
10819 {
10820 offset = SUBREG_BYTE (ref);
10821 ref = SUBREG_REG (ref);
10822 }
10823
10824 if (GET_CODE (ref) == REG)
10825 {
10826 /* We have a pseudo which has been spilt onto the stack; there
10827 are two cases here: the first where there is a simple
10828 stack-slot replacement and a second where the stack-slot is
10829 out of range, or is used as a subreg. */
10830 if (reg_equiv_mem (REGNO (ref)))
10831 {
10832 ref = reg_equiv_mem (REGNO (ref));
10833 base = find_replacement (&XEXP (ref, 0));
10834 }
10835 else
10836 /* The slot is out of range, or was dressed up in a SUBREG. */
10837 base = reg_equiv_address (REGNO (ref));
10838 }
10839 else
10840 base = find_replacement (&XEXP (ref, 0));
10841
10842 /* Handle the case where the address is too complex to be offset by 1. */
10843 if (GET_CODE (base) == MINUS
10844 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10845 {
10846 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10847
10848 emit_set_insn (base_plus, base);
10849 base = base_plus;
10850 }
10851 else if (GET_CODE (base) == PLUS)
10852 {
10853 /* The addend must be CONST_INT, or we would have dealt with it above. */
10854 HOST_WIDE_INT hi, lo;
10855
10856 offset += INTVAL (XEXP (base, 1));
10857 base = XEXP (base, 0);
10858
10859 /* Rework the address into a legal sequence of insns. */
10860 /* Valid range for lo is -4095 -> 4095 */
10861 lo = (offset >= 0
10862 ? (offset & 0xfff)
10863 : -((-offset) & 0xfff));
10864
10865 /* Corner case, if lo is the max offset then we would be out of range
10866 once we have added the additional 1 below, so bump the msb into the
10867 pre-loading insn(s). */
10868 if (lo == 4095)
10869 lo &= 0x7ff;
10870
10871 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10872 ^ (HOST_WIDE_INT) 0x80000000)
10873 - (HOST_WIDE_INT) 0x80000000);
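/* For example, OFFSET = 0x1005 splits into LO = 5 and HI = 0x1000, while
   OFFSET = 0x1fff (LO would be 4095, so it is trimmed to 0x7ff) splits into
   LO = 0x7ff and HI = 0x1800. */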
10874
10875 gcc_assert (hi + lo == offset);
10876
10877 if (hi != 0)
10878 {
10879 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10880
10881 /* Get the base address; addsi3 knows how to handle constants
10882 that require more than one insn. */
10883 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10884 base = base_plus;
10885 offset = lo;
10886 }
10887 }
10888
10889 /* Operands[2] may overlap operands[0] (though it won't overlap
10890 operands[1]), that's why we asked for a DImode reg -- so we can
10891 use the bit that does not overlap. */
10892 if (REGNO (operands[2]) == REGNO (operands[0]))
10893 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10894 else
10895 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10896
10897 emit_insn (gen_zero_extendqisi2 (scratch,
10898 gen_rtx_MEM (QImode,
10899 plus_constant (base,
10900 offset))));
10901 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10902 gen_rtx_MEM (QImode,
10903 plus_constant (base,
10904 offset + 1))));
10905 if (!BYTES_BIG_ENDIAN)
10906 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10907 gen_rtx_IOR (SImode,
10908 gen_rtx_ASHIFT
10909 (SImode,
10910 gen_rtx_SUBREG (SImode, operands[0], 0),
10911 GEN_INT (8)),
10912 scratch));
10913 else
10914 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10915 gen_rtx_IOR (SImode,
10916 gen_rtx_ASHIFT (SImode, scratch,
10917 GEN_INT (8)),
10918 gen_rtx_SUBREG (SImode, operands[0], 0)));
10919 }
10920
10921 /* Handle storing a half-word to memory during reload by synthesizing as two
10922 byte stores. Take care not to clobber the input values until after we
10923 have moved them somewhere safe. This code assumes that if the DImode
10924 scratch in operands[2] overlaps either the input value or output address
10925 in some way, then that value must die in this insn (we absolutely need
10926 two scratch registers for some corner cases). */
10927 void
10928 arm_reload_out_hi (rtx *operands)
10929 {
10930 rtx ref = operands[0];
10931 rtx outval = operands[1];
10932 rtx base, scratch;
10933 HOST_WIDE_INT offset = 0;
10934
10935 if (GET_CODE (ref) == SUBREG)
10936 {
10937 offset = SUBREG_BYTE (ref);
10938 ref = SUBREG_REG (ref);
10939 }
10940
10941 if (GET_CODE (ref) == REG)
10942 {
10943 /* We have a pseudo which has been spilt onto the stack; there
10944 are two cases here: the first where there is a simple
10945 stack-slot replacement and a second where the stack-slot is
10946 out of range, or is used as a subreg. */
10947 if (reg_equiv_mem (REGNO (ref)))
10948 {
10949 ref = reg_equiv_mem (REGNO (ref));
10950 base = find_replacement (&XEXP (ref, 0));
10951 }
10952 else
10953 /* The slot is out of range, or was dressed up in a SUBREG. */
10954 base = reg_equiv_address (REGNO (ref));
10955 }
10956 else
10957 base = find_replacement (&XEXP (ref, 0));
10958
10959 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10960
10961 /* Handle the case where the address is too complex to be offset by 1. */
10962 if (GET_CODE (base) == MINUS
10963 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10964 {
10965 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10966
10967 /* Be careful not to destroy OUTVAL. */
10968 if (reg_overlap_mentioned_p (base_plus, outval))
10969 {
10970 /* Updating base_plus might destroy outval, see if we can
10971 swap the scratch and base_plus. */
10972 if (!reg_overlap_mentioned_p (scratch, outval))
10973 {
10974 rtx tmp = scratch;
10975 scratch = base_plus;
10976 base_plus = tmp;
10977 }
10978 else
10979 {
10980 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10981
10982 /* Be conservative and copy OUTVAL into the scratch now;
10983 this should only be necessary if outval is a subreg
10984 of something larger than a word. */
10985 /* XXX Might this clobber base? I can't see how it can,
10986 since scratch is known to overlap with OUTVAL, and
10987 must be wider than a word. */
10988 emit_insn (gen_movhi (scratch_hi, outval));
10989 outval = scratch_hi;
10990 }
10991 }
10992
10993 emit_set_insn (base_plus, base);
10994 base = base_plus;
10995 }
10996 else if (GET_CODE (base) == PLUS)
10997 {
10998 /* The addend must be CONST_INT, or we would have dealt with it above. */
10999 HOST_WIDE_INT hi, lo;
11000
11001 offset += INTVAL (XEXP (base, 1));
11002 base = XEXP (base, 0);
11003
11004 /* Rework the address into a legal sequence of insns. */
11005 /* Valid range for lo is -4095 -> 4095 */
11006 lo = (offset >= 0
11007 ? (offset & 0xfff)
11008 : -((-offset) & 0xfff));
11009
11010 /* Corner case: if lo is the max offset then we would be out of range
11011 once we have added the additional 1 below, so bump the msb into the
11012 pre-loading insn(s). */
11013 if (lo == 4095)
11014 lo &= 0x7ff;
11015
11016 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11017 ^ (HOST_WIDE_INT) 0x80000000)
11018 - (HOST_WIDE_INT) 0x80000000);
11019
11020 gcc_assert (hi + lo == offset);
11021
11022 if (hi != 0)
11023 {
11024 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11025
11026 /* Be careful not to destroy OUTVAL. */
11027 if (reg_overlap_mentioned_p (base_plus, outval))
11028 {
11029 /* Updating base_plus might destroy outval, see if we
11030 can swap the scratch and base_plus. */
11031 if (!reg_overlap_mentioned_p (scratch, outval))
11032 {
11033 rtx tmp = scratch;
11034 scratch = base_plus;
11035 base_plus = tmp;
11036 }
11037 else
11038 {
11039 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11040
11041 /* Be conservative and copy outval into scratch now;
11042 this should only be necessary if outval is a
11043 subreg of something larger than a word. */
11044 /* XXX Might this clobber base? I can't see how it
11045 can, since scratch is known to overlap with
11046 outval. */
11047 emit_insn (gen_movhi (scratch_hi, outval));
11048 outval = scratch_hi;
11049 }
11050 }
11051
11052 /* Get the base address; addsi3 knows how to handle constants
11053 that require more than one insn. */
11054 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11055 base = base_plus;
11056 offset = lo;
11057 }
11058 }
11059
11060 if (BYTES_BIG_ENDIAN)
11061 {
11062 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11063 plus_constant (base, offset + 1)),
11064 gen_lowpart (QImode, outval)));
11065 emit_insn (gen_lshrsi3 (scratch,
11066 gen_rtx_SUBREG (SImode, outval, 0),
11067 GEN_INT (8)));
11068 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11069 gen_lowpart (QImode, scratch)));
11070 }
11071 else
11072 {
11073 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11074 gen_lowpart (QImode, outval)));
11075 emit_insn (gen_lshrsi3 (scratch,
11076 gen_rtx_SUBREG (SImode, outval, 0),
11077 GEN_INT (8)));
11078 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11079 plus_constant (base, offset + 1)),
11080 gen_lowpart (QImode, scratch)));
11081 }
11082 }
11083
11084 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11085 (padded to the size of a word) should be passed in a register. */
11086
11087 static bool
11088 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
11089 {
11090 if (TARGET_AAPCS_BASED)
11091 return must_pass_in_stack_var_size (mode, type);
11092 else
11093 return must_pass_in_stack_var_size_or_pad (mode, type);
11094 }
11095
11096
11097 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11098 Return true if an argument passed on the stack should be padded upwards,
11099 i.e. if the least-significant byte has useful data.
11100 For legacy APCS ABIs we use the default. For AAPCS-based ABIs small
11101 aggregate types are placed at the lowest memory address. */
11102
11103 bool
11104 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
11105 {
11106 if (!TARGET_AAPCS_BASED)
11107 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
11108
11109 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11110 return false;
11111
11112 return true;
11113 }
11114
11115
11116 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11117 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11118 byte of the register has useful data, and return the opposite if the
11119 most significant byte does.
11120 For AAPCS, small aggregates and small complex types are always padded
11121 upwards. */
11122
11123 bool
11124 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
11125 tree type, int first ATTRIBUTE_UNUSED)
11126 {
11127 if (TARGET_AAPCS_BASED
11128 && BYTES_BIG_ENDIAN
11129 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
11130 && int_size_in_bytes (type) <= 4)
11131 return true;
11132
11133 /* Otherwise, use default padding. */
11134 return !BYTES_BIG_ENDIAN;
11135 }
11136
11137 \f
11138 /* Print a symbolic form of X to the debug file, F. */
11139 static void
11140 arm_print_value (FILE *f, rtx x)
11141 {
11142 switch (GET_CODE (x))
11143 {
11144 case CONST_INT:
11145 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11146 return;
11147
11148 case CONST_DOUBLE:
11149 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11150 return;
11151
11152 case CONST_VECTOR:
11153 {
11154 int i;
11155
11156 fprintf (f, "<");
11157 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11158 {
11159 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11160 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11161 fputc (',', f);
11162 }
11163 fprintf (f, ">");
11164 }
11165 return;
11166
11167 case CONST_STRING:
11168 fprintf (f, "\"%s\"", XSTR (x, 0));
11169 return;
11170
11171 case SYMBOL_REF:
11172 fprintf (f, "`%s'", XSTR (x, 0));
11173 return;
11174
11175 case LABEL_REF:
11176 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11177 return;
11178
11179 case CONST:
11180 arm_print_value (f, XEXP (x, 0));
11181 return;
11182
11183 case PLUS:
11184 arm_print_value (f, XEXP (x, 0));
11185 fprintf (f, "+");
11186 arm_print_value (f, XEXP (x, 1));
11187 return;
11188
11189 case PC:
11190 fprintf (f, "pc");
11191 return;
11192
11193 default:
11194 fprintf (f, "????");
11195 return;
11196 }
11197 }
11198 \f
11199 /* Routines for manipulation of the constant pool. */
11200
11201 /* Arm instructions cannot load a large constant directly into a
11202 register; it has to come from a pc-relative load. The constant
11203 must therefore be placed in the addressable range of the
11204 pc-relative load. Depending on the precise pc-relative load
11205 instruction, the range is somewhere between 256 bytes and 4k. This
11206 means that we often have to dump a constant inside a function, and
11207 generate code to branch around it.
11208
11209 It is important to minimize this, since the branches will slow
11210 things down and make the code larger.
11211
11212 Normally we can hide the table after an existing unconditional
11213 branch so that there is no interruption of the flow, but in the
11214 worst case the code looks like this:
11215
11216 ldr rn, L1
11217 ...
11218 b L2
11219 align
11220 L1: .long value
11221 L2:
11222 ...
11223
11224 ldr rn, L3
11225 ...
11226 b L4
11227 align
11228 L3: .long value
11229 L4:
11230 ...
11231
11232 We fix this by performing a scan after scheduling, which notices
11233 which instructions need to have their operands fetched from the
11234 constant table and builds the table.
11235
11236 The algorithm starts by building a table of all the constants that
11237 need fixing up and all the natural barriers in the function (places
11238 where a constant table can be dropped without breaking the flow).
11239 For each fixup we note how far the pc-relative replacement will be
11240 able to reach and the offset of the instruction into the function.
11241
11242 Having built the table we then group the fixes together to form
11243 tables that are as large as possible (subject to addressing
11244 constraints) and emit each table of constants after the last
11245 barrier that is within range of all the instructions in the group.
11246 If a group does not contain a barrier, then we forcibly create one
11247 by inserting a jump instruction into the flow. Once the table has
11248 been inserted, the insns are then modified to reference the
11249 relevant entry in the pool.
11250
11251 Possible enhancements to the algorithm (not implemented) are:
11252
11253 1) For some processors and object formats, there may be benefit in
11254 aligning the pools to the start of cache lines; this alignment
11255 would need to be taken into account when calculating addressability
11256 of a pool. */
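/* As a rough guide to the ranges involved (the exact values come from
   the pool_range and neg_pool_range insn attributes in the machine
   description): an ARM-mode LDR literal can reach about 4K bytes either
   side of the pc, a Thumb-1 literal load only reaches about 1K bytes
   forwards, and coprocessor loads are limited to roughly 1K bytes in
   either direction.  */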
11257
11258 /* These typedefs are located at the start of this file, so that
11259 they can be used in the prototypes there. This comment is to
11260 remind readers of that fact so that the following structures
11261 can be understood more easily.
11262
11263 typedef struct minipool_node Mnode;
11264 typedef struct minipool_fixup Mfix; */
11265
11266 struct minipool_node
11267 {
11268 /* Doubly linked chain of entries. */
11269 Mnode * next;
11270 Mnode * prev;
11271 /* The maximum offset into the code at which this entry can be placed. While
11272 pushing fixes for forward references, all entries are sorted in order
11273 of increasing max_address. */
11274 HOST_WIDE_INT max_address;
11275 /* Similarly for an entry inserted for a backwards ref. */
11276 HOST_WIDE_INT min_address;
11277 /* The number of fixes referencing this entry. This can become zero
11278 if we "unpush" an entry. In this case we ignore the entry when we
11279 come to emit the code. */
11280 int refcount;
11281 /* The offset from the start of the minipool. */
11282 HOST_WIDE_INT offset;
11283 /* The value in the table. */
11284 rtx value;
11285 /* The mode of value. */
11286 enum machine_mode mode;
11287 /* The size of the value. With iWMMXt enabled
11288 sizes > 4 also imply an alignment of 8-bytes. */
11289 int fix_size;
11290 };
11291
11292 struct minipool_fixup
11293 {
11294 Mfix * next;
11295 rtx insn;
11296 HOST_WIDE_INT address;
11297 rtx * loc;
11298 enum machine_mode mode;
11299 int fix_size;
11300 rtx value;
11301 Mnode * minipool;
11302 HOST_WIDE_INT forwards;
11303 HOST_WIDE_INT backwards;
11304 };
11305
11306 /* Fixes less than a word need padding out to a word boundary. */
11307 #define MINIPOOL_FIX_SIZE(mode) \
11308 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
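/* For example, a QImode or HImode fix still occupies 4 bytes in the
   pool, while a DImode fix occupies 8.  */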
11309
11310 static Mnode * minipool_vector_head;
11311 static Mnode * minipool_vector_tail;
11312 static rtx minipool_vector_label;
11313 static int minipool_pad;
11314
11315 /* The linked list of all minipool fixes required for this function. */
11316 Mfix * minipool_fix_head;
11317 Mfix * minipool_fix_tail;
11318 /* The fix entry for the current minipool, once it has been placed. */
11319 Mfix * minipool_barrier;
11320
11321 /* Determines if INSN is the start of a jump table. Returns the end
11322 of the TABLE or NULL_RTX. */
11323 static rtx
11324 is_jump_table (rtx insn)
11325 {
11326 rtx table;
11327
11328 if (GET_CODE (insn) == JUMP_INSN
11329 && JUMP_LABEL (insn) != NULL
11330 && ((table = next_real_insn (JUMP_LABEL (insn)))
11331 == next_real_insn (insn))
11332 && table != NULL
11333 && GET_CODE (table) == JUMP_INSN
11334 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11335 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11336 return table;
11337
11338 return NULL_RTX;
11339 }
11340
11341 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11342 #define JUMP_TABLES_IN_TEXT_SECTION 0
11343 #endif
11344
11345 static HOST_WIDE_INT
11346 get_jump_table_size (rtx insn)
11347 {
11348 /* ADDR_VECs only take room if read-only data goes into the text
11349 section. */
11350 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11351 {
11352 rtx body = PATTERN (insn);
11353 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11354 HOST_WIDE_INT size;
11355 HOST_WIDE_INT modesize;
11356
11357 modesize = GET_MODE_SIZE (GET_MODE (body));
11358 size = modesize * XVECLEN (body, elt);
11359 switch (modesize)
11360 {
11361 case 1:
11362 /* Round up size of TBB table to a halfword boundary. */
11363 size = (size + 1) & ~(HOST_WIDE_INT)1;
11364 break;
11365 case 2:
11366 /* No padding necessary for TBH. */
11367 break;
11368 case 4:
11369 /* Add two bytes for alignment on Thumb. */
11370 if (TARGET_THUMB)
11371 size += 2;
11372 break;
11373 default:
11374 gcc_unreachable ();
11375 }
11376 return size;
11377 }
11378
11379 return 0;
11380 }
11381
11382 /* Move a minipool fix MP from its current location to before MAX_MP.
11383 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11384 constraints may need updating. */
11385 static Mnode *
11386 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11387 HOST_WIDE_INT max_address)
11388 {
11389 /* The code below assumes these are different. */
11390 gcc_assert (mp != max_mp);
11391
11392 if (max_mp == NULL)
11393 {
11394 if (max_address < mp->max_address)
11395 mp->max_address = max_address;
11396 }
11397 else
11398 {
11399 if (max_address > max_mp->max_address - mp->fix_size)
11400 mp->max_address = max_mp->max_address - mp->fix_size;
11401 else
11402 mp->max_address = max_address;
11403
11404 /* Unlink MP from its current position. Since max_mp is non-null,
11405 mp->prev must be non-null. */
11406 mp->prev->next = mp->next;
11407 if (mp->next != NULL)
11408 mp->next->prev = mp->prev;
11409 else
11410 minipool_vector_tail = mp->prev;
11411
11412 /* Re-insert it before MAX_MP. */
11413 mp->next = max_mp;
11414 mp->prev = max_mp->prev;
11415 max_mp->prev = mp;
11416
11417 if (mp->prev != NULL)
11418 mp->prev->next = mp;
11419 else
11420 minipool_vector_head = mp;
11421 }
11422
11423 /* Save the new entry. */
11424 max_mp = mp;
11425
11426 /* Scan over the preceding entries and adjust their addresses as
11427 required. */
11428 while (mp->prev != NULL
11429 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11430 {
11431 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11432 mp = mp->prev;
11433 }
11434
11435 return max_mp;
11436 }
11437
11438 /* Add a constant to the minipool for a forward reference. Returns the
11439 node added or NULL if the constant will not fit in this pool. */
11440 static Mnode *
11441 add_minipool_forward_ref (Mfix *fix)
11442 {
11443 /* If set, max_mp is the first pool_entry that has a lower
11444 constraint than the one we are trying to add. */
11445 Mnode * max_mp = NULL;
11446 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11447 Mnode * mp;
11448
11449 /* If the minipool starts before the end of FIX->INSN then this FIX
11450 cannot be placed into the current pool. Furthermore, adding the
11451 new constant pool entry may cause the pool to start FIX_SIZE bytes
11452 earlier. */
11453 if (minipool_vector_head &&
11454 (fix->address + get_attr_length (fix->insn)
11455 >= minipool_vector_head->max_address - fix->fix_size))
11456 return NULL;
11457
11458 /* Scan the pool to see if a constant with the same value has
11459 already been added. While we are doing this, also note the
11460 location where we must insert the constant if it doesn't already
11461 exist. */
11462 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11463 {
11464 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11465 && fix->mode == mp->mode
11466 && (GET_CODE (fix->value) != CODE_LABEL
11467 || (CODE_LABEL_NUMBER (fix->value)
11468 == CODE_LABEL_NUMBER (mp->value)))
11469 && rtx_equal_p (fix->value, mp->value))
11470 {
11471 /* More than one fix references this entry. */
11472 mp->refcount++;
11473 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11474 }
11475
11476 /* Note the insertion point if necessary. */
11477 if (max_mp == NULL
11478 && mp->max_address > max_address)
11479 max_mp = mp;
11480
11481 /* If we are inserting an 8-byte aligned quantity and
11482 we have not already found an insertion point, then
11483 make sure that all such 8-byte aligned quantities are
11484 placed at the start of the pool. */
11485 if (ARM_DOUBLEWORD_ALIGN
11486 && max_mp == NULL
11487 && fix->fix_size >= 8
11488 && mp->fix_size < 8)
11489 {
11490 max_mp = mp;
11491 max_address = mp->max_address;
11492 }
11493 }
11494
11495 /* The value is not currently in the minipool, so we need to create
11496 a new entry for it. If MAX_MP is NULL, the entry will be put on
11497 the end of the list since the placement is less constrained than
11498 any existing entry. Otherwise, we insert the new fix before
11499 MAX_MP and, if necessary, adjust the constraints on the other
11500 entries. */
11501 mp = XNEW (Mnode);
11502 mp->fix_size = fix->fix_size;
11503 mp->mode = fix->mode;
11504 mp->value = fix->value;
11505 mp->refcount = 1;
11506 /* Not yet required for a backwards ref. */
11507 mp->min_address = -65536;
11508
11509 if (max_mp == NULL)
11510 {
11511 mp->max_address = max_address;
11512 mp->next = NULL;
11513 mp->prev = minipool_vector_tail;
11514
11515 if (mp->prev == NULL)
11516 {
11517 minipool_vector_head = mp;
11518 minipool_vector_label = gen_label_rtx ();
11519 }
11520 else
11521 mp->prev->next = mp;
11522
11523 minipool_vector_tail = mp;
11524 }
11525 else
11526 {
11527 if (max_address > max_mp->max_address - mp->fix_size)
11528 mp->max_address = max_mp->max_address - mp->fix_size;
11529 else
11530 mp->max_address = max_address;
11531
11532 mp->next = max_mp;
11533 mp->prev = max_mp->prev;
11534 max_mp->prev = mp;
11535 if (mp->prev != NULL)
11536 mp->prev->next = mp;
11537 else
11538 minipool_vector_head = mp;
11539 }
11540
11541 /* Save the new entry. */
11542 max_mp = mp;
11543
11544 /* Scan over the preceding entries and adjust their addresses as
11545 required. */
11546 while (mp->prev != NULL
11547 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11548 {
11549 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11550 mp = mp->prev;
11551 }
11552
11553 return max_mp;
11554 }
11555
11556 static Mnode *
11557 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11558 HOST_WIDE_INT min_address)
11559 {
11560 HOST_WIDE_INT offset;
11561
11562 /* The code below assumes these are different. */
11563 gcc_assert (mp != min_mp);
11564
11565 if (min_mp == NULL)
11566 {
11567 if (min_address > mp->min_address)
11568 mp->min_address = min_address;
11569 }
11570 else
11571 {
11572 /* We will adjust this below if it is too loose. */
11573 mp->min_address = min_address;
11574
11575 /* Unlink MP from its current position. Since min_mp is non-null,
11576 mp->next must be non-null. */
11577 mp->next->prev = mp->prev;
11578 if (mp->prev != NULL)
11579 mp->prev->next = mp->next;
11580 else
11581 minipool_vector_head = mp->next;
11582
11583 /* Reinsert it after MIN_MP. */
11584 mp->prev = min_mp;
11585 mp->next = min_mp->next;
11586 min_mp->next = mp;
11587 if (mp->next != NULL)
11588 mp->next->prev = mp;
11589 else
11590 minipool_vector_tail = mp;
11591 }
11592
11593 min_mp = mp;
11594
11595 offset = 0;
11596 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11597 {
11598 mp->offset = offset;
11599 if (mp->refcount > 0)
11600 offset += mp->fix_size;
11601
11602 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11603 mp->next->min_address = mp->min_address + mp->fix_size;
11604 }
11605
11606 return min_mp;
11607 }
11608
11609 /* Add a constant to the minipool for a backward reference. Returns the
11610 node added or NULL if the constant will not fit in this pool.
11611
11612 Note that the code for insertion for a backwards reference can be
11613 somewhat confusing because the calculated offsets for each fix do
11614 not take into account the size of the pool (which is still under
11615 construction). */
11616 static Mnode *
11617 add_minipool_backward_ref (Mfix *fix)
11618 {
11619 /* If set, min_mp is the last pool_entry that has a lower constraint
11620 than the one we are trying to add. */
11621 Mnode *min_mp = NULL;
11622 /* This can be negative, since it is only a constraint. */
11623 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11624 Mnode *mp;
11625
11626 /* If we can't reach the current pool from this insn, or if we can't
11627 insert this entry at the end of the pool without pushing other
11628 fixes out of range, then we don't try. This ensures that we
11629 can't fail later on. */
11630 if (min_address >= minipool_barrier->address
11631 || (minipool_vector_tail->min_address + fix->fix_size
11632 >= minipool_barrier->address))
11633 return NULL;
11634
11635 /* Scan the pool to see if a constant with the same value has
11636 already been added. While we are doing this, also note the
11637 location where we must insert the constant if it doesn't already
11638 exist. */
11639 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11640 {
11641 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11642 && fix->mode == mp->mode
11643 && (GET_CODE (fix->value) != CODE_LABEL
11644 || (CODE_LABEL_NUMBER (fix->value)
11645 == CODE_LABEL_NUMBER (mp->value)))
11646 && rtx_equal_p (fix->value, mp->value)
11647 /* Check that there is enough slack to move this entry to the
11648 end of the table (this is conservative). */
11649 && (mp->max_address
11650 > (minipool_barrier->address
11651 + minipool_vector_tail->offset
11652 + minipool_vector_tail->fix_size)))
11653 {
11654 mp->refcount++;
11655 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11656 }
11657
11658 if (min_mp != NULL)
11659 mp->min_address += fix->fix_size;
11660 else
11661 {
11662 /* Note the insertion point if necessary. */
11663 if (mp->min_address < min_address)
11664 {
11665 /* For now, we do not allow the insertion of nodes requiring
11666 8-byte alignment anywhere but at the start of the pool. */
11667 if (ARM_DOUBLEWORD_ALIGN
11668 && fix->fix_size >= 8 && mp->fix_size < 8)
11669 return NULL;
11670 else
11671 min_mp = mp;
11672 }
11673 else if (mp->max_address
11674 < minipool_barrier->address + mp->offset + fix->fix_size)
11675 {
11676 /* Inserting before this entry would push the fix beyond
11677 its maximum address (which can happen if we have
11678 re-located a forwards fix); force the new fix to come
11679 after it. */
11680 if (ARM_DOUBLEWORD_ALIGN
11681 && fix->fix_size >= 8 && mp->fix_size < 8)
11682 return NULL;
11683 else
11684 {
11685 min_mp = mp;
11686 min_address = mp->min_address + fix->fix_size;
11687 }
11688 }
11689 /* Do not insert a non-8-byte aligned quantity before 8-byte
11690 aligned quantities. */
11691 else if (ARM_DOUBLEWORD_ALIGN
11692 && fix->fix_size < 8
11693 && mp->fix_size >= 8)
11694 {
11695 min_mp = mp;
11696 min_address = mp->min_address + fix->fix_size;
11697 }
11698 }
11699 }
11700
11701 /* We need to create a new entry. */
11702 mp = XNEW (Mnode);
11703 mp->fix_size = fix->fix_size;
11704 mp->mode = fix->mode;
11705 mp->value = fix->value;
11706 mp->refcount = 1;
11707 mp->max_address = minipool_barrier->address + 65536;
11708
11709 mp->min_address = min_address;
11710
11711 if (min_mp == NULL)
11712 {
11713 mp->prev = NULL;
11714 mp->next = minipool_vector_head;
11715
11716 if (mp->next == NULL)
11717 {
11718 minipool_vector_tail = mp;
11719 minipool_vector_label = gen_label_rtx ();
11720 }
11721 else
11722 mp->next->prev = mp;
11723
11724 minipool_vector_head = mp;
11725 }
11726 else
11727 {
11728 mp->next = min_mp->next;
11729 mp->prev = min_mp;
11730 min_mp->next = mp;
11731
11732 if (mp->next != NULL)
11733 mp->next->prev = mp;
11734 else
11735 minipool_vector_tail = mp;
11736 }
11737
11738 /* Save the new entry. */
11739 min_mp = mp;
11740
11741 if (mp->prev)
11742 mp = mp->prev;
11743 else
11744 mp->offset = 0;
11745
11746 /* Scan over the following entries and adjust their offsets. */
11747 while (mp->next != NULL)
11748 {
11749 if (mp->next->min_address < mp->min_address + mp->fix_size)
11750 mp->next->min_address = mp->min_address + mp->fix_size;
11751
11752 if (mp->refcount)
11753 mp->next->offset = mp->offset + mp->fix_size;
11754 else
11755 mp->next->offset = mp->offset;
11756
11757 mp = mp->next;
11758 }
11759
11760 return min_mp;
11761 }
11762
11763 static void
11764 assign_minipool_offsets (Mfix *barrier)
11765 {
11766 HOST_WIDE_INT offset = 0;
11767 Mnode *mp;
11768
11769 minipool_barrier = barrier;
11770
11771 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11772 {
11773 mp->offset = offset;
11774
11775 if (mp->refcount > 0)
11776 offset += mp->fix_size;
11777 }
11778 }
11779
11780 /* Output the literal table. */
11781 static void
11782 dump_minipool (rtx scan)
11783 {
11784 Mnode * mp;
11785 Mnode * nmp;
11786 int align64 = 0;
11787
11788 if (ARM_DOUBLEWORD_ALIGN)
11789 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11790 if (mp->refcount > 0 && mp->fix_size >= 8)
11791 {
11792 align64 = 1;
11793 break;
11794 }
11795
11796 if (dump_file)
11797 fprintf (dump_file,
11798 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11799 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11800
11801 scan = emit_label_after (gen_label_rtx (), scan);
11802 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11803 scan = emit_label_after (minipool_vector_label, scan);
11804
11805 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11806 {
11807 if (mp->refcount > 0)
11808 {
11809 if (dump_file)
11810 {
11811 fprintf (dump_file,
11812 ";; Offset %u, min %ld, max %ld ",
11813 (unsigned) mp->offset, (unsigned long) mp->min_address,
11814 (unsigned long) mp->max_address);
11815 arm_print_value (dump_file, mp->value);
11816 fputc ('\n', dump_file);
11817 }
11818
11819 switch (mp->fix_size)
11820 {
11821 #ifdef HAVE_consttable_1
11822 case 1:
11823 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11824 break;
11825
11826 #endif
11827 #ifdef HAVE_consttable_2
11828 case 2:
11829 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11830 break;
11831
11832 #endif
11833 #ifdef HAVE_consttable_4
11834 case 4:
11835 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11836 break;
11837
11838 #endif
11839 #ifdef HAVE_consttable_8
11840 case 8:
11841 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11842 break;
11843
11844 #endif
11845 #ifdef HAVE_consttable_16
11846 case 16:
11847 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11848 break;
11849
11850 #endif
11851 default:
11852 gcc_unreachable ();
11853 }
11854 }
11855
11856 nmp = mp->next;
11857 free (mp);
11858 }
11859
11860 minipool_vector_head = minipool_vector_tail = NULL;
11861 scan = emit_insn_after (gen_consttable_end (), scan);
11862 scan = emit_barrier_after (scan);
11863 }
11864
11865 /* Return the cost of forcibly inserting a barrier after INSN. */
11866 static int
11867 arm_barrier_cost (rtx insn)
11868 {
11869 /* Basing the location of the pool on the loop depth is preferable,
11870 but at the moment, the basic block information seems to be
11871 corrupt by this stage of the compilation. */
11872 int base_cost = 50;
11873 rtx next = next_nonnote_insn (insn);
11874
11875 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11876 base_cost -= 20;
11877
11878 switch (GET_CODE (insn))
11879 {
11880 case CODE_LABEL:
11881 /* It will always be better to place the table before the label, rather
11882 than after it. */
11883 return 50;
11884
11885 case INSN:
11886 case CALL_INSN:
11887 return base_cost;
11888
11889 case JUMP_INSN:
11890 return base_cost - 10;
11891
11892 default:
11893 return base_cost + 10;
11894 }
11895 }
11896
11897 /* Find the best place in the insn stream in the range
11898 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11899 Create the barrier by inserting a jump and add a new fix entry for
11900 it. */
11901 static Mfix *
11902 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11903 {
11904 HOST_WIDE_INT count = 0;
11905 rtx barrier;
11906 rtx from = fix->insn;
11907 /* The instruction after which we will insert the jump. */
11908 rtx selected = NULL;
11909 int selected_cost;
11910 /* The address at which the jump instruction will be placed. */
11911 HOST_WIDE_INT selected_address;
11912 Mfix * new_fix;
11913 HOST_WIDE_INT max_count = max_address - fix->address;
11914 rtx label = gen_label_rtx ();
11915
11916 selected_cost = arm_barrier_cost (from);
11917 selected_address = fix->address;
11918
11919 while (from && count < max_count)
11920 {
11921 rtx tmp;
11922 int new_cost;
11923
11924 /* This code shouldn't have been called if there was a natural barrier
11925 within range. */
11926 gcc_assert (GET_CODE (from) != BARRIER);
11927
11928 /* Count the length of this insn. */
11929 count += get_attr_length (from);
11930
11931 /* If there is a jump table, add its length. */
11932 tmp = is_jump_table (from);
11933 if (tmp != NULL)
11934 {
11935 count += get_jump_table_size (tmp);
11936
11937 /* Jump tables aren't in a basic block, so base the cost on
11938 the dispatch insn. If we select this location, we will
11939 still put the pool after the table. */
11940 new_cost = arm_barrier_cost (from);
11941
11942 if (count < max_count
11943 && (!selected || new_cost <= selected_cost))
11944 {
11945 selected = tmp;
11946 selected_cost = new_cost;
11947 selected_address = fix->address + count;
11948 }
11949
11950 /* Continue after the dispatch table. */
11951 from = NEXT_INSN (tmp);
11952 continue;
11953 }
11954
11955 new_cost = arm_barrier_cost (from);
11956
11957 if (count < max_count
11958 && (!selected || new_cost <= selected_cost))
11959 {
11960 selected = from;
11961 selected_cost = new_cost;
11962 selected_address = fix->address + count;
11963 }
11964
11965 from = NEXT_INSN (from);
11966 }
11967
11968 /* Make sure that we found a place to insert the jump. */
11969 gcc_assert (selected);
11970
11971 /* Make sure we do not split a call and its corresponding
11972 CALL_ARG_LOCATION note. */
11973 if (CALL_P (selected))
11974 {
11975 rtx next = NEXT_INSN (selected);
11976 if (next && NOTE_P (next)
11977 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
11978 selected = next;
11979 }
11980
11981 /* Create a new JUMP_INSN that branches around a barrier. */
11982 from = emit_jump_insn_after (gen_jump (label), selected);
11983 JUMP_LABEL (from) = label;
11984 barrier = emit_barrier_after (from);
11985 emit_label_after (label, barrier);
11986
11987 /* Create a minipool barrier entry for the new barrier. */
11988 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11989 new_fix->insn = barrier;
11990 new_fix->address = selected_address;
11991 new_fix->next = fix->next;
11992 fix->next = new_fix;
11993
11994 return new_fix;
11995 }
11996
11997 /* Record that there is a natural barrier in the insn stream at
11998 ADDRESS. */
11999 static void
12000 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
12001 {
12002 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12003
12004 fix->insn = insn;
12005 fix->address = address;
12006
12007 fix->next = NULL;
12008 if (minipool_fix_head != NULL)
12009 minipool_fix_tail->next = fix;
12010 else
12011 minipool_fix_head = fix;
12012
12013 minipool_fix_tail = fix;
12014 }
12015
12016 /* Record INSN, which will need fixing up to load a value from the
12017 minipool. ADDRESS is the offset of the insn from the start of the
12018 function; LOC is a pointer to the part of the insn which requires
12019 fixing; VALUE is the constant that must be loaded, which is of type
12020 MODE. */
12021 static void
12022 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
12023 enum machine_mode mode, rtx value)
12024 {
12025 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12026
12027 fix->insn = insn;
12028 fix->address = address;
12029 fix->loc = loc;
12030 fix->mode = mode;
12031 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
12032 fix->value = value;
12033 fix->forwards = get_attr_pool_range (insn);
12034 fix->backwards = get_attr_neg_pool_range (insn);
12035 fix->minipool = NULL;
12036
12037 /* If an insn doesn't have a range defined for it, then it isn't
12038 expecting to be reworked by this code. Better to stop now than
12039 to generate duff assembly code. */
12040 gcc_assert (fix->forwards || fix->backwards);
12041
12042 /* If an entry requires 8-byte alignment then assume all constant pools
12043 require 4 bytes of padding. Trying to do this later on a per-pool
12044 basis is awkward because existing pool entries have to be modified. */
12045 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
12046 minipool_pad = 4;
12047
12048 if (dump_file)
12049 {
12050 fprintf (dump_file,
12051 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12052 GET_MODE_NAME (mode),
12053 INSN_UID (insn), (unsigned long) address,
12054 -1 * (long)fix->backwards, (long)fix->forwards);
12055 arm_print_value (dump_file, fix->value);
12056 fprintf (dump_file, "\n");
12057 }
12058
12059 /* Add it to the chain of fixes. */
12060 fix->next = NULL;
12061
12062 if (minipool_fix_head != NULL)
12063 minipool_fix_tail->next = fix;
12064 else
12065 minipool_fix_head = fix;
12066
12067 minipool_fix_tail = fix;
12068 }
12069
12070 /* Return the cost of synthesizing a 64-bit constant VAL inline.
12071 Returns the number of insns needed, or 99 if we don't know how to
12072 do it. */
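/* Purely as an illustration: a value such as 0x0000000100000001 costs 2,
   since each 32-bit half is a single valid ARM immediate, whereas halves
   that must be built from several MOV/ORR insns raise the total
   accordingly.  */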
12073 int
12074 arm_const_double_inline_cost (rtx val)
12075 {
12076 rtx lowpart, highpart;
12077 enum machine_mode mode;
12078
12079 mode = GET_MODE (val);
12080
12081 if (mode == VOIDmode)
12082 mode = DImode;
12083
12084 gcc_assert (GET_MODE_SIZE (mode) == 8);
12085
12086 lowpart = gen_lowpart (SImode, val);
12087 highpart = gen_highpart_mode (SImode, mode, val);
12088
12089 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12090 gcc_assert (GET_CODE (highpart) == CONST_INT);
12091
12092 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12093 NULL_RTX, NULL_RTX, 0, 0)
12094 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12095 NULL_RTX, NULL_RTX, 0, 0));
12096 }
12097
12098 /* Return true if it is worthwhile to split a 64-bit constant into two
12099 32-bit operations. This is the case if optimizing for size, or
12100 if we have load delay slots, or if one 32-bit part can be done with
12101 a single data operation. */
12102 bool
12103 arm_const_double_by_parts (rtx val)
12104 {
12105 enum machine_mode mode = GET_MODE (val);
12106 rtx part;
12107
12108 if (optimize_size || arm_ld_sched)
12109 return true;
12110
12111 if (mode == VOIDmode)
12112 mode = DImode;
12113
12114 part = gen_highpart_mode (SImode, mode, val);
12115
12116 gcc_assert (GET_CODE (part) == CONST_INT);
12117
12118 if (const_ok_for_arm (INTVAL (part))
12119 || const_ok_for_arm (~INTVAL (part)))
12120 return true;
12121
12122 part = gen_lowpart (SImode, val);
12123
12124 gcc_assert (GET_CODE (part) == CONST_INT);
12125
12126 if (const_ok_for_arm (INTVAL (part))
12127 || const_ok_for_arm (~INTVAL (part)))
12128 return true;
12129
12130 return false;
12131 }
12132
12133 /* Return true if it is possible to inline both the high and low parts
12134 of a 64-bit constant into 32-bit data processing instructions. */
12135 bool
12136 arm_const_double_by_immediates (rtx val)
12137 {
12138 enum machine_mode mode = GET_MODE (val);
12139 rtx part;
12140
12141 if (mode == VOIDmode)
12142 mode = DImode;
12143
12144 part = gen_highpart_mode (SImode, mode, val);
12145
12146 gcc_assert (GET_CODE (part) == CONST_INT);
12147
12148 if (!const_ok_for_arm (INTVAL (part)))
12149 return false;
12150
12151 part = gen_lowpart (SImode, val);
12152
12153 gcc_assert (GET_CODE (part) == CONST_INT);
12154
12155 if (!const_ok_for_arm (INTVAL (part)))
12156 return false;
12157
12158 return true;
12159 }
12160
12161 /* Scan INSN and note any of its operands that need fixing.
12162 If DO_PUSHES is false we do not actually push any of the fixups
12163 needed. The function returns TRUE if any fixups were needed/pushed.
12164 This is used by arm_memory_load_p() which needs to know about loads
12165 of constants that will be converted into minipool loads. */
12166 static bool
12167 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12168 {
12169 bool result = false;
12170 int opno;
12171
12172 extract_insn (insn);
12173
12174 if (!constrain_operands (1))
12175 fatal_insn_not_found (insn);
12176
12177 if (recog_data.n_alternatives == 0)
12178 return false;
12179
12180 /* Fill in recog_op_alt with information about the constraints of
12181 this insn. */
12182 preprocess_constraints ();
12183
12184 for (opno = 0; opno < recog_data.n_operands; opno++)
12185 {
12186 /* Things we need to fix can only occur in inputs. */
12187 if (recog_data.operand_type[opno] != OP_IN)
12188 continue;
12189
12190 /* If this alternative is a memory reference, then any mention
12191 of constants in this alternative is really to fool reload
12192 into allowing us to accept one there. We need to fix them up
12193 now so that we output the right code. */
12194 if (recog_op_alt[opno][which_alternative].memory_ok)
12195 {
12196 rtx op = recog_data.operand[opno];
12197
12198 if (CONSTANT_P (op))
12199 {
12200 if (do_pushes)
12201 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12202 recog_data.operand_mode[opno], op);
12203 result = true;
12204 }
12205 else if (GET_CODE (op) == MEM
12206 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12207 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12208 {
12209 if (do_pushes)
12210 {
12211 rtx cop = avoid_constant_pool_reference (op);
12212
12213 /* Casting the address of something to a mode narrower
12214 than a word can cause avoid_constant_pool_reference()
12215 to return the pool reference itself. That's no good to
12216 us here. Lets just hope that we can use the
12217 constant pool value directly. */
12218 if (op == cop)
12219 cop = get_pool_constant (XEXP (op, 0));
12220
12221 push_minipool_fix (insn, address,
12222 recog_data.operand_loc[opno],
12223 recog_data.operand_mode[opno], cop);
12224 }
12225
12226 result = true;
12227 }
12228 }
12229 }
12230
12231 return result;
12232 }
12233
12234 /* Convert instructions to their cc-clobbering variant if possible, since
12235 that allows us to use smaller encodings. */
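/* For instance, a non-flag-setting "add r0, r0, r1" needs a 32-bit
   Thumb-2 encoding, whereas the flag-setting "adds r0, r0, r1" has a
   16-bit encoding; the rewrite is only safe when the condition codes
   are dead at that point, which is what the backwards liveness scan
   below verifies.  */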
12236
12237 static void
12238 thumb2_reorg (void)
12239 {
12240 basic_block bb;
12241 regset_head live;
12242
12243 INIT_REG_SET (&live);
12244
12245 /* We are freeing block_for_insn in the toplev to keep compatibility
12246 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12247 compute_bb_for_insn ();
12248 df_analyze ();
12249
12250 FOR_EACH_BB (bb)
12251 {
12252 rtx insn;
12253
12254 COPY_REG_SET (&live, DF_LR_OUT (bb));
12255 df_simulate_initialize_backwards (bb, &live);
12256 FOR_BB_INSNS_REVERSE (bb, insn)
12257 {
12258 if (NONJUMP_INSN_P (insn)
12259 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12260 {
12261 rtx pat = PATTERN (insn);
12262 if (GET_CODE (pat) == SET
12263 && low_register_operand (XEXP (pat, 0), SImode)
12264 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12265 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12266 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12267 {
12268 rtx dst = XEXP (pat, 0);
12269 rtx src = XEXP (pat, 1);
12270 rtx op0 = XEXP (src, 0);
12271 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
12272 ? XEXP (src, 1) : NULL);
12273
12274 if (rtx_equal_p (dst, op0)
12275 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12276 {
12277 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12278 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12279 rtvec vec = gen_rtvec (2, pat, clobber);
12280
12281 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12282 INSN_CODE (insn) = -1;
12283 }
12284 /* We can also handle a commutative operation where the
12285 second operand matches the destination. */
12286 else if (op1 && rtx_equal_p (dst, op1))
12287 {
12288 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12289 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12290 rtvec vec;
12291
12292 src = copy_rtx (src);
12293 XEXP (src, 0) = op1;
12294 XEXP (src, 1) = op0;
12295 pat = gen_rtx_SET (VOIDmode, dst, src);
12296 vec = gen_rtvec (2, pat, clobber);
12297 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12298 INSN_CODE (insn) = -1;
12299 }
12300 }
12301 }
12302
12303 if (NONDEBUG_INSN_P (insn))
12304 df_simulate_one_insn_backwards (bb, insn, &live);
12305 }
12306 }
12307
12308 CLEAR_REG_SET (&live);
12309 }
12310
12311 /* GCC puts the pool in the wrong place for ARM, since we can only
12312 load addresses a limited distance around the pc. We do some
12313 special munging to move the constant pool values to the correct
12314 point in the code. */
12315 static void
12316 arm_reorg (void)
12317 {
12318 rtx insn;
12319 HOST_WIDE_INT address = 0;
12320 Mfix * fix;
12321
12322 if (TARGET_THUMB2)
12323 thumb2_reorg ();
12324
12325 minipool_fix_head = minipool_fix_tail = NULL;
12326
12327 /* The first insn must always be a note, or the code below won't
12328 scan it properly. */
12329 insn = get_insns ();
12330 gcc_assert (GET_CODE (insn) == NOTE);
12331 minipool_pad = 0;
12332
12333 /* Scan all the insns and record the operands that will need fixing. */
12334 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12335 {
12336 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12337 && (arm_cirrus_insn_p (insn)
12338 || GET_CODE (insn) == JUMP_INSN
12339 || arm_memory_load_p (insn)))
12340 cirrus_reorg (insn);
12341
12342 if (GET_CODE (insn) == BARRIER)
12343 push_minipool_barrier (insn, address);
12344 else if (INSN_P (insn))
12345 {
12346 rtx table;
12347
12348 note_invalid_constants (insn, address, true);
12349 address += get_attr_length (insn);
12350
12351 /* If the insn is a vector jump, add the size of the table
12352 and skip the table. */
12353 if ((table = is_jump_table (insn)) != NULL)
12354 {
12355 address += get_jump_table_size (table);
12356 insn = table;
12357 }
12358 }
12359 }
12360
12361 fix = minipool_fix_head;
12362
12363 /* Now scan the fixups and perform the required changes. */
12364 while (fix)
12365 {
12366 Mfix * ftmp;
12367 Mfix * fdel;
12368 Mfix * last_added_fix;
12369 Mfix * last_barrier = NULL;
12370 Mfix * this_fix;
12371
12372 /* Skip any further barriers before the next fix. */
12373 while (fix && GET_CODE (fix->insn) == BARRIER)
12374 fix = fix->next;
12375
12376 /* No more fixes. */
12377 if (fix == NULL)
12378 break;
12379
12380 last_added_fix = NULL;
12381
12382 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12383 {
12384 if (GET_CODE (ftmp->insn) == BARRIER)
12385 {
12386 if (ftmp->address >= minipool_vector_head->max_address)
12387 break;
12388
12389 last_barrier = ftmp;
12390 }
12391 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12392 break;
12393
12394 last_added_fix = ftmp; /* Keep track of the last fix added. */
12395 }
12396
12397 /* If we found a barrier, drop back to that; any fixes that we
12398 could have reached but come after the barrier will now go in
12399 the next mini-pool. */
12400 if (last_barrier != NULL)
12401 {
12402 /* Reduce the refcount for those fixes that won't go into this
12403 pool after all. */
12404 for (fdel = last_barrier->next;
12405 fdel && fdel != ftmp;
12406 fdel = fdel->next)
12407 {
12408 fdel->minipool->refcount--;
12409 fdel->minipool = NULL;
12410 }
12411
12412 ftmp = last_barrier;
12413 }
12414 else
12415 {
12416 /* ftmp is the first fix that we can't fit into this pool and
12417 there are no natural barriers that we could use. Insert a
12418 new barrier in the code somewhere between the previous
12419 fix and this one, and arrange to jump around it. */
12420 HOST_WIDE_INT max_address;
12421
12422 /* The last item on the list of fixes must be a barrier, so
12423 we can never run off the end of the list of fixes without
12424 last_barrier being set. */
12425 gcc_assert (ftmp);
12426
12427 max_address = minipool_vector_head->max_address;
12428 /* Check that there isn't another fix that is in range that
12429 we couldn't fit into this pool because the pool was
12430 already too large: we need to put the pool before such an
12431 instruction. The pool itself may come just after the
12432 fix because create_fix_barrier also allows space for a
12433 jump instruction. */
12434 if (ftmp->address < max_address)
12435 max_address = ftmp->address + 1;
12436
12437 last_barrier = create_fix_barrier (last_added_fix, max_address);
12438 }
12439
12440 assign_minipool_offsets (last_barrier);
12441
12442 while (ftmp)
12443 {
12444 if (GET_CODE (ftmp->insn) != BARRIER
12445 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12446 == NULL))
12447 break;
12448
12449 ftmp = ftmp->next;
12450 }
12451
12452 /* Scan over the fixes we have identified for this pool, fixing them
12453 up and adding the constants to the pool itself. */
12454 for (this_fix = fix; this_fix && ftmp != this_fix;
12455 this_fix = this_fix->next)
12456 if (GET_CODE (this_fix->insn) != BARRIER)
12457 {
12458 rtx addr
12459 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12460 minipool_vector_label),
12461 this_fix->minipool->offset);
12462 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12463 }
12464
12465 dump_minipool (last_barrier->insn);
12466 fix = ftmp;
12467 }
12468
12469 /* From now on we must synthesize any constants that we can't handle
12470 directly. This can happen if the RTL gets split during final
12471 instruction generation. */
12472 after_arm_reorg = 1;
12473
12474 /* Free the minipool memory. */
12475 obstack_free (&minipool_obstack, minipool_startobj);
12476 }
12477 \f
12478 /* Routines to output assembly language. */
12479
12480 /* If the rtx is the correct value then return the string representation
12481 of the number. In this way we can ensure that valid double constants
12482 are generated even when cross-compiling. */
12483 const char *
12484 fp_immediate_constant (rtx x)
12485 {
12486 REAL_VALUE_TYPE r;
12487 int i;
12488
12489 if (!fp_consts_inited)
12490 init_fp_table ();
12491
12492 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12493 for (i = 0; i < 8; i++)
12494 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12495 return strings_fp[i];
12496
12497 gcc_unreachable ();
12498 }
12499
12500 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12501 static const char *
12502 fp_const_from_val (REAL_VALUE_TYPE *r)
12503 {
12504 int i;
12505
12506 if (!fp_consts_inited)
12507 init_fp_table ();
12508
12509 for (i = 0; i < 8; i++)
12510 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12511 return strings_fp[i];
12512
12513 gcc_unreachable ();
12514 }
12515
12516 /* Output the operands of a LDM/STM instruction to STREAM.
12517 MASK is the ARM register set mask of which only bits 0-15 are important.
12518 REG is the base register, either the frame pointer or the stack pointer,
12519 INSTR is the possibly suffixed load or store instruction.
12520 RFE is nonzero if the instruction should also copy spsr to cpsr. */
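/* A typical result is something like "stmfd sp!, {r4, r5, lr}" for a
   prologue store-multiple (illustrative only; the mnemonic comes from
   INSTR and the register list from MASK).  */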
12521
12522 static void
12523 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12524 unsigned long mask, int rfe)
12525 {
12526 unsigned i;
12527 bool not_first = FALSE;
12528
12529 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12530 fputc ('\t', stream);
12531 asm_fprintf (stream, instr, reg);
12532 fputc ('{', stream);
12533
12534 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12535 if (mask & (1 << i))
12536 {
12537 if (not_first)
12538 fprintf (stream, ", ");
12539
12540 asm_fprintf (stream, "%r", i);
12541 not_first = TRUE;
12542 }
12543
12544 if (rfe)
12545 fprintf (stream, "}^\n");
12546 else
12547 fprintf (stream, "}\n");
12548 }
12549
12550
12551 /* Output a FLDMD instruction to STREAM.
12552 BASE is the register containing the address.
12553 REG and COUNT specify the register range.
12554 Extra registers may be added to avoid hardware bugs.
12555
12556 We output FLDMD even for ARMv5 VFP implementations. Although
12557 FLDMD is technically not supported until ARMv6, it is believed
12558 that all VFP implementations support its use in this context. */
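/* For example, BASE = SP, REG = 8 and COUNT = 2 would normally produce
   "fldmfdd sp!, {d8, d9}" (illustrative; on pre-ARMv6 cores the
   workaround below widens such a two-register transfer).  */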
12559
12560 static void
12561 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
12562 {
12563 int i;
12564
12565 /* Workaround ARM10 VFPr1 bug. */
12566 if (count == 2 && !arm_arch6)
12567 {
12568 if (reg == 15)
12569 reg--;
12570 count++;
12571 }
12572
12573 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12574 load into multiple parts if we have to handle more than 16 registers. */
12575 if (count > 16)
12576 {
12577 vfp_output_fldmd (stream, base, reg, 16);
12578 vfp_output_fldmd (stream, base, reg + 16, count - 16);
12579 return;
12580 }
12581
12582 fputc ('\t', stream);
12583 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12584
12585 for (i = reg; i < reg + count; i++)
12586 {
12587 if (i > reg)
12588 fputs (", ", stream);
12589 asm_fprintf (stream, "d%d", i);
12590 }
12591 fputs ("}\n", stream);
12592
12593 }
12594
12595
12596 /* Output the assembly for a store multiple. */
12597
12598 const char *
12599 vfp_output_fstmd (rtx * operands)
12600 {
12601 char pattern[100];
12602 int p;
12603 int base;
12604 int i;
12605
12606 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12607 p = strlen (pattern);
12608
12609 gcc_assert (GET_CODE (operands[1]) == REG);
12610
12611 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12612 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12613 {
12614 p += sprintf (&pattern[p], ", d%d", base + i);
12615 }
12616 strcpy (&pattern[p], "}");
12617
12618 output_asm_insn (pattern, operands);
12619 return "";
12620 }
12621
12622
12623 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12624 number of bytes pushed. */
12625
12626 static int
12627 vfp_emit_fstmd (int base_reg, int count)
12628 {
12629 rtx par;
12630 rtx dwarf;
12631 rtx tmp, reg;
12632 int i;
12633
12634 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12635 register pairs are stored by a store multiple insn. We avoid this
12636 by pushing an extra pair. */
12637 if (count == 2 && !arm_arch6)
12638 {
12639 if (base_reg == LAST_VFP_REGNUM - 3)
12640 base_reg -= 2;
12641 count++;
12642 }
12643
12644 /* FSTMD may not store more than 16 doubleword registers at once. Split
12645 larger stores into multiple parts (up to a maximum of two, in
12646 practice). */
12647 if (count > 16)
12648 {
12649 int saved;
12650 /* NOTE: base_reg is an internal register number, so each D register
12651 counts as 2. */
12652 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12653 saved += vfp_emit_fstmd (base_reg, 16);
12654 return saved;
12655 }
12656
12657 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12658 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12659
12660 reg = gen_rtx_REG (DFmode, base_reg);
12661 base_reg += 2;
12662
12663 XVECEXP (par, 0, 0)
12664 = gen_rtx_SET (VOIDmode,
12665 gen_frame_mem
12666 (BLKmode,
12667 gen_rtx_PRE_MODIFY (Pmode,
12668 stack_pointer_rtx,
12669 plus_constant
12670 (stack_pointer_rtx,
12671 - (count * 8)))
12672 ),
12673 gen_rtx_UNSPEC (BLKmode,
12674 gen_rtvec (1, reg),
12675 UNSPEC_PUSH_MULT));
12676
12677 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12678 plus_constant (stack_pointer_rtx, -(count * 8)));
12679 RTX_FRAME_RELATED_P (tmp) = 1;
12680 XVECEXP (dwarf, 0, 0) = tmp;
12681
12682 tmp = gen_rtx_SET (VOIDmode,
12683 gen_frame_mem (DFmode, stack_pointer_rtx),
12684 reg);
12685 RTX_FRAME_RELATED_P (tmp) = 1;
12686 XVECEXP (dwarf, 0, 1) = tmp;
12687
12688 for (i = 1; i < count; i++)
12689 {
12690 reg = gen_rtx_REG (DFmode, base_reg);
12691 base_reg += 2;
12692 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12693
12694 tmp = gen_rtx_SET (VOIDmode,
12695 gen_frame_mem (DFmode,
12696 plus_constant (stack_pointer_rtx,
12697 i * 8)),
12698 reg);
12699 RTX_FRAME_RELATED_P (tmp) = 1;
12700 XVECEXP (dwarf, 0, i + 1) = tmp;
12701 }
12702
12703 par = emit_insn (par);
12704 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12705 RTX_FRAME_RELATED_P (par) = 1;
12706
12707 return count * 8;
12708 }
12709
12710 /* Emit a call instruction with pattern PAT. ADDR is the address of
12711 the call target. */
12712
12713 void
12714 arm_emit_call_insn (rtx pat, rtx addr)
12715 {
12716 rtx insn;
12717
12718 insn = emit_call_insn (pat);
12719
12720 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12721 If the call might use such an entry, add a use of the PIC register
12722 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12723 if (TARGET_VXWORKS_RTP
12724 && flag_pic
12725 && GET_CODE (addr) == SYMBOL_REF
12726 && (SYMBOL_REF_DECL (addr)
12727 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12728 : !SYMBOL_REF_LOCAL_P (addr)))
12729 {
12730 require_pic_register ();
12731 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12732 }
12733 }
12734
12735 /* Output a 'call' insn. */
12736 const char *
12737 output_call (rtx *operands)
12738 {
12739 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12740
12741 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12742 if (REGNO (operands[0]) == LR_REGNUM)
12743 {
12744 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12745 output_asm_insn ("mov%?\t%0, %|lr", operands);
12746 }
12747
12748 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12749
12750 if (TARGET_INTERWORK || arm_arch4t)
12751 output_asm_insn ("bx%?\t%0", operands);
12752 else
12753 output_asm_insn ("mov%?\t%|pc, %0", operands);
12754
12755 return "";
12756 }
12757
12758 /* Output a 'call' insn that is a reference in memory. This is
12759 disabled for ARMv5, where we prefer a blx instead, because otherwise
12760 there's a significant performance overhead. */
12761 const char *
12762 output_call_mem (rtx *operands)
12763 {
12764 gcc_assert (!arm_arch5);
12765 if (TARGET_INTERWORK)
12766 {
12767 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12768 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12769 output_asm_insn ("bx%?\t%|ip", operands);
12770 }
12771 else if (regno_use_in (LR_REGNUM, operands[0]))
12772 {
12773 /* LR is used in the memory address. We load the address in the
12774 first instruction. It's safe to use IP as the target of the
12775 load since the call will kill it anyway. */
12776 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12777 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12778 if (arm_arch4t)
12779 output_asm_insn ("bx%?\t%|ip", operands);
12780 else
12781 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12782 }
12783 else
12784 {
12785 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12786 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12787 }
12788
12789 return "";
12790 }
12791
12792
12793 /* Output a move from arm registers to an fpa register.
12794 OPERANDS[0] is an fpa register.
12795 OPERANDS[1] is the first register of an arm register pair. */
12796 const char *
12797 output_mov_long_double_fpa_from_arm (rtx *operands)
12798 {
12799 int arm_reg0 = REGNO (operands[1]);
12800 rtx ops[3];
12801
12802 gcc_assert (arm_reg0 != IP_REGNUM);
12803
12804 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12805 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12806 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12807
12808 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12809 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12810
12811 return "";
12812 }
12813
12814 /* Output a move from an fpa register to arm registers.
12815 OPERANDS[0] is the first register of an arm register pair.
12816 OPERANDS[1] is an fpa register. */
12817 const char *
12818 output_mov_long_double_arm_from_fpa (rtx *operands)
12819 {
12820 int arm_reg0 = REGNO (operands[0]);
12821 rtx ops[3];
12822
12823 gcc_assert (arm_reg0 != IP_REGNUM);
12824
12825 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12826 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12827 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12828
12829 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12830 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12831 return "";
12832 }
12833
12834 /* Output a move from arm registers to arm registers of a long double.
12835 OPERANDS[0] is the destination.
12836 OPERANDS[1] is the source. */
12837 const char *
12838 output_mov_long_double_arm_from_arm (rtx *operands)
12839 {
12840 /* We have to be careful here because the two might overlap. */
12841 int dest_start = REGNO (operands[0]);
12842 int src_start = REGNO (operands[1]);
12843 rtx ops[2];
12844 int i;
12845
12846 if (dest_start < src_start)
12847 {
12848 for (i = 0; i < 3; i++)
12849 {
12850 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12851 ops[1] = gen_rtx_REG (SImode, src_start + i);
12852 output_asm_insn ("mov%?\t%0, %1", ops);
12853 }
12854 }
12855 else
12856 {
12857 for (i = 2; i >= 0; i--)
12858 {
12859 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12860 ops[1] = gen_rtx_REG (SImode, src_start + i);
12861 output_asm_insn ("mov%?\t%0, %1", ops);
12862 }
12863 }
12864
12865 return "";
12866 }
12867
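/* Emit a 32-bit move of SRC into DEST as a pair of 16-bit sets (the
   movw/movt idiom).  A sketch of the expansion performed below: for a
   CONST_INT the low 16 bits are set first and, if needed, the high 16 bits
   are written with a ZERO_EXTRACT set; for a symbolic SRC a HIGH set is
   followed by a LO_SUM set, which the movw/movt patterns are expected to
   match.  */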
12868 void
12869 arm_emit_movpair (rtx dest, rtx src)
12870 {
12871 /* If the src is an immediate, simplify it. */
12872 if (CONST_INT_P (src))
12873 {
12874 HOST_WIDE_INT val = INTVAL (src);
12875 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12876 if ((val >> 16) & 0x0000ffff)
12877 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12878 GEN_INT (16)),
12879 GEN_INT ((val >> 16) & 0x0000ffff));
12880 return;
12881 }
12882 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12883 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
12884 }
12885
12886 /* Output a move from arm registers to an fpa register.
12887 OPERANDS[0] is an fpa register.
12888 OPERANDS[1] is the first register of an arm register pair. */
12889 const char *
12890 output_mov_double_fpa_from_arm (rtx *operands)
12891 {
12892 int arm_reg0 = REGNO (operands[1]);
12893 rtx ops[2];
12894
12895 gcc_assert (arm_reg0 != IP_REGNUM);
12896
12897 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12898 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12899 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12900 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12901 return "";
12902 }
12903
12904 /* Output a move from an fpa register to arm registers.
12905 OPERANDS[0] is the first register of an arm register pair.
12906 OPERANDS[1] is an fpa register. */
12907 const char *
12908 output_mov_double_arm_from_fpa (rtx *operands)
12909 {
12910 int arm_reg0 = REGNO (operands[0]);
12911 rtx ops[2];
12912
12913 gcc_assert (arm_reg0 != IP_REGNUM);
12914
12915 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12916 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12917 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12918 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12919 return "";
12920 }
12921
12922 /* Output a move between double words. It must be REG<-MEM
12923 or MEM<-REG. */
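/* A hedged illustration of the simplest cases handled here: with a plain
   register address and TARGET_LDRD the load side emits "ldrd %0, [%m1]"
   (and the store side "strd"); without ldrd it falls back to
   "ldmia %m1, %M0" / "stmia %m0, %M1".  */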
12924 const char *
12925 output_move_double (rtx *operands)
12926 {
12927 enum rtx_code code0 = GET_CODE (operands[0]);
12928 enum rtx_code code1 = GET_CODE (operands[1]);
12929 rtx otherops[3];
12930
12931 if (code0 == REG)
12932 {
12933 unsigned int reg0 = REGNO (operands[0]);
12934
12935 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12936
12937 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12938
12939 switch (GET_CODE (XEXP (operands[1], 0)))
12940 {
12941 case REG:
12942 if (TARGET_LDRD
12943 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
12944 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12945 else
12946 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12947 break;
12948
12949 case PRE_INC:
12950 gcc_assert (TARGET_LDRD);
12951 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12952 break;
12953
12954 case PRE_DEC:
12955 if (TARGET_LDRD)
12956 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12957 else
12958 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12959 break;
12960
12961 case POST_INC:
12962 if (TARGET_LDRD)
12963 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12964 else
12965 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12966 break;
12967
12968 case POST_DEC:
12969 gcc_assert (TARGET_LDRD);
12970 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12971 break;
12972
12973 case PRE_MODIFY:
12974 case POST_MODIFY:
12975 /* Autoincrement addressing modes should never have overlapping
12976 base and destination registers, and overlapping index registers
12977 are already prohibited, so this doesn't need to worry about
12978 fix_cm3_ldrd. */
12979 otherops[0] = operands[0];
12980 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12981 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12982
12983 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12984 {
12985 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12986 {
12987 /* Registers overlap so split out the increment. */
12988 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12989 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12990 }
12991 else
12992 {
12993 /* Use a single insn if we can.
12994 FIXME: IWMMXT allows offsets larger than ldrd can
12995 handle, fix these up with a pair of ldr. */
12996 if (TARGET_THUMB2
12997 || GET_CODE (otherops[2]) != CONST_INT
12998 || (INTVAL (otherops[2]) > -256
12999 && INTVAL (otherops[2]) < 256))
13000 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
13001 else
13002 {
13003 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
13004 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13005 }
13006 }
13007 }
13008 else
13009 {
13010 /* Use a single insn if we can.
13011 FIXME: IWMMXT allows offsets larger than ldrd can handle,
13012 fix these up with a pair of ldr. */
13013 if (TARGET_THUMB2
13014 || GET_CODE (otherops[2]) != CONST_INT
13015 || (INTVAL (otherops[2]) > -256
13016 && INTVAL (otherops[2]) < 256))
13017 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
13018 else
13019 {
13020 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13021 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
13022 }
13023 }
13024 break;
13025
13026 case LABEL_REF:
13027 case CONST:
13028 /* We might be able to use ldrd %0, %1 here. However, the range is
13029 different from that of ldr/adr, and it is broken on some ARMv7-M
13030 implementations. */
13031 /* Use the second register of the pair to avoid problematic
13032 overlap. */
13033 otherops[1] = operands[1];
13034 output_asm_insn ("adr%?\t%0, %1", otherops);
13035 operands[1] = otherops[0];
13036 if (TARGET_LDRD)
13037 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13038 else
13039 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
13040 break;
13041
13042 /* ??? This needs checking for thumb2. */
13043 default:
13044 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
13045 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
13046 {
13047 otherops[0] = operands[0];
13048 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
13049 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
13050
13051 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
13052 {
13053 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13054 {
13055 switch ((int) INTVAL (otherops[2]))
13056 {
13057 case -8:
13058 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
13059 return "";
13060 case -4:
13061 if (TARGET_THUMB2)
13062 break;
13063 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
13064 return "";
13065 case 4:
13066 if (TARGET_THUMB2)
13067 break;
13068 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
13069 return "";
13070 }
13071 }
13072 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
13073 operands[1] = otherops[0];
13074 if (TARGET_LDRD
13075 && (GET_CODE (otherops[2]) == REG
13076 || TARGET_THUMB2
13077 || (GET_CODE (otherops[2]) == CONST_INT
13078 && INTVAL (otherops[2]) > -256
13079 && INTVAL (otherops[2]) < 256)))
13080 {
13081 if (reg_overlap_mentioned_p (operands[0],
13082 otherops[2]))
13083 {
13084 rtx tmp;
13085 /* Swap base and index registers over to
13086 avoid a conflict. */
13087 tmp = otherops[1];
13088 otherops[1] = otherops[2];
13089 otherops[2] = tmp;
13090 }
13091 /* If both registers conflict, it will usually
13092 have been fixed by a splitter. */
13093 if (reg_overlap_mentioned_p (operands[0], otherops[2])
13094 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
13095 {
13096 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13097 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13098 }
13099 else
13100 {
13101 otherops[0] = operands[0];
13102 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
13103 }
13104 return "";
13105 }
13106
13107 if (GET_CODE (otherops[2]) == CONST_INT)
13108 {
13109 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
13110 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
13111 else
13112 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13113 }
13114 else
13115 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13116 }
13117 else
13118 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
13119
13120 if (TARGET_LDRD)
13121 return "ldr%(d%)\t%0, [%1]";
13122
13123 return "ldm%(ia%)\t%1, %M0";
13124 }
13125 else
13126 {
13127 otherops[1] = adjust_address (operands[1], SImode, 4);
13128 /* Take care of overlapping base/data reg. */
13129 if (reg_mentioned_p (operands[0], operands[1]))
13130 {
13131 output_asm_insn ("ldr%?\t%0, %1", otherops);
13132 output_asm_insn ("ldr%?\t%0, %1", operands);
13133 }
13134 else
13135 {
13136 output_asm_insn ("ldr%?\t%0, %1", operands);
13137 output_asm_insn ("ldr%?\t%0, %1", otherops);
13138 }
13139 }
13140 }
13141 }
13142 else
13143 {
13144 /* Constraints should ensure this. */
13145 gcc_assert (code0 == MEM && code1 == REG);
13146 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13147
13148 switch (GET_CODE (XEXP (operands[0], 0)))
13149 {
13150 case REG:
13151 if (TARGET_LDRD)
13152 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
13153 else
13154 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13155 break;
13156
13157 case PRE_INC:
13158 gcc_assert (TARGET_LDRD);
13159 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
13160 break;
13161
13162 case PRE_DEC:
13163 if (TARGET_LDRD)
13164 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
13165 else
13166 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
13167 break;
13168
13169 case POST_INC:
13170 if (TARGET_LDRD)
13171 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
13172 else
13173 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
13174 break;
13175
13176 case POST_DEC:
13177 gcc_assert (TARGET_LDRD);
13178 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
13179 break;
13180
13181 case PRE_MODIFY:
13182 case POST_MODIFY:
13183 otherops[0] = operands[1];
13184 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
13185 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13186
13187 /* IWMMXT allows offsets larger than strd can handle,
13188 fix these up with a pair of str. */
13189 if (!TARGET_THUMB2
13190 && GET_CODE (otherops[2]) == CONST_INT
13191 && (INTVAL(otherops[2]) <= -256
13192 || INTVAL(otherops[2]) >= 256))
13193 {
13194 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13195 {
13196 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13197 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13198 }
13199 else
13200 {
13201 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13202 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13203 }
13204 }
13205 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13206 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13207 else
13208 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13209 break;
13210
13211 case PLUS:
13212 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13213 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13214 {
13215 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13216 {
13217 case -8:
13218 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13219 return "";
13220
13221 case -4:
13222 if (TARGET_THUMB2)
13223 break;
13224 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13225 return "";
13226
13227 case 4:
13228 if (TARGET_THUMB2)
13229 break;
13230 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13231 return "";
13232 }
13233 }
13234 if (TARGET_LDRD
13235 && (GET_CODE (otherops[2]) == REG
13236 || TARGET_THUMB2
13237 || (GET_CODE (otherops[2]) == CONST_INT
13238 && INTVAL (otherops[2]) > -256
13239 && INTVAL (otherops[2]) < 256)))
13240 {
13241 otherops[0] = operands[1];
13242 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13243 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13244 return "";
13245 }
13246 /* Fall through */
13247
13248 default:
13249 otherops[0] = adjust_address (operands[0], SImode, 4);
13250 otherops[1] = operands[1];
13251 output_asm_insn ("str%?\t%1, %0", operands);
13252 output_asm_insn ("str%?\t%H1, %0", otherops);
13253 }
13254 }
13255
13256 return "";
13257 }
13258
13259 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13260 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
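/* As a rough illustration: a load with a plain register address becomes a
   single "ldmia" of four consecutive core registers, and a register-to-
   register move expands into four "mov" instructions ordered so that an
   overlapping source/destination range is not clobbered.  */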
13261
13262 const char *
13263 output_move_quad (rtx *operands)
13264 {
13265 if (REG_P (operands[0]))
13266 {
13267 /* Load, or reg->reg move. */
13268
13269 if (MEM_P (operands[1]))
13270 {
13271 switch (GET_CODE (XEXP (operands[1], 0)))
13272 {
13273 case REG:
13274 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13275 break;
13276
13277 case LABEL_REF:
13278 case CONST:
13279 output_asm_insn ("adr%?\t%0, %1", operands);
13280 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13281 break;
13282
13283 default:
13284 gcc_unreachable ();
13285 }
13286 }
13287 else
13288 {
13289 rtx ops[2];
13290 int dest, src, i;
13291
13292 gcc_assert (REG_P (operands[1]));
13293
13294 dest = REGNO (operands[0]);
13295 src = REGNO (operands[1]);
13296
13297 /* This seems pretty dumb, but hopefully GCC won't try to do it
13298 very often. */
13299 if (dest < src)
13300 for (i = 0; i < 4; i++)
13301 {
13302 ops[0] = gen_rtx_REG (SImode, dest + i);
13303 ops[1] = gen_rtx_REG (SImode, src + i);
13304 output_asm_insn ("mov%?\t%0, %1", ops);
13305 }
13306 else
13307 for (i = 3; i >= 0; i--)
13308 {
13309 ops[0] = gen_rtx_REG (SImode, dest + i);
13310 ops[1] = gen_rtx_REG (SImode, src + i);
13311 output_asm_insn ("mov%?\t%0, %1", ops);
13312 }
13313 }
13314 }
13315 else
13316 {
13317 gcc_assert (MEM_P (operands[0]));
13318 gcc_assert (REG_P (operands[1]));
13319 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13320
13321 switch (GET_CODE (XEXP (operands[0], 0)))
13322 {
13323 case REG:
13324 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13325 break;
13326
13327 default:
13328 gcc_unreachable ();
13329 }
13330 }
13331
13332 return "";
13333 }
13334
13335 /* Output a VFP load or store instruction. */
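/* A rough sketch of the templates assembled below: a double-precision load
   with a simple address becomes "fldd dN, [rM, #off]", a store "fstd ...",
   while the POST_INC and PRE_DEC addressing modes use the fldmia/fstmia and
   fldmdb/fstmdb forms with writeback instead.  */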
13336
13337 const char *
13338 output_move_vfp (rtx *operands)
13339 {
13340 rtx reg, mem, addr, ops[2];
13341 int load = REG_P (operands[0]);
13342 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13343 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13344 const char *templ;
13345 char buff[50];
13346 enum machine_mode mode;
13347
13348 reg = operands[!load];
13349 mem = operands[load];
13350
13351 mode = GET_MODE (reg);
13352
13353 gcc_assert (REG_P (reg));
13354 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13355 gcc_assert (mode == SFmode
13356 || mode == DFmode
13357 || mode == SImode
13358 || mode == DImode
13359 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13360 gcc_assert (MEM_P (mem));
13361
13362 addr = XEXP (mem, 0);
13363
13364 switch (GET_CODE (addr))
13365 {
13366 case PRE_DEC:
13367 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13368 ops[0] = XEXP (addr, 0);
13369 ops[1] = reg;
13370 break;
13371
13372 case POST_INC:
13373 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13374 ops[0] = XEXP (addr, 0);
13375 ops[1] = reg;
13376 break;
13377
13378 default:
13379 templ = "f%s%c%%?\t%%%s0, %%1%s";
13380 ops[0] = reg;
13381 ops[1] = mem;
13382 break;
13383 }
13384
13385 sprintf (buff, templ,
13386 load ? "ld" : "st",
13387 dp ? 'd' : 's',
13388 dp ? "P" : "",
13389 integer_p ? "\t%@ int" : "");
13390 output_asm_insn (buff, ops);
13391
13392 return "";
13393 }
13394
13395 /* Output a Neon quad-word load or store, or a load or store for
13396 larger structure modes.
13397
13398 WARNING: The ordering of elements is weird in big-endian mode,
13399 because we use VSTM, as required by the EABI. GCC RTL defines
13400 element ordering based on in-memory order. This can differ
13401 from the architectural ordering of elements within a NEON register.
13402 The intrinsics defined in arm_neon.h use the NEON register element
13403 ordering, not the GCC RTL element ordering.
13404
13405 For example, the in-memory ordering of a big-endian quadword
13406 vector with 16-bit elements when stored from register pair {d0,d1}
13407 will be (lowest address first, d0[N] is NEON register element N):
13408
13409 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13410
13411 When necessary, quadword registers (dN, dN+1) are moved to ARM
13412 registers from rN in the order:
13413
13414 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13415
13416 This ensures that STM/LDM can be used on vectors in ARM registers, and
13417 that the same memory layout results as if VSTM/VLDM had been used. */
13418
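/* A hedged sketch of the cases below: with a plain register base the whole
   value moves with one "vldmia rN, {d...}" / "vstmia rN, {d...}"; for PLUS
   or LABEL_REF addresses the move is split into one vldr/vstr per 64-bit
   D register, and a base register that overlaps the destination is handled
   last so it is not clobbered early.  */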
13419 const char *
13420 output_move_neon (rtx *operands)
13421 {
13422 rtx reg, mem, addr, ops[2];
13423 int regno, load = REG_P (operands[0]);
13424 const char *templ;
13425 char buff[50];
13426 enum machine_mode mode;
13427
13428 reg = operands[!load];
13429 mem = operands[load];
13430
13431 mode = GET_MODE (reg);
13432
13433 gcc_assert (REG_P (reg));
13434 regno = REGNO (reg);
13435 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13436 || NEON_REGNO_OK_FOR_QUAD (regno));
13437 gcc_assert (VALID_NEON_DREG_MODE (mode)
13438 || VALID_NEON_QREG_MODE (mode)
13439 || VALID_NEON_STRUCT_MODE (mode));
13440 gcc_assert (MEM_P (mem));
13441
13442 addr = XEXP (mem, 0);
13443
13444 /* Strip off const from addresses like (const (plus (...))). */
13445 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13446 addr = XEXP (addr, 0);
13447
13448 switch (GET_CODE (addr))
13449 {
13450 case POST_INC:
13451 templ = "v%smia%%?\t%%0!, %%h1";
13452 ops[0] = XEXP (addr, 0);
13453 ops[1] = reg;
13454 break;
13455
13456 case PRE_DEC:
13457 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13458 templ = "v%smdb%%?\t%%0!, %%h1";
13459 ops[0] = XEXP (addr, 0);
13460 ops[1] = reg;
13461 break;
13462
13463 case POST_MODIFY:
13464 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13465 gcc_unreachable ();
13466
13467 case LABEL_REF:
13468 case PLUS:
13469 {
13470 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13471 int i;
13472 int overlap = -1;
13473 for (i = 0; i < nregs; i++)
13474 {
13475 /* We're only using DImode here because it's a convenient size. */
13476 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13477 ops[1] = adjust_address (mem, DImode, 8 * i);
13478 if (reg_overlap_mentioned_p (ops[0], mem))
13479 {
13480 gcc_assert (overlap == -1);
13481 overlap = i;
13482 }
13483 else
13484 {
13485 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13486 output_asm_insn (buff, ops);
13487 }
13488 }
13489 if (overlap != -1)
13490 {
13491 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13492 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13493 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13494 output_asm_insn (buff, ops);
13495 }
13496
13497 return "";
13498 }
13499
13500 default:
13501 templ = "v%smia%%?\t%%m0, %%h1";
13502 ops[0] = mem;
13503 ops[1] = reg;
13504 }
13505
13506 sprintf (buff, templ, load ? "ld" : "st");
13507 output_asm_insn (buff, ops);
13508
13509 return "";
13510 }
13511
13512 /* Compute and return the length of neon_mov<mode>, where <mode> is
13513 one of VSTRUCT modes: EI, OI, CI or XI. */
13514 int
13515 arm_attr_length_move_neon (rtx insn)
13516 {
13517 rtx reg, mem, addr;
13518 int load;
13519 enum machine_mode mode;
13520
13521 extract_insn_cached (insn);
13522
13523 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13524 {
13525 mode = GET_MODE (recog_data.operand[0]);
13526 switch (mode)
13527 {
13528 case EImode:
13529 case OImode:
13530 return 8;
13531 case CImode:
13532 return 12;
13533 case XImode:
13534 return 16;
13535 default:
13536 gcc_unreachable ();
13537 }
13538 }
13539
13540 load = REG_P (recog_data.operand[0]);
13541 reg = recog_data.operand[!load];
13542 mem = recog_data.operand[load];
13543
13544 gcc_assert (MEM_P (mem));
13545
13546 mode = GET_MODE (reg);
13547 addr = XEXP (mem, 0);
13548
13549 /* Strip off const from addresses like (const (plus (...))). */
13550 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13551 addr = XEXP (addr, 0);
13552
13553 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13554 {
13555 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13556 return insns * 4;
13557 }
13558 else
13559 return 4;
13560 }
13561
13562 /* Return nonzero if the offset in the address is an immediate. Otherwise,
13563 return zero. */
13564
13565 int
13566 arm_address_offset_is_imm (rtx insn)
13567 {
13568 rtx mem, addr;
13569
13570 extract_insn_cached (insn);
13571
13572 if (REG_P (recog_data.operand[0]))
13573 return 0;
13574
13575 mem = recog_data.operand[0];
13576
13577 gcc_assert (MEM_P (mem));
13578
13579 addr = XEXP (mem, 0);
13580
13581 if (GET_CODE (addr) == REG
13582 || (GET_CODE (addr) == PLUS
13583 && GET_CODE (XEXP (addr, 0)) == REG
13584 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
13585 return 1;
13586 else
13587 return 0;
13588 }
13589
13590 /* Output an ADD r, s, #n where n may be too big for one instruction.
13591 If N is zero and the source and destination registers are the same, output nothing. */
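/* For example (purely illustrative register numbers): a constant such as
   #0x10004, which is not a valid ARM immediate, is emitted by
   output_multi_immediate below as "add r0, r1, #4" followed by
   "add r0, r0, #65536"; a negative N uses the sub forms instead.  */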
13592 const char *
13593 output_add_immediate (rtx *operands)
13594 {
13595 HOST_WIDE_INT n = INTVAL (operands[2]);
13596
13597 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
13598 {
13599 if (n < 0)
13600 output_multi_immediate (operands,
13601 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13602 -n);
13603 else
13604 output_multi_immediate (operands,
13605 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13606 n);
13607 }
13608
13609 return "";
13610 }
13611
13612 /* Output a multiple immediate operation.
13613 OPERANDS is the vector of operands referred to in the output patterns.
13614 INSTR1 is the output pattern to use for the first constant.
13615 INSTR2 is the output pattern to use for subsequent constants.
13616 IMMED_OP is the index of the constant slot in OPERANDS.
13617 N is the constant value. */
13618 static const char *
13619 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13620 int immed_op, HOST_WIDE_INT n)
13621 {
13622 #if HOST_BITS_PER_WIDE_INT > 32
13623 n &= 0xffffffff;
13624 #endif
13625
13626 if (n == 0)
13627 {
13628 /* Quick and easy output. */
13629 operands[immed_op] = const0_rtx;
13630 output_asm_insn (instr1, operands);
13631 }
13632 else
13633 {
13634 int i;
13635 const char * instr = instr1;
13636
13637 /* Note that n is never zero here (which would give no output). */
13638 for (i = 0; i < 32; i += 2)
13639 {
13640 if (n & (3 << i))
13641 {
13642 operands[immed_op] = GEN_INT (n & (255 << i));
13643 output_asm_insn (instr, operands);
13644 instr = instr2;
13645 i += 6;
13646 }
13647 }
13648 }
13649
13650 return "";
13651 }
13652
13653 /* Return the name of a shifter operation. */
13654 static const char *
13655 arm_shift_nmem(enum rtx_code code)
13656 {
13657 switch (code)
13658 {
13659 case ASHIFT:
13660 return ARM_LSL_NAME;
13661
13662 case ASHIFTRT:
13663 return "asr";
13664
13665 case LSHIFTRT:
13666 return "lsr";
13667
13668 case ROTATERT:
13669 return "ror";
13670
13671 default:
13672 abort();
13673 }
13674 }
13675
13676 /* Return the appropriate ARM instruction for the operation code.
13677 The returned result should not be overwritten. OP is the rtx of the
13678 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13679 was shifted. */
13680 const char *
13681 arithmetic_instr (rtx op, int shift_first_arg)
13682 {
13683 switch (GET_CODE (op))
13684 {
13685 case PLUS:
13686 return "add";
13687
13688 case MINUS:
13689 return shift_first_arg ? "rsb" : "sub";
13690
13691 case IOR:
13692 return "orr";
13693
13694 case XOR:
13695 return "eor";
13696
13697 case AND:
13698 return "and";
13699
13700 case ASHIFT:
13701 case ASHIFTRT:
13702 case LSHIFTRT:
13703 case ROTATERT:
13704 return arm_shift_nmem(GET_CODE(op));
13705
13706 default:
13707 gcc_unreachable ();
13708 }
13709 }
13710
13711 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13712 for the operation code. The returned result should not be overwritten.
13713 OP is the rtx code of the shift.
13714 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
13715 constant shift amount otherwise. */
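/* For instance, a MULT by 8 reaching this function is rewritten as an
   "lsl" with *AMOUNTP set to 3, and an ASHIFT by 32 or more is emitted as
   "lsr #32" so that the arithmetic result (zero) is still correct; see the
   longer comment further down.  */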
13716 static const char *
13717 shift_op (rtx op, HOST_WIDE_INT *amountp)
13718 {
13719 const char * mnem;
13720 enum rtx_code code = GET_CODE (op);
13721
13722 switch (GET_CODE (XEXP (op, 1)))
13723 {
13724 case REG:
13725 case SUBREG:
13726 *amountp = -1;
13727 break;
13728
13729 case CONST_INT:
13730 *amountp = INTVAL (XEXP (op, 1));
13731 break;
13732
13733 default:
13734 gcc_unreachable ();
13735 }
13736
13737 switch (code)
13738 {
13739 case ROTATE:
13740 gcc_assert (*amountp != -1);
13741 *amountp = 32 - *amountp;
13742 code = ROTATERT;
13743
13744 /* Fall through. */
13745
13746 case ASHIFT:
13747 case ASHIFTRT:
13748 case LSHIFTRT:
13749 case ROTATERT:
13750 mnem = arm_shift_nmem(code);
13751 break;
13752
13753 case MULT:
13754 /* We never have to worry about the amount being other than a
13755 power of 2, since this case can never be reloaded from a reg. */
13756 gcc_assert (*amountp != -1);
13757 *amountp = int_log2 (*amountp);
13758 return ARM_LSL_NAME;
13759
13760 default:
13761 gcc_unreachable ();
13762 }
13763
13764 if (*amountp != -1)
13765 {
13766 /* This is not 100% correct, but follows from the desire to merge
13767 multiplication by a power of 2 with the recognizer for a
13768 shift. >=32 is not a valid shift for "lsl", so we must try and
13769 output a shift that produces the correct arithmetical result.
13770 Using lsr #32 is identical except for the fact that the carry bit
13771 is not set correctly if we set the flags; but we never use the
13772 carry bit from such an operation, so we can ignore that. */
13773 if (code == ROTATERT)
13774 /* Rotate is just modulo 32. */
13775 *amountp &= 31;
13776 else if (*amountp != (*amountp & 31))
13777 {
13778 if (code == ASHIFT)
13779 mnem = "lsr";
13780 *amountp = 32;
13781 }
13782
13783 /* Shifts of 0 are no-ops. */
13784 if (*amountp == 0)
13785 return NULL;
13786 }
13787
13788 return mnem;
13789 }
13790
13791 /* Obtain the shift count from the power of two. */
13792
13793 static HOST_WIDE_INT
13794 int_log2 (HOST_WIDE_INT power)
13795 {
13796 HOST_WIDE_INT shift = 0;
13797
13798 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13799 {
13800 gcc_assert (shift <= 31);
13801 shift++;
13802 }
13803
13804 return shift;
13805 }
13806
13807 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13808 because /bin/as is horribly restrictive. The judgement about
13809 whether or not each character is 'printable' (and can be output as
13810 is) or not (and must be printed with an octal escape) must be made
13811 with reference to the *host* character set -- the situation is
13812 similar to that discussed in the comments above pp_c_char in
13813 c-pretty-print.c. */
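/* For example (illustrative), the four bytes 'h', 'i', '"', '\n' would be
   emitted as
        .ascii  "hi\"\012"
   and a fresh .ascii directive is started whenever the current string
   reaches MAX_ASCII_LEN characters.  */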
13814
13815 #define MAX_ASCII_LEN 51
13816
13817 void
13818 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13819 {
13820 int i;
13821 int len_so_far = 0;
13822
13823 fputs ("\t.ascii\t\"", stream);
13824
13825 for (i = 0; i < len; i++)
13826 {
13827 int c = p[i];
13828
13829 if (len_so_far >= MAX_ASCII_LEN)
13830 {
13831 fputs ("\"\n\t.ascii\t\"", stream);
13832 len_so_far = 0;
13833 }
13834
13835 if (ISPRINT (c))
13836 {
13837 if (c == '\\' || c == '\"')
13838 {
13839 putc ('\\', stream);
13840 len_so_far++;
13841 }
13842 putc (c, stream);
13843 len_so_far++;
13844 }
13845 else
13846 {
13847 fprintf (stream, "\\%03o", c);
13848 len_so_far += 4;
13849 }
13850 }
13851
13852 fputs ("\"\n", stream);
13853 }
13854 \f
13855 /* Compute the register save mask for registers 0 through 12
13856 inclusive. This code is used by arm_compute_save_reg_mask. */
13857
13858 static unsigned long
13859 arm_compute_save_reg0_reg12_mask (void)
13860 {
13861 unsigned long func_type = arm_current_func_type ();
13862 unsigned long save_reg_mask = 0;
13863 unsigned int reg;
13864
13865 if (IS_INTERRUPT (func_type))
13866 {
13867 unsigned int max_reg;
13868 /* Interrupt functions must not corrupt any registers,
13869 even call clobbered ones. If this is a leaf function
13870 we can just examine the registers used by the RTL, but
13871 otherwise we have to assume that whatever function is
13872 called might clobber anything, and so we have to save
13873 all the call-clobbered registers as well. */
13874 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13875 /* FIQ handlers have registers r8 - r12 banked, so
13876 we only need to check r0 - r7. Normal ISRs only
13877 bank r14 and r15, so we must check up to r12.
13878 r13 is the stack pointer which is always preserved,
13879 so we do not need to consider it here. */
13880 max_reg = 7;
13881 else
13882 max_reg = 12;
13883
13884 for (reg = 0; reg <= max_reg; reg++)
13885 if (df_regs_ever_live_p (reg)
13886 || (! current_function_is_leaf && call_used_regs[reg]))
13887 save_reg_mask |= (1 << reg);
13888
13889 /* Also save the pic base register if necessary. */
13890 if (flag_pic
13891 && !TARGET_SINGLE_PIC_BASE
13892 && arm_pic_register != INVALID_REGNUM
13893 && crtl->uses_pic_offset_table)
13894 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13895 }
13896 else if (IS_VOLATILE(func_type))
13897 {
13898 /* For noreturn functions we historically omitted register saves
13899 altogether. However, this really messes up debugging. As a
13900 compromise, save just the frame pointers. Combined with the link
13901 register saved elsewhere this should be sufficient to get
13902 a backtrace. */
13903 if (frame_pointer_needed)
13904 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13905 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13906 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13907 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13908 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13909 }
13910 else
13911 {
13912 /* In the normal case we only need to save those registers
13913 which are call saved and which are used by this function. */
13914 for (reg = 0; reg <= 11; reg++)
13915 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13916 save_reg_mask |= (1 << reg);
13917
13918 /* Handle the frame pointer as a special case. */
13919 if (frame_pointer_needed)
13920 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13921
13922 /* If we aren't loading the PIC register,
13923 don't stack it even though it may be live. */
13924 if (flag_pic
13925 && !TARGET_SINGLE_PIC_BASE
13926 && arm_pic_register != INVALID_REGNUM
13927 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13928 || crtl->uses_pic_offset_table))
13929 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13930
13931 /* The prologue will copy SP into R0, so save it. */
13932 if (IS_STACKALIGN (func_type))
13933 save_reg_mask |= 1;
13934 }
13935
13936 /* Save registers so the exception handler can modify them. */
13937 if (crtl->calls_eh_return)
13938 {
13939 unsigned int i;
13940
13941 for (i = 0; ; i++)
13942 {
13943 reg = EH_RETURN_DATA_REGNO (i);
13944 if (reg == INVALID_REGNUM)
13945 break;
13946 save_reg_mask |= 1 << reg;
13947 }
13948 }
13949
13950 return save_reg_mask;
13951 }
13952
13953
13954 /* Compute the number of bytes used to store the static chain register on the
13955 stack, above the stack frame. We need to know this accurately to get the
13956 alignment of the rest of the stack frame correct. */
13957
13958 static int arm_compute_static_chain_stack_bytes (void)
13959 {
13960 unsigned long func_type = arm_current_func_type ();
13961 int static_chain_stack_bytes = 0;
13962
13963 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
13964 IS_NESTED (func_type) &&
13965 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13966 static_chain_stack_bytes = 4;
13967
13968 return static_chain_stack_bytes;
13969 }
13970
13971
13972 /* Compute a bit mask of which registers need to be
13973 saved on the stack for the current function.
13974 This is used by arm_get_frame_offsets, which may add extra registers. */
13975
13976 static unsigned long
13977 arm_compute_save_reg_mask (void)
13978 {
13979 unsigned int save_reg_mask = 0;
13980 unsigned long func_type = arm_current_func_type ();
13981 unsigned int reg;
13982
13983 if (IS_NAKED (func_type))
13984 /* This should never really happen. */
13985 return 0;
13986
13987 /* If we are creating a stack frame, then we must save the frame pointer,
13988 IP (which will hold the old stack pointer), LR and the PC. */
13989 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13990 save_reg_mask |=
13991 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13992 | (1 << IP_REGNUM)
13993 | (1 << LR_REGNUM)
13994 | (1 << PC_REGNUM);
13995
13996 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13997
13998 /* Decide if we need to save the link register.
13999 Interrupt routines have their own banked link register,
14000 so they never need to save it.
14001 Otherwise if we do not use the link register we do not need to save
14002 it. If we are pushing other registers onto the stack however, we
14003 can save an instruction in the epilogue by pushing the link register
14004 now and then popping it back into the PC. This incurs extra memory
14005 accesses though, so we only do it when optimizing for size, and only
14006 if we know that we will not need a fancy return sequence. */
14007 if (df_regs_ever_live_p (LR_REGNUM)
14008 || (save_reg_mask
14009 && optimize_size
14010 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14011 && !crtl->calls_eh_return))
14012 save_reg_mask |= 1 << LR_REGNUM;
14013
14014 if (cfun->machine->lr_save_eliminated)
14015 save_reg_mask &= ~ (1 << LR_REGNUM);
14016
14017 if (TARGET_REALLY_IWMMXT
14018 && ((bit_count (save_reg_mask)
14019 + ARM_NUM_INTS (crtl->args.pretend_args_size +
14020 arm_compute_static_chain_stack_bytes())
14021 ) % 2) != 0)
14022 {
14023 /* The total number of registers that are going to be pushed
14024 onto the stack is odd. We need to ensure that the stack
14025 is 64-bit aligned before we start to save iWMMXt registers,
14026 and also before we start to create locals. (A local variable
14027 might be a double or long long which we will load/store using
14028 an iWMMXt instruction). Therefore we need to push another
14029 ARM register, so that the stack will be 64-bit aligned. We
14030 try to avoid using the arg registers (r0 - r3) as they might be
14031 used to pass values in a tail call. */
14032 for (reg = 4; reg <= 12; reg++)
14033 if ((save_reg_mask & (1 << reg)) == 0)
14034 break;
14035
14036 if (reg <= 12)
14037 save_reg_mask |= (1 << reg);
14038 else
14039 {
14040 cfun->machine->sibcall_blocked = 1;
14041 save_reg_mask |= (1 << 3);
14042 }
14043 }
14044
14045 /* We may need to push an additional register for use initializing the
14046 PIC base register. */
14047 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
14048 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
14049 {
14050 reg = thumb_find_work_register (1 << 4);
14051 if (!call_used_regs[reg])
14052 save_reg_mask |= (1 << reg);
14053 }
14054
14055 return save_reg_mask;
14056 }
14057
14058
14059 /* Compute a bit mask of which registers need to be
14060 saved on the stack for the current function. */
14061 static unsigned long
14062 thumb1_compute_save_reg_mask (void)
14063 {
14064 unsigned long mask;
14065 unsigned reg;
14066
14067 mask = 0;
14068 for (reg = 0; reg < 12; reg ++)
14069 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14070 mask |= 1 << reg;
14071
14072 if (flag_pic
14073 && !TARGET_SINGLE_PIC_BASE
14074 && arm_pic_register != INVALID_REGNUM
14075 && crtl->uses_pic_offset_table)
14076 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14077
14078 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
14079 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
14080 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14081
14082 /* LR will also be pushed if any lo regs are pushed. */
14083 if (mask & 0xff || thumb_force_lr_save ())
14084 mask |= (1 << LR_REGNUM);
14085
14086 /* Make sure we have a low work register if we need one.
14087 We will need one if we are going to push a high register,
14088 but we are not currently intending to push a low register. */
14089 if ((mask & 0xff) == 0
14090 && ((mask & 0x0f00) || TARGET_BACKTRACE))
14091 {
14092 /* Use thumb_find_work_register to choose which register
14093 we will use. If the register is live then we will
14094 have to push it. Use LAST_LO_REGNUM as our fallback
14095 choice for the register to select. */
14096 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
14097 /* Make sure the register returned by thumb_find_work_register is
14098 not part of the return value. */
14099 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
14100 reg = LAST_LO_REGNUM;
14101
14102 if (! call_used_regs[reg])
14103 mask |= 1 << reg;
14104 }
14105
14106 /* The 504 below is 8 bytes less than 512 because there are two possible
14107 alignment words. We can't tell here if they will be present or not so we
14108 have to play it safe and assume that they are. */
14109 if ((CALLER_INTERWORKING_SLOT_SIZE +
14110 ROUND_UP_WORD (get_frame_size ()) +
14111 crtl->outgoing_args_size) >= 504)
14112 {
14113 /* This is the same as the code in thumb1_expand_prologue() which
14114 determines which register to use for stack decrement. */
14115 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
14116 if (mask & (1 << reg))
14117 break;
14118
14119 if (reg > LAST_LO_REGNUM)
14120 {
14121 /* Make sure we have a register available for stack decrement. */
14122 mask |= 1 << LAST_LO_REGNUM;
14123 }
14124 }
14125
14126 return mask;
14127 }
14128
14129
14130 /* Return the number of bytes required to save VFP registers. */
14131 static int
14132 arm_get_vfp_saved_size (void)
14133 {
14134 unsigned int regno;
14135 int count;
14136 int saved;
14137
14138 saved = 0;
14139 /* Space for saved VFP registers. */
14140 if (TARGET_HARD_FLOAT && TARGET_VFP)
14141 {
14142 count = 0;
14143 for (regno = FIRST_VFP_REGNUM;
14144 regno < LAST_VFP_REGNUM;
14145 regno += 2)
14146 {
14147 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
14148 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
14149 {
14150 if (count > 0)
14151 {
14152 /* Workaround ARM10 VFPr1 bug. */
14153 if (count == 2 && !arm_arch6)
14154 count++;
14155 saved += count * 8;
14156 }
14157 count = 0;
14158 }
14159 else
14160 count++;
14161 }
14162 if (count > 0)
14163 {
14164 if (count == 2 && !arm_arch6)
14165 count++;
14166 saved += count * 8;
14167 }
14168 }
14169 return saved;
14170 }
14171
14172
14173 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14174 everything bar the final return instruction. */
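/* A hedged example of typical output: for a function that saved only r4 and
   lr and needs no special exit sequence, the restore below becomes
   something like "ldmfd sp!, {r4, pc}" (or "pop {r4, pc}" with unified
   syntax), so no separate return instruction is emitted afterwards.  */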
14175 const char *
14176 output_return_instruction (rtx operand, int really_return, int reverse)
14177 {
14178 char conditional[10];
14179 char instr[100];
14180 unsigned reg;
14181 unsigned long live_regs_mask;
14182 unsigned long func_type;
14183 arm_stack_offsets *offsets;
14184
14185 func_type = arm_current_func_type ();
14186
14187 if (IS_NAKED (func_type))
14188 return "";
14189
14190 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14191 {
14192 /* If this function was declared non-returning, and we have
14193 found a tail call, then we have to trust that the called
14194 function won't return. */
14195 if (really_return)
14196 {
14197 rtx ops[2];
14198
14199 /* Otherwise, trap an attempted return by aborting. */
14200 ops[0] = operand;
14201 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14202 : "abort");
14203 assemble_external_libcall (ops[1]);
14204 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14205 }
14206
14207 return "";
14208 }
14209
14210 gcc_assert (!cfun->calls_alloca || really_return);
14211
14212 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14213
14214 cfun->machine->return_used_this_function = 1;
14215
14216 offsets = arm_get_frame_offsets ();
14217 live_regs_mask = offsets->saved_regs_mask;
14218
14219 if (live_regs_mask)
14220 {
14221 const char * return_reg;
14222
14223 /* If we do not have any special requirements for function exit
14224 (e.g. interworking) then we can load the return address
14225 directly into the PC. Otherwise we must load it into LR. */
14226 if (really_return
14227 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14228 return_reg = reg_names[PC_REGNUM];
14229 else
14230 return_reg = reg_names[LR_REGNUM];
14231
14232 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14233 {
14234 /* There are three possible reasons for the IP register
14235 being saved: 1) a stack frame was created, in which case
14236 IP contains the old stack pointer, or 2) an ISR routine
14237 corrupted it, or 3) it was saved to align the stack on
14238 iWMMXt. In case 1, restore IP into SP, otherwise just
14239 restore IP. */
14240 if (frame_pointer_needed)
14241 {
14242 live_regs_mask &= ~ (1 << IP_REGNUM);
14243 live_regs_mask |= (1 << SP_REGNUM);
14244 }
14245 else
14246 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14247 }
14248
14249 /* On some ARM architectures it is faster to use LDR rather than
14250 LDM to load a single register. On other architectures, the
14251 cost is the same. In 26 bit mode, or for exception handlers,
14252 we have to use LDM to load the PC so that the CPSR is also
14253 restored. */
14254 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14255 if (live_regs_mask == (1U << reg))
14256 break;
14257
14258 if (reg <= LAST_ARM_REGNUM
14259 && (reg != LR_REGNUM
14260 || ! really_return
14261 || ! IS_INTERRUPT (func_type)))
14262 {
14263 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14264 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14265 }
14266 else
14267 {
14268 char *p;
14269 int first = 1;
14270
14271 /* Generate the load multiple instruction to restore the
14272 registers. Note we can get here, even if
14273 frame_pointer_needed is true, but only if sp already
14274 points to the base of the saved core registers. */
14275 if (live_regs_mask & (1 << SP_REGNUM))
14276 {
14277 unsigned HOST_WIDE_INT stack_adjust;
14278
14279 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14280 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14281
14282 if (stack_adjust && arm_arch5 && TARGET_ARM)
14283 if (TARGET_UNIFIED_ASM)
14284 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14285 else
14286 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14287 else
14288 {
14289 /* If we can't use ldmib (SA110 bug),
14290 then try to pop r3 instead. */
14291 if (stack_adjust)
14292 live_regs_mask |= 1 << 3;
14293
14294 if (TARGET_UNIFIED_ASM)
14295 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14296 else
14297 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14298 }
14299 }
14300 else
14301 if (TARGET_UNIFIED_ASM)
14302 sprintf (instr, "pop%s\t{", conditional);
14303 else
14304 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14305
14306 p = instr + strlen (instr);
14307
14308 for (reg = 0; reg <= SP_REGNUM; reg++)
14309 if (live_regs_mask & (1 << reg))
14310 {
14311 int l = strlen (reg_names[reg]);
14312
14313 if (first)
14314 first = 0;
14315 else
14316 {
14317 memcpy (p, ", ", 2);
14318 p += 2;
14319 }
14320
14321 memcpy (p, "%|", 2);
14322 memcpy (p + 2, reg_names[reg], l);
14323 p += l + 2;
14324 }
14325
14326 if (live_regs_mask & (1 << LR_REGNUM))
14327 {
14328 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14329 /* If returning from an interrupt, restore the CPSR. */
14330 if (IS_INTERRUPT (func_type))
14331 strcat (p, "^");
14332 }
14333 else
14334 strcpy (p, "}");
14335 }
14336
14337 output_asm_insn (instr, & operand);
14338
14339 /* See if we need to generate an extra instruction to
14340 perform the actual function return. */
14341 if (really_return
14342 && func_type != ARM_FT_INTERWORKED
14343 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14344 {
14345 /* The return has already been handled
14346 by loading the LR into the PC. */
14347 really_return = 0;
14348 }
14349 }
14350
14351 if (really_return)
14352 {
14353 switch ((int) ARM_FUNC_TYPE (func_type))
14354 {
14355 case ARM_FT_ISR:
14356 case ARM_FT_FIQ:
14357 /* ??? This is wrong for unified assembly syntax. */
14358 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14359 break;
14360
14361 case ARM_FT_INTERWORKED:
14362 sprintf (instr, "bx%s\t%%|lr", conditional);
14363 break;
14364
14365 case ARM_FT_EXCEPTION:
14366 /* ??? This is wrong for unified assembly syntax. */
14367 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14368 break;
14369
14370 default:
14371 /* Use bx if it's available. */
14372 if (arm_arch5 || arm_arch4t)
14373 sprintf (instr, "bx%s\t%%|lr", conditional);
14374 else
14375 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14376 break;
14377 }
14378
14379 output_asm_insn (instr, & operand);
14380 }
14381
14382 return "";
14383 }
14384
14385 /* Write the function name into the code section, directly preceding
14386 the function prologue.
14387
14388 Code will be output similar to this:
14389 t0
14390 .ascii "arm_poke_function_name", 0
14391 .align
14392 t1
14393 .word 0xff000000 + (t1 - t0)
14394 arm_poke_function_name
14395 mov ip, sp
14396 stmfd sp!, {fp, ip, lr, pc}
14397 sub fp, ip, #4
14398
14399 When performing a stack backtrace, code can inspect the value
14400 of 'pc' stored at 'fp' + 0. If the trace function then looks
14401 at location pc - 12 and the top 8 bits are set, then we know
14402 that there is a function name embedded immediately preceding this
14403 location, whose length is given by ((pc[-3]) & ~0xff000000).
14404
14405 We assume that pc is declared as a pointer to an unsigned long.
14406
14407 It is of no benefit to output the function name if we are assembling
14408 a leaf function. These function types will not contain a stack
14409 backtrace structure, so it is not possible to determine the
14410 function name. */
14411 void
14412 arm_poke_function_name (FILE *stream, const char *name)
14413 {
14414 unsigned long alignlength;
14415 unsigned long length;
14416 rtx x;
14417
14418 length = strlen (name) + 1;
14419 alignlength = ROUND_UP_WORD (length);
14420
14421 ASM_OUTPUT_ASCII (stream, name, length);
14422 ASM_OUTPUT_ALIGN (stream, 2);
14423 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14424 assemble_aligned_integer (UNITS_PER_WORD, x);
14425 }
14426
14427 /* Place some comments into the assembler stream
14428 describing the current function. */
14429 static void
14430 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14431 {
14432 unsigned long func_type;
14433
14434 if (TARGET_THUMB1)
14435 {
14436 thumb1_output_function_prologue (f, frame_size);
14437 return;
14438 }
14439
14440 /* Sanity check. */
14441 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14442
14443 func_type = arm_current_func_type ();
14444
14445 switch ((int) ARM_FUNC_TYPE (func_type))
14446 {
14447 default:
14448 case ARM_FT_NORMAL:
14449 break;
14450 case ARM_FT_INTERWORKED:
14451 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14452 break;
14453 case ARM_FT_ISR:
14454 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14455 break;
14456 case ARM_FT_FIQ:
14457 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14458 break;
14459 case ARM_FT_EXCEPTION:
14460 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14461 break;
14462 }
14463
14464 if (IS_NAKED (func_type))
14465 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14466
14467 if (IS_VOLATILE (func_type))
14468 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14469
14470 if (IS_NESTED (func_type))
14471 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14472 if (IS_STACKALIGN (func_type))
14473 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14474
14475 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14476 crtl->args.size,
14477 crtl->args.pretend_args_size, frame_size);
14478
14479 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14480 frame_pointer_needed,
14481 cfun->machine->uses_anonymous_args);
14482
14483 if (cfun->machine->lr_save_eliminated)
14484 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14485
14486 if (crtl->calls_eh_return)
14487 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
14488
14489 }
14490
14491 const char *
14492 arm_output_epilogue (rtx sibling)
14493 {
14494 int reg;
14495 unsigned long saved_regs_mask;
14496 unsigned long func_type;
14497 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14498 frame that is $fp + 4 for a non-variadic function. */
14499 int floats_offset = 0;
14500 rtx operands[3];
14501 FILE * f = asm_out_file;
14502 unsigned int lrm_count = 0;
14503 int really_return = (sibling == NULL);
14504 int start_reg;
14505 arm_stack_offsets *offsets;
14506
14507 /* If we have already generated the return instruction
14508 then it is futile to generate anything else. */
14509 if (use_return_insn (FALSE, sibling) &&
14510 (cfun->machine->return_used_this_function != 0))
14511 return "";
14512
14513 func_type = arm_current_func_type ();
14514
14515 if (IS_NAKED (func_type))
14516 /* Naked functions don't have epilogues. */
14517 return "";
14518
14519 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14520 {
14521 rtx op;
14522
14523 /* A volatile function should never return. Call abort. */
14524 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14525 assemble_external_libcall (op);
14526 output_asm_insn ("bl\t%a0", &op);
14527
14528 return "";
14529 }
14530
14531 /* If we are throwing an exception, then we really must be doing a
14532 return, so we can't tail-call. */
14533 gcc_assert (!crtl->calls_eh_return || really_return);
14534
14535 offsets = arm_get_frame_offsets ();
14536 saved_regs_mask = offsets->saved_regs_mask;
14537
14538 if (TARGET_IWMMXT)
14539 lrm_count = bit_count (saved_regs_mask);
14540
14541 floats_offset = offsets->saved_args;
14542 /* Compute how far away the floats will be. */
14543 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14544 if (saved_regs_mask & (1 << reg))
14545 floats_offset += 4;
14546
14547 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14548 {
14549 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14550 int vfp_offset = offsets->frame;
14551
14552 if (TARGET_FPA_EMU2)
14553 {
14554 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14555 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14556 {
14557 floats_offset += 12;
14558 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14559 reg, FP_REGNUM, floats_offset - vfp_offset);
14560 }
14561 }
14562 else
14563 {
14564 start_reg = LAST_FPA_REGNUM;
14565
14566 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14567 {
14568 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14569 {
14570 floats_offset += 12;
14571
14572 /* We can't unstack more than four registers at once. */
14573 if (start_reg - reg == 3)
14574 {
14575 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14576 reg, FP_REGNUM, floats_offset - vfp_offset);
14577 start_reg = reg - 1;
14578 }
14579 }
14580 else
14581 {
14582 if (reg != start_reg)
14583 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14584 reg + 1, start_reg - reg,
14585 FP_REGNUM, floats_offset - vfp_offset);
14586 start_reg = reg - 1;
14587 }
14588 }
14589
14590 /* Just in case the last register checked also needs unstacking. */
14591 if (reg != start_reg)
14592 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14593 reg + 1, start_reg - reg,
14594 FP_REGNUM, floats_offset - vfp_offset);
14595 }
14596
14597 if (TARGET_HARD_FLOAT && TARGET_VFP)
14598 {
14599 int saved_size;
14600
14601 /* The fldmd insns do not have base+offset addressing
14602 modes, so we use IP to hold the address. */
14603 saved_size = arm_get_vfp_saved_size ();
14604
14605 if (saved_size > 0)
14606 {
14607 floats_offset += saved_size;
14608 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14609 FP_REGNUM, floats_offset - vfp_offset);
14610 }
14611 start_reg = FIRST_VFP_REGNUM;
14612 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14613 {
14614 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14615 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14616 {
14617 if (start_reg != reg)
14618 vfp_output_fldmd (f, IP_REGNUM,
14619 (start_reg - FIRST_VFP_REGNUM) / 2,
14620 (reg - start_reg) / 2);
14621 start_reg = reg + 2;
14622 }
14623 }
14624 if (start_reg != reg)
14625 vfp_output_fldmd (f, IP_REGNUM,
14626 (start_reg - FIRST_VFP_REGNUM) / 2,
14627 (reg - start_reg) / 2);
14628 }
14629
14630 if (TARGET_IWMMXT)
14631 {
14632 /* The frame pointer is guaranteed to be non-double-word aligned.
14633 This is because it is set to (old_stack_pointer - 4) and the
14634 old_stack_pointer was double word aligned. Thus the offset to
14635 the iWMMXt registers to be loaded must also be non-double-word
14636 sized, so that the resultant address *is* double-word aligned.
14637 We can ignore floats_offset since that was already included in
14638 the live_regs_mask. */
14639 lrm_count += (lrm_count % 2 ? 2 : 1);
14640
14641 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14642 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14643 {
14644 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14645 reg, FP_REGNUM, lrm_count * 4);
14646 lrm_count += 2;
14647 }
14648 }
14649
14650 /* saved_regs_mask should contain the IP, which at the time of stack
14651 frame generation actually contains the old stack pointer. So a
14652 quick way to unwind the stack is just to pop the IP register directly
14653 into the stack pointer. */
14654 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14655 saved_regs_mask &= ~ (1 << IP_REGNUM);
14656 saved_regs_mask |= (1 << SP_REGNUM);
14657
14658 /* There are two registers left in saved_regs_mask - LR and PC. We
14659 only need to restore the LR register (the return address), but to
14660 save time we can load it directly into the PC, unless we need a
14661 special function exit sequence, or we are not really returning. */
14662 if (really_return
14663 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14664 && !crtl->calls_eh_return)
14665 /* Delete the LR from the register mask, so that the LR on
14666 the stack is loaded into the PC in the register mask. */
14667 saved_regs_mask &= ~ (1 << LR_REGNUM);
14668 else
14669 saved_regs_mask &= ~ (1 << PC_REGNUM);
14670
14671 /* We must use SP as the base register, because SP is one of the
14672 registers being restored. If an interrupt or page fault
14673 happens in the ldm instruction, the SP might or might not
14674 have been restored. That would be bad, as then SP will no
14675 longer indicate the safe area of stack, and we can get stack
14676 corruption. Using SP as the base register means that it will
14677 be reset correctly to the original value, should an interrupt
14678 occur. If the stack pointer already points at the right
14679 place, then omit the subtraction. */
14680 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14681 || cfun->calls_alloca)
14682 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14683 4 * bit_count (saved_regs_mask));
14684 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14685
14686 if (IS_INTERRUPT (func_type))
14687 /* Interrupt handlers will have pushed the
14688 IP onto the stack, so restore it now. */
14689 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14690 }
14691 else
14692 {
14693 /* This branch is executed for ARM mode (non-apcs frames) and
14694 Thumb-2 mode. Frame layout is essentially the same for those
14695 cases, except that in ARM mode the frame pointer points to the
14696 first saved register, while in Thumb-2 mode it points
14697 to the last saved register.
14698
14699 It is possible to make the frame pointer point to the last saved
14700 register in both cases, and remove some conditionals below.
14701 That means that the fp setup in the prologue would be just "mov fp, sp"
14702 and the sp restore in the epilogue would be just "mov sp, fp", whereas
14703 now we have to use add/sub in those cases. However, the value
14704 of that would be marginal, as both mov and add/sub are 32-bit
14705 in ARM mode, and it would require extra conditionals
14706 in arm_expand_prologue to distinguish the ARM-apcs-frame case
14707 (where the frame pointer is required to point at the first register)
14708 from the ARM-non-apcs-frame case. Therefore, such a change is postponed
14709 until a real need arises. */
14710 unsigned HOST_WIDE_INT amount;
14711 int rfe;
14712 /* Restore stack pointer if necessary. */
14713 if (TARGET_ARM && frame_pointer_needed)
14714 {
14715 operands[0] = stack_pointer_rtx;
14716 operands[1] = hard_frame_pointer_rtx;
14717
14718 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14719 output_add_immediate (operands);
14720 }
14721 else
14722 {
14723 if (frame_pointer_needed)
14724 {
14725 /* For Thumb-2, restore sp from the frame pointer.
14726 Operand restrictions mean we have to increment FP, then copy it
14727 to SP.  */
14728 amount = offsets->locals_base - offsets->saved_regs;
14729 operands[0] = hard_frame_pointer_rtx;
14730 }
14731 else
14732 {
14733 unsigned long count;
14734 operands[0] = stack_pointer_rtx;
14735 amount = offsets->outgoing_args - offsets->saved_regs;
14736 /* pop call clobbered registers if it avoids a
14737 separate stack adjustment. */
14738 count = offsets->saved_regs - offsets->saved_args;
14739 if (optimize_size
14740 && count != 0
14741 && !crtl->calls_eh_return
14742 && bit_count(saved_regs_mask) * 4 == count
14743 && !IS_INTERRUPT (func_type)
14744 && !crtl->tail_call_emit)
14745 {
14746 unsigned long mask;
14747 /* Preserve return values, of any size. */
14748 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
14749 mask ^= 0xf;
14750 mask &= ~saved_regs_mask;
14751 reg = 0;
14752 while (bit_count (mask) * 4 > amount)
14753 {
14754 while ((mask & (1 << reg)) == 0)
14755 reg++;
14756 mask &= ~(1 << reg);
14757 }
14758 if (bit_count (mask) * 4 == amount) {
14759 amount = 0;
14760 saved_regs_mask |= mask;
14761 }
14762 }
14763 }
14764
14765 if (amount)
14766 {
14767 operands[1] = operands[0];
14768 operands[2] = GEN_INT (amount);
14769 output_add_immediate (operands);
14770 }
14771 if (frame_pointer_needed)
14772 asm_fprintf (f, "\tmov\t%r, %r\n",
14773 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14774 }
14775
14776 if (TARGET_FPA_EMU2)
14777 {
14778 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14779 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14780 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14781 reg, SP_REGNUM);
14782 }
14783 else
14784 {
14785 start_reg = FIRST_FPA_REGNUM;
14786
14787 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14788 {
14789 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14790 {
14791 if (reg - start_reg == 3)
14792 {
14793 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14794 start_reg, SP_REGNUM);
14795 start_reg = reg + 1;
14796 }
14797 }
14798 else
14799 {
14800 if (reg != start_reg)
14801 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14802 start_reg, reg - start_reg,
14803 SP_REGNUM);
14804
14805 start_reg = reg + 1;
14806 }
14807 }
14808
14809 /* Just in case the last register checked also needs unstacking. */
14810 if (reg != start_reg)
14811 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14812 start_reg, reg - start_reg, SP_REGNUM);
14813 }
14814
14815 if (TARGET_HARD_FLOAT && TARGET_VFP)
14816 {
14817 int end_reg = LAST_VFP_REGNUM + 1;
14818
14819 /* Scan the registers in reverse order. We need to match
14820 any groupings made in the prologue and generate matching
14821 pop operations. */
14822 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14823 {
14824 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14825 && (!df_regs_ever_live_p (reg + 1)
14826 || call_used_regs[reg + 1]))
14827 {
14828 if (end_reg > reg + 2)
14829 vfp_output_fldmd (f, SP_REGNUM,
14830 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14831 (end_reg - (reg + 2)) / 2);
14832 end_reg = reg;
14833 }
14834 }
14835 if (end_reg > reg + 2)
14836 vfp_output_fldmd (f, SP_REGNUM, 0,
14837 (end_reg - (reg + 2)) / 2);
14838 }
14839
14840 if (TARGET_IWMMXT)
14841 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14842 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14843 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14844
14845 /* If we can, restore the LR into the PC. */
14846 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14847 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14848 && !IS_STACKALIGN (func_type)
14849 && really_return
14850 && crtl->args.pretend_args_size == 0
14851 && saved_regs_mask & (1 << LR_REGNUM)
14852 && !crtl->calls_eh_return)
14853 {
14854 saved_regs_mask &= ~ (1 << LR_REGNUM);
14855 saved_regs_mask |= (1 << PC_REGNUM);
14856 rfe = IS_INTERRUPT (func_type);
14857 }
14858 else
14859 rfe = 0;
14860
14861 /* Load the registers off the stack.  If we only have one register
14862 to load, use the LDR instruction - it is faster.  For Thumb-2,
14863 always use pop and let the assembler pick the best instruction.  */
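	 /* Illustrative forms of what gets emitted here, assuming typical
	    register masks (not exhaustive):
		ARM, LR only:		ldr	lr, [sp], #4
		ARM, several regs:	ldmfd	sp!, {r4, r5, lr}
		Thumb-2:		pop	{r4, r5, pc}  */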
14864 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14865 && !IS_INTERRUPT(func_type))
14866 {
14867 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14868 }
14869 else if (saved_regs_mask)
14870 {
14871 if (saved_regs_mask & (1 << SP_REGNUM))
14872 /* Note - write back to the stack register is not enabled
14873 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14874 in the list of registers and if we add writeback the
14875 instruction becomes UNPREDICTABLE. */
14876 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14877 rfe);
14878 else if (TARGET_ARM)
14879 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14880 rfe);
14881 else
14882 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14883 }
14884
14885 if (crtl->args.pretend_args_size)
14886 {
14887 /* Unwind the pre-pushed regs. */
14888 operands[0] = operands[1] = stack_pointer_rtx;
14889 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14890 output_add_immediate (operands);
14891 }
14892 }
14893
14894 /* We may have already restored PC directly from the stack. */
14895 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14896 return "";
14897
14898 /* Stack adjustment for exception handler. */
14899 if (crtl->calls_eh_return)
14900 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14901 ARM_EH_STACKADJ_REGNUM);
14902
14903 /* Generate the return instruction. */
14904 switch ((int) ARM_FUNC_TYPE (func_type))
14905 {
14906 case ARM_FT_ISR:
14907 case ARM_FT_FIQ:
14908 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14909 break;
14910
14911 case ARM_FT_EXCEPTION:
14912 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14913 break;
14914
14915 case ARM_FT_INTERWORKED:
14916 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14917 break;
14918
14919 default:
14920 if (IS_STACKALIGN (func_type))
14921 {
14922 /* See comment in arm_expand_prologue. */
14923 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14924 }
14925 if (arm_arch5 || arm_arch4t)
14926 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14927 else
14928 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14929 break;
14930 }
14931
14932 return "";
14933 }
14934
14935 static void
14936 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14937 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14938 {
14939 arm_stack_offsets *offsets;
14940
14941 if (TARGET_THUMB1)
14942 {
14943 int regno;
14944
14945 /* Emit any call-via-reg trampolines that are needed for v4t support
14946 of call_reg and call_value_reg type insns. */
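      /* For example, if a call through r2 was emitted earlier, this loop
	 emits roughly:
		.L<n>:
			bx	r2
	 so that the earlier call could branch-and-link to .L<n> instead
	 (illustrative; the label name is assembler-internal).  */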
14947 for (regno = 0; regno < LR_REGNUM; regno++)
14948 {
14949 rtx label = cfun->machine->call_via[regno];
14950
14951 if (label != NULL)
14952 {
14953 switch_to_section (function_section (current_function_decl));
14954 targetm.asm_out.internal_label (asm_out_file, "L",
14955 CODE_LABEL_NUMBER (label));
14956 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14957 }
14958 }
14959
14960 /* ??? Probably not safe to set this here, since it assumes that a
14961 function will be emitted as assembly immediately after we generate
14962 RTL for it. This does not happen for inline functions. */
14963 cfun->machine->return_used_this_function = 0;
14964 }
14965 else /* TARGET_32BIT */
14966 {
14967 /* We need to take into account any stack-frame rounding. */
14968 offsets = arm_get_frame_offsets ();
14969
14970 gcc_assert (!use_return_insn (FALSE, NULL)
14971 || (cfun->machine->return_used_this_function != 0)
14972 || offsets->saved_regs == offsets->outgoing_args
14973 || frame_pointer_needed);
14974
14975 /* Reset the ARM-specific per-function variables. */
14976 after_arm_reorg = 0;
14977 }
14978 }
14979
14980 /* Generate and emit an insn that we will recognize as a push_multi.
14981 Unfortunately, since this insn does not reflect very well the actual
14982 semantics of the operation, we need to annotate the insn for the benefit
14983 of DWARF2 frame unwind information. */
14984 static rtx
14985 emit_multi_reg_push (unsigned long mask)
14986 {
14987 int num_regs = 0;
14988 int num_dwarf_regs;
14989 int i, j;
14990 rtx par;
14991 rtx dwarf;
14992 int dwarf_par_index;
14993 rtx tmp, reg;
14994
14995 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14996 if (mask & (1 << i))
14997 num_regs++;
14998
14999 gcc_assert (num_regs && num_regs <= 16);
15000
15001 /* We don't record the PC in the dwarf frame information. */
15002 num_dwarf_regs = num_regs;
15003 if (mask & (1 << PC_REGNUM))
15004 num_dwarf_regs--;
15005
15006 /* For the body of the insn we are going to generate an UNSPEC in
15007 parallel with several USEs. This allows the insn to be recognized
15008 by the push_multi pattern in the arm.md file.
15009
15010 The body of the insn looks something like this:
15011
15012 (parallel [
15013 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15014 (const_int:SI <num>)))
15015 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15016 (use (reg:SI XX))
15017 (use (reg:SI YY))
15018 ...
15019 ])
15020
15021 For the frame note however, we try to be more explicit and actually
15022 show each register being stored into the stack frame, plus a (single)
15023 decrement of the stack pointer. We do it this way in order to be
15024 friendly to the stack unwinding code, which only wants to see a single
15025 stack decrement per instruction. The RTL we generate for the note looks
15026 something like this:
15027
15028 (sequence [
15029 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15030 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15031 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15032 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15033 ...
15034 ])
15035
15036 FIXME: In an ideal world the PRE_MODIFY would not exist and
15037 instead we'd have a parallel expression detailing all
15038 the stores to the various memory addresses so that debug
15039 information is more up-to-date. Remember however while writing
15040 this to take care of the constraints with the push instruction.
15041
15042 Note also that this has to be taken care of for the VFP registers.
15043
15044 For more see PR43399. */
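     A minimal usage sketch (hypothetical mask): calling
     emit_multi_reg_push ((1 << 4) | (1 << LR_REGNUM)) emits a single
     push_multi insn that assembles to roughly "stmfd sp!, {r4, lr}",
     with a frame note recording the 8-byte SP decrement and both
     stores for the unwinder.  */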
15045
15046 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15047 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15048 dwarf_par_index = 1;
15049
15050 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15051 {
15052 if (mask & (1 << i))
15053 {
15054 reg = gen_rtx_REG (SImode, i);
15055
15056 XVECEXP (par, 0, 0)
15057 = gen_rtx_SET (VOIDmode,
15058 gen_frame_mem
15059 (BLKmode,
15060 gen_rtx_PRE_MODIFY (Pmode,
15061 stack_pointer_rtx,
15062 plus_constant
15063 (stack_pointer_rtx,
15064 -4 * num_regs))
15065 ),
15066 gen_rtx_UNSPEC (BLKmode,
15067 gen_rtvec (1, reg),
15068 UNSPEC_PUSH_MULT));
15069
15070 if (i != PC_REGNUM)
15071 {
15072 tmp = gen_rtx_SET (VOIDmode,
15073 gen_frame_mem (SImode, stack_pointer_rtx),
15074 reg);
15075 RTX_FRAME_RELATED_P (tmp) = 1;
15076 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15077 dwarf_par_index++;
15078 }
15079
15080 break;
15081 }
15082 }
15083
15084 for (j = 1, i++; j < num_regs; i++)
15085 {
15086 if (mask & (1 << i))
15087 {
15088 reg = gen_rtx_REG (SImode, i);
15089
15090 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15091
15092 if (i != PC_REGNUM)
15093 {
15094 tmp
15095 = gen_rtx_SET (VOIDmode,
15096 gen_frame_mem
15097 (SImode,
15098 plus_constant (stack_pointer_rtx,
15099 4 * j)),
15100 reg);
15101 RTX_FRAME_RELATED_P (tmp) = 1;
15102 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15103 }
15104
15105 j++;
15106 }
15107 }
15108
15109 par = emit_insn (par);
15110
15111 tmp = gen_rtx_SET (VOIDmode,
15112 stack_pointer_rtx,
15113 plus_constant (stack_pointer_rtx, -4 * num_regs));
15114 RTX_FRAME_RELATED_P (tmp) = 1;
15115 XVECEXP (dwarf, 0, 0) = tmp;
15116
15117 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15118
15119 return par;
15120 }
15121
15122 /* Calculate the size of the return value that is passed in registers. */
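/* For example, a function returning "long long" has a DImode return
   value, so this returns 8 (illustrative).  */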
15123 static unsigned
15124 arm_size_return_regs (void)
15125 {
15126 enum machine_mode mode;
15127
15128 if (crtl->return_rtx != 0)
15129 mode = GET_MODE (crtl->return_rtx);
15130 else
15131 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15132
15133 return GET_MODE_SIZE (mode);
15134 }
15135
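/* Emit an insn that pushes COUNT FPA registers, starting at register
   BASE_REG, below the stack pointer (12 bytes per XFmode register), and
   attach a REG_FRAME_RELATED_EXPR note describing the individual stores
   for the DWARF unwinder, mirroring emit_multi_reg_push above.  */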
15136 static rtx
15137 emit_sfm (int base_reg, int count)
15138 {
15139 rtx par;
15140 rtx dwarf;
15141 rtx tmp, reg;
15142 int i;
15143
15144 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
15145 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
15146
15147 reg = gen_rtx_REG (XFmode, base_reg++);
15148
15149 XVECEXP (par, 0, 0)
15150 = gen_rtx_SET (VOIDmode,
15151 gen_frame_mem
15152 (BLKmode,
15153 gen_rtx_PRE_MODIFY (Pmode,
15154 stack_pointer_rtx,
15155 plus_constant
15156 (stack_pointer_rtx,
15157 -12 * count))
15158 ),
15159 gen_rtx_UNSPEC (BLKmode,
15160 gen_rtvec (1, reg),
15161 UNSPEC_PUSH_MULT));
15162 tmp = gen_rtx_SET (VOIDmode,
15163 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
15164 RTX_FRAME_RELATED_P (tmp) = 1;
15165 XVECEXP (dwarf, 0, 1) = tmp;
15166
15167 for (i = 1; i < count; i++)
15168 {
15169 reg = gen_rtx_REG (XFmode, base_reg++);
15170 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
15171
15172 tmp = gen_rtx_SET (VOIDmode,
15173 gen_frame_mem (XFmode,
15174 plus_constant (stack_pointer_rtx,
15175 i * 12)),
15176 reg);
15177 RTX_FRAME_RELATED_P (tmp) = 1;
15178 XVECEXP (dwarf, 0, i + 1) = tmp;
15179 }
15180
15181 tmp = gen_rtx_SET (VOIDmode,
15182 stack_pointer_rtx,
15183 plus_constant (stack_pointer_rtx, -12 * count));
15184
15185 RTX_FRAME_RELATED_P (tmp) = 1;
15186 XVECEXP (dwarf, 0, 0) = tmp;
15187
15188 par = emit_insn (par);
15189 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15190
15191 return par;
15192 }
15193
15194
15195 /* Return true if the current function needs to save/restore LR. */
15196
15197 static bool
15198 thumb_force_lr_save (void)
15199 {
15200 return !cfun->machine->lr_save_eliminated
15201 && (!leaf_function_p ()
15202 || thumb_far_jump_used_p ()
15203 || df_regs_ever_live_p (LR_REGNUM));
15204 }
15205
15206
15207 /* Return true if r3 is used by any of the tail call insns in the
15208 current function. */
15209
15210 static bool
15211 any_sibcall_uses_r3 (void)
15212 {
15213 edge_iterator ei;
15214 edge e;
15215
15216 if (!crtl->tail_call_emit)
15217 return false;
15218 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15219 if (e->flags & EDGE_SIBCALL)
15220 {
15221 rtx call = BB_END (e->src);
15222 if (!CALL_P (call))
15223 call = prev_nonnote_nondebug_insn (call);
15224 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
15225 if (find_regno_fusage (call, USE, 3))
15226 return true;
15227 }
15228 return false;
15229 }
15230
15231
15232 /* Compute the distance from register FROM to register TO.
15233 These can be the arg pointer (26), the soft frame pointer (25),
15234 the stack pointer (13) or the hard frame pointer (11).
15235 In thumb mode r7 is used as the soft frame pointer, if needed.
15236 Typical stack layout looks like this:
15237
15238 old stack pointer -> | |
15239 ----
15240 | | \
15241 | | saved arguments for
15242 | | vararg functions
15243 | | /
15244 --
15245 hard FP & arg pointer -> | | \
15246 | | stack
15247 | | frame
15248 | | /
15249 --
15250 | | \
15251 | | call saved
15252 | | registers
15253 soft frame pointer -> | | /
15254 --
15255 | | \
15256 | | local
15257 | | variables
15258 locals base pointer -> | | /
15259 --
15260 | | \
15261 | | outgoing
15262 | | arguments
15263 current stack pointer -> | | /
15264 --
15265
15266 For a given function some or all of these stack components
15267 may not be needed, giving rise to the possibility of
15268 eliminating some of the registers.
15269
15270 The values returned by this function must reflect the behavior
15271 of arm_expand_prologue() and arm_compute_save_reg_mask().
15272
15273 The sign of the number returned reflects the direction of stack
15274 growth, so the values are positive for all eliminations except
15275 from the soft frame pointer to the hard frame pointer.
15276
15277 SFP may point just inside the local variables block to ensure correct
15278 alignment. */
15279
15280
15281 /* Calculate stack offsets. These are used to calculate register elimination
15282 offsets and in prologue/epilogue code. Also calculates which registers
15283 should be saved. */
15284
15285 static arm_stack_offsets *
15286 arm_get_frame_offsets (void)
15287 {
15288 struct arm_stack_offsets *offsets;
15289 unsigned long func_type;
15290 int leaf;
15291 int saved;
15292 int core_saved;
15293 HOST_WIDE_INT frame_size;
15294 int i;
15295
15296 offsets = &cfun->machine->stack_offsets;
15297
15298 /* We need to know if we are a leaf function. Unfortunately, it
15299 is possible to be called after start_sequence has been called,
15300 which causes get_insns to return the insns for the sequence,
15301 not the function, which will cause leaf_function_p to return
15302 the incorrect result.
15303
15304 Fortunately, we only need to know about leaf functions once reload has completed, and the
15305 frame size cannot be changed after that time, so we can safely
15306 use the cached value. */
15307
15308 if (reload_completed)
15309 return offsets;
15310
15311 /* Initially this is the size of the local variables.  It will be translated
15312 into an offset once we have determined the size of preceding data. */
15313 frame_size = ROUND_UP_WORD (get_frame_size ());
15314
15315 leaf = leaf_function_p ();
15316
15317 /* Space for variadic functions. */
15318 offsets->saved_args = crtl->args.pretend_args_size;
15319
15320 /* In Thumb mode this is incorrect, but never used. */
15321 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
15322 arm_compute_static_chain_stack_bytes();
15323
15324 if (TARGET_32BIT)
15325 {
15326 unsigned int regno;
15327
15328 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15329 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15330 saved = core_saved;
15331
15332 /* We know that SP will be doubleword aligned on entry, and we must
15333 preserve that condition at any subroutine call. We also require the
15334 soft frame pointer to be doubleword aligned. */
15335
15336 if (TARGET_REALLY_IWMMXT)
15337 {
15338 /* Check for the call-saved iWMMXt registers. */
15339 for (regno = FIRST_IWMMXT_REGNUM;
15340 regno <= LAST_IWMMXT_REGNUM;
15341 regno++)
15342 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15343 saved += 8;
15344 }
15345
15346 func_type = arm_current_func_type ();
15347 if (! IS_VOLATILE (func_type))
15348 {
15349 /* Space for saved FPA registers. */
15350 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15351 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15352 saved += 12;
15353
15354 /* Space for saved VFP registers. */
15355 if (TARGET_HARD_FLOAT && TARGET_VFP)
15356 saved += arm_get_vfp_saved_size ();
15357 }
15358 }
15359 else /* TARGET_THUMB1 */
15360 {
15361 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15362 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15363 saved = core_saved;
15364 if (TARGET_BACKTRACE)
15365 saved += 16;
15366 }
15367
15368 /* Saved registers include the stack frame. */
15369 offsets->saved_regs = offsets->saved_args + saved +
15370 arm_compute_static_chain_stack_bytes();
15371 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
15372 /* A leaf function does not need any stack alignment if it has nothing
15373 on the stack. */
15374 if (leaf && frame_size == 0
15375 /* However if it calls alloca(), we have a dynamically allocated
15376 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
15377 && ! cfun->calls_alloca)
15378 {
15379 offsets->outgoing_args = offsets->soft_frame;
15380 offsets->locals_base = offsets->soft_frame;
15381 return offsets;
15382 }
15383
15384 /* Ensure SFP has the correct alignment. */
15385 if (ARM_DOUBLEWORD_ALIGN
15386 && (offsets->soft_frame & 7))
15387 {
15388 offsets->soft_frame += 4;
15389 /* Try to align stack by pushing an extra reg. Don't bother doing this
15390 when there is a stack frame as the alignment will be rolled into
15391 the normal stack adjustment. */
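	  /* E.g. if only {r4, r5, lr} would otherwise be pushed (12 bytes),
	     adding one more register such as r3 restores 8-byte alignment
	     of the saved-register block (illustrative).  */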
15392 if (frame_size + crtl->outgoing_args_size == 0)
15393 {
15394 int reg = -1;
15395
15396 /* If it is safe to use r3, then do so. This sometimes
15397 generates better code on Thumb-2 by avoiding the need to
15398 use 32-bit push/pop instructions. */
15399 if (! any_sibcall_uses_r3 ()
15400 && arm_size_return_regs () <= 12
15401 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15402 {
15403 reg = 3;
15404 }
15405 else
15406 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15407 {
15408 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15409 {
15410 reg = i;
15411 break;
15412 }
15413 }
15414
15415 if (reg != -1)
15416 {
15417 offsets->saved_regs += 4;
15418 offsets->saved_regs_mask |= (1 << reg);
15419 }
15420 }
15421 }
15422
15423 offsets->locals_base = offsets->soft_frame + frame_size;
15424 offsets->outgoing_args = (offsets->locals_base
15425 + crtl->outgoing_args_size);
15426
15427 if (ARM_DOUBLEWORD_ALIGN)
15428 {
15429 /* Ensure SP remains doubleword aligned. */
15430 if (offsets->outgoing_args & 7)
15431 offsets->outgoing_args += 4;
15432 gcc_assert (!(offsets->outgoing_args & 7));
15433 }
15434
15435 return offsets;
15436 }
15437
15438
15439 /* Calculate the relative offsets for the different stack pointers. Positive
15440 offsets are in the direction of stack growth. */
15441
15442 HOST_WIDE_INT
15443 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15444 {
15445 arm_stack_offsets *offsets;
15446
15447 offsets = arm_get_frame_offsets ();
15448
15449 /* OK, now we have enough information to compute the distances.
15450 There must be an entry in these switch tables for each pair
15451 of registers in ELIMINABLE_REGS, even if some of the entries
15452 seem to be redundant or useless. */
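  /* Worked sketch with hypothetical numbers: if saved_args = 0,
     saved_regs = 16, soft_frame = 16, there are 8 bytes of locals and
     no outgoing arguments, then outgoing_args = 24 and eliminating
     ARG_POINTER into STACK_POINTER returns 24 - (0 + 4) = 20
     (illustrative arithmetic only).  */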
15453 switch (from)
15454 {
15455 case ARG_POINTER_REGNUM:
15456 switch (to)
15457 {
15458 case THUMB_HARD_FRAME_POINTER_REGNUM:
15459 return 0;
15460
15461 case FRAME_POINTER_REGNUM:
15462 /* This is the reverse of the soft frame pointer
15463 to hard frame pointer elimination below. */
15464 return offsets->soft_frame - offsets->saved_args;
15465
15466 case ARM_HARD_FRAME_POINTER_REGNUM:
15467 /* This is only non-zero in the case where the static chain register
15468 is stored above the frame. */
15469 return offsets->frame - offsets->saved_args - 4;
15470
15471 case STACK_POINTER_REGNUM:
15472 /* If nothing has been pushed on the stack at all
15473 then this will return -4. This *is* correct! */
15474 return offsets->outgoing_args - (offsets->saved_args + 4);
15475
15476 default:
15477 gcc_unreachable ();
15478 }
15479 gcc_unreachable ();
15480
15481 case FRAME_POINTER_REGNUM:
15482 switch (to)
15483 {
15484 case THUMB_HARD_FRAME_POINTER_REGNUM:
15485 return 0;
15486
15487 case ARM_HARD_FRAME_POINTER_REGNUM:
15488 /* The hard frame pointer points to the top entry in the
15489 stack frame. The soft frame pointer to the bottom entry
15490 in the stack frame. If there is no stack frame at all,
15491 then they are identical. */
15492
15493 return offsets->frame - offsets->soft_frame;
15494
15495 case STACK_POINTER_REGNUM:
15496 return offsets->outgoing_args - offsets->soft_frame;
15497
15498 default:
15499 gcc_unreachable ();
15500 }
15501 gcc_unreachable ();
15502
15503 default:
15504 /* You cannot eliminate from the stack pointer.
15505 In theory you could eliminate from the hard frame
15506 pointer to the stack pointer, but this will never
15507 happen, since if a stack frame is not needed the
15508 hard frame pointer will never be used. */
15509 gcc_unreachable ();
15510 }
15511 }
15512
15513 /* Given FROM and TO register numbers, say whether this elimination is
15514 allowed. Frame pointer elimination is automatically handled.
15515
15516 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15517 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15518 pointer, we must eliminate FRAME_POINTER_REGNUM into
15519 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15520 ARG_POINTER_REGNUM. */
15521
15522 bool
15523 arm_can_eliminate (const int from, const int to)
15524 {
15525 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15526 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15527 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
15528 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
15529 true);
15530 }
15531
15532 /* Emit RTL to save coprocessor registers on function entry. Returns the
15533 number of bytes pushed. */
15534
15535 static int
15536 arm_save_coproc_regs(void)
15537 {
15538 int saved_size = 0;
15539 unsigned reg;
15540 unsigned start_reg;
15541 rtx insn;
15542
15543 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15544 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15545 {
15546 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15547 insn = gen_rtx_MEM (V2SImode, insn);
15548 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15549 RTX_FRAME_RELATED_P (insn) = 1;
15550 saved_size += 8;
15551 }
15552
15553 /* Save any floating point call-saved registers used by this
15554 function. */
15555 if (TARGET_FPA_EMU2)
15556 {
15557 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15558 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15559 {
15560 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15561 insn = gen_rtx_MEM (XFmode, insn);
15562 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15563 RTX_FRAME_RELATED_P (insn) = 1;
15564 saved_size += 12;
15565 }
15566 }
15567 else
15568 {
15569 start_reg = LAST_FPA_REGNUM;
15570
15571 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15572 {
15573 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15574 {
15575 if (start_reg - reg == 3)
15576 {
15577 insn = emit_sfm (reg, 4);
15578 RTX_FRAME_RELATED_P (insn) = 1;
15579 saved_size += 48;
15580 start_reg = reg - 1;
15581 }
15582 }
15583 else
15584 {
15585 if (start_reg != reg)
15586 {
15587 insn = emit_sfm (reg + 1, start_reg - reg);
15588 RTX_FRAME_RELATED_P (insn) = 1;
15589 saved_size += (start_reg - reg) * 12;
15590 }
15591 start_reg = reg - 1;
15592 }
15593 }
15594
15595 if (start_reg != reg)
15596 {
15597 insn = emit_sfm (reg + 1, start_reg - reg);
15598 saved_size += (start_reg - reg) * 12;
15599 RTX_FRAME_RELATED_P (insn) = 1;
15600 }
15601 }
15602 if (TARGET_HARD_FLOAT && TARGET_VFP)
15603 {
15604 start_reg = FIRST_VFP_REGNUM;
15605
15606 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15607 {
15608 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15609 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15610 {
15611 if (start_reg != reg)
15612 saved_size += vfp_emit_fstmd (start_reg,
15613 (reg - start_reg) / 2);
15614 start_reg = reg + 2;
15615 }
15616 }
15617 if (start_reg != reg)
15618 saved_size += vfp_emit_fstmd (start_reg,
15619 (reg - start_reg) / 2);
15620 }
15621 return saved_size;
15622 }
15623
15624
15625 /* Set the Thumb frame pointer from the stack pointer. */
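/* A sketch of the two paths below: for small offsets this is a single
   "add fp, sp, #amount"; for larger offsets the constant is materialised
   into FP first and SP is then added to it, with an explicit frame note
   so the unwinder still sees FP = SP + amount.  */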
15626
15627 static void
15628 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15629 {
15630 HOST_WIDE_INT amount;
15631 rtx insn, dwarf;
15632
15633 amount = offsets->outgoing_args - offsets->locals_base;
15634 if (amount < 1024)
15635 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15636 stack_pointer_rtx, GEN_INT (amount)));
15637 else
15638 {
15639 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15640 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15641 expects the first two operands to be the same. */
15642 if (TARGET_THUMB2)
15643 {
15644 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15645 stack_pointer_rtx,
15646 hard_frame_pointer_rtx));
15647 }
15648 else
15649 {
15650 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15651 hard_frame_pointer_rtx,
15652 stack_pointer_rtx));
15653 }
15654 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15655 plus_constant (stack_pointer_rtx, amount));
15656 RTX_FRAME_RELATED_P (dwarf) = 1;
15657 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15658 }
15659
15660 RTX_FRAME_RELATED_P (insn) = 1;
15661 }
15662
15663 /* Generate the prologue instructions for entry into an ARM or Thumb-2
15664 function. */
15665 void
15666 arm_expand_prologue (void)
15667 {
15668 rtx amount;
15669 rtx insn;
15670 rtx ip_rtx;
15671 unsigned long live_regs_mask;
15672 unsigned long func_type;
15673 int fp_offset = 0;
15674 int saved_pretend_args = 0;
15675 int saved_regs = 0;
15676 unsigned HOST_WIDE_INT args_to_push;
15677 arm_stack_offsets *offsets;
15678
15679 func_type = arm_current_func_type ();
15680
15681 /* Naked functions don't have prologues. */
15682 if (IS_NAKED (func_type))
15683 return;
15684
15685 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15686 args_to_push = crtl->args.pretend_args_size;
15687
15688 /* Compute which register we will have to save onto the stack. */
15689 offsets = arm_get_frame_offsets ();
15690 live_regs_mask = offsets->saved_regs_mask;
15691
15692 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15693
15694 if (IS_STACKALIGN (func_type))
15695 {
15696 rtx dwarf;
15697 rtx r0;
15698 rtx r1;
15699 /* Handle a word-aligned stack pointer. We generate the following:
15700
15701 mov r0, sp
15702 bic r1, r0, #7
15703 mov sp, r1
15704 <save and restore r0 in normal prologue/epilogue>
15705 mov sp, r0
15706 bx lr
15707
15708 The unwinder doesn't need to know about the stack realignment.
15709 Just tell it we saved SP in r0. */
15710 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15711
15712 r0 = gen_rtx_REG (SImode, 0);
15713 r1 = gen_rtx_REG (SImode, 1);
15714 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15715 compiler won't choke. */
15716 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15717 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15718 insn = gen_movsi (r0, stack_pointer_rtx);
15719 RTX_FRAME_RELATED_P (insn) = 1;
15720 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15721 emit_insn (insn);
15722 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15723 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15724 }
15725
15726 /* For APCS frames, if IP register is clobbered
15727 when creating frame, save that register in a special
15728 way. */
15729 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15730 {
15731 if (IS_INTERRUPT (func_type))
15732 {
15733 /* Interrupt functions must not corrupt any registers.
15734 Creating a frame pointer however, corrupts the IP
15735 register, so we must push it first. */
15736 insn = emit_multi_reg_push (1 << IP_REGNUM);
15737
15738 /* Do not set RTX_FRAME_RELATED_P on this insn.
15739 The dwarf stack unwinding code only wants to see one
15740 stack decrement per function, and this is not it. If
15741 this instruction is labeled as being part of the frame
15742 creation sequence then dwarf2out_frame_debug_expr will
15743 die when it encounters the assignment of IP to FP
15744 later on, since the use of SP here establishes SP as
15745 the CFA register and not IP.
15746
15747 Anyway this instruction is not really part of the stack
15748 frame creation although it is part of the prologue. */
15749 }
15750 else if (IS_NESTED (func_type))
15751 {
15752 /* The static chain register is the same as the IP register
15753 used as a scratch register during stack frame creation.
15754 To get around this we need to find somewhere to store IP
15755 whilst the frame is being created.  We try the following
15756 places in order:
15757
15758 1. The last argument register.
15759 2. A slot on the stack above the frame. (This only
15760 works if the function is not a varargs function).
15761 3. Register r3, after pushing the argument registers
15762 onto the stack.
15763
15764 Note - we only need to tell the dwarf2 backend about the SP
15765 adjustment in the second variant; the static chain register
15766 doesn't need to be unwound, as it doesn't contain a value
15767 inherited from the caller. */
15768
15769 if (df_regs_ever_live_p (3) == false)
15770 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15771 else if (args_to_push == 0)
15772 {
15773 rtx dwarf;
15774
15775 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15776 saved_regs += 4;
15777
15778 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15779 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15780 fp_offset = 4;
15781
15782 /* Just tell the dwarf backend that we adjusted SP. */
15783 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15784 plus_constant (stack_pointer_rtx,
15785 -fp_offset));
15786 RTX_FRAME_RELATED_P (insn) = 1;
15787 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15788 }
15789 else
15790 {
15791 /* Store the args on the stack. */
15792 if (cfun->machine->uses_anonymous_args)
15793 insn = emit_multi_reg_push
15794 ((0xf0 >> (args_to_push / 4)) & 0xf);
15795 else
15796 insn = emit_insn
15797 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15798 GEN_INT (- args_to_push)));
15799
15800 RTX_FRAME_RELATED_P (insn) = 1;
15801
15802 saved_pretend_args = 1;
15803 fp_offset = args_to_push;
15804 args_to_push = 0;
15805
15806 /* Now reuse r3 to preserve IP. */
15807 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15808 }
15809 }
15810
15811 insn = emit_set_insn (ip_rtx,
15812 plus_constant (stack_pointer_rtx, fp_offset));
15813 RTX_FRAME_RELATED_P (insn) = 1;
15814 }
15815
15816 if (args_to_push)
15817 {
15818 /* Push the argument registers, or reserve space for them. */
15819 if (cfun->machine->uses_anonymous_args)
15820 insn = emit_multi_reg_push
15821 ((0xf0 >> (args_to_push / 4)) & 0xf);
15822 else
15823 insn = emit_insn
15824 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15825 GEN_INT (- args_to_push)));
15826 RTX_FRAME_RELATED_P (insn) = 1;
15827 }
15828
15829 /* If this is an interrupt service routine, and the link register
15830 is going to be pushed, and we're not generating extra
15831 push of IP (needed when a frame is needed and the frame layout is APCS),
15832 subtracting four from LR now will mean that the function return
15833 can be done with a single instruction. */
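  /* i.e. the adjusted LR can later be reloaded straight into the PC by
     the epilogue's load-multiple, instead of needing a separate
     "subs pc, lr, #4" (a sketch; the exact epilogue depends on the
     saved register set).  */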
15834 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15835 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15836 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15837 && TARGET_ARM)
15838 {
15839 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15840
15841 emit_set_insn (lr, plus_constant (lr, -4));
15842 }
15843
15844 if (live_regs_mask)
15845 {
15846 saved_regs += bit_count (live_regs_mask) * 4;
15847 if (optimize_size && !frame_pointer_needed
15848 && saved_regs == offsets->saved_regs - offsets->saved_args)
15849 {
15850 /* If no coprocessor registers are being pushed and we don't have
15851 to worry about a frame pointer then push extra registers to
15852 create the stack frame.  This is done in a way that does not
15853 alter the frame layout, so it is independent of the epilogue.  */
15854 int n;
15855 int frame;
15856 n = 0;
15857 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15858 n++;
15859 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15860 if (frame && n * 4 >= frame)
15861 {
15862 n = frame / 4;
15863 live_regs_mask |= (1 << n) - 1;
15864 saved_regs += frame;
15865 }
15866 }
15867 insn = emit_multi_reg_push (live_regs_mask);
15868 RTX_FRAME_RELATED_P (insn) = 1;
15869 }
15870
15871 if (! IS_VOLATILE (func_type))
15872 saved_regs += arm_save_coproc_regs ();
15873
15874 if (frame_pointer_needed && TARGET_ARM)
15875 {
15876 /* Create the new frame pointer. */
15877 if (TARGET_APCS_FRAME)
15878 {
15879 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15880 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15881 RTX_FRAME_RELATED_P (insn) = 1;
15882
15883 if (IS_NESTED (func_type))
15884 {
15885 /* Recover the static chain register. */
15886 if (!df_regs_ever_live_p (3)
15887 || saved_pretend_args)
15888 insn = gen_rtx_REG (SImode, 3);
15889 else /* if (crtl->args.pretend_args_size == 0) */
15890 {
15891 insn = plus_constant (hard_frame_pointer_rtx, 4);
15892 insn = gen_frame_mem (SImode, insn);
15893 }
15894 emit_set_insn (ip_rtx, insn);
15895 /* Add a USE to stop propagate_one_insn() from barfing. */
15896 emit_insn (gen_prologue_use (ip_rtx));
15897 }
15898 }
15899 else
15900 {
15901 insn = GEN_INT (saved_regs - 4);
15902 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15903 stack_pointer_rtx, insn));
15904 RTX_FRAME_RELATED_P (insn) = 1;
15905 }
15906 }
15907
15908 if (flag_stack_usage_info)
15909 current_function_static_stack_size
15910 = offsets->outgoing_args - offsets->saved_args;
15911
15912 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15913 {
15914 /* This add can produce multiple insns for a large constant, so we
15915 need to get tricky. */
15916 rtx last = get_last_insn ();
15917
15918 amount = GEN_INT (offsets->saved_args + saved_regs
15919 - offsets->outgoing_args);
15920
15921 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15922 amount));
15923 do
15924 {
15925 last = last ? NEXT_INSN (last) : get_insns ();
15926 RTX_FRAME_RELATED_P (last) = 1;
15927 }
15928 while (last != insn);
15929
15930 /* If the frame pointer is needed, emit a special barrier that
15931 will prevent the scheduler from moving stores to the frame
15932 before the stack adjustment. */
15933 if (frame_pointer_needed)
15934 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15935 hard_frame_pointer_rtx));
15936 }
15937
15938
15939 if (frame_pointer_needed && TARGET_THUMB2)
15940 thumb_set_frame_pointer (offsets);
15941
15942 if (flag_pic && arm_pic_register != INVALID_REGNUM)
15943 {
15944 unsigned long mask;
15945
15946 mask = live_regs_mask;
15947 mask &= THUMB2_WORK_REGS;
15948 if (!IS_NESTED (func_type))
15949 mask |= (1 << IP_REGNUM);
15950 arm_load_pic_register (mask);
15951 }
15952
15953 /* If we are profiling, make sure no instructions are scheduled before
15954 the call to mcount. Similarly if the user has requested no
15955 scheduling in the prolog. Similarly if we want non-call exceptions
15956 using the EABI unwinder, to prevent faulting instructions from being
15957 swapped with a stack adjustment. */
15958 if (crtl->profile || !TARGET_SCHED_PROLOG
15959 || (arm_except_unwind_info (&global_options) == UI_TARGET
15960 && cfun->can_throw_non_call_exceptions))
15961 emit_insn (gen_blockage ());
15962
15963 /* If the link register is being kept alive, with the return address in it,
15964 then make sure that it does not get reused by the ce2 pass. */
15965 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15966 cfun->machine->lr_save_eliminated = 1;
15967 }
15968 \f
15969 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15970 static void
15971 arm_print_condition (FILE *stream)
15972 {
15973 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15974 {
15975 /* Branch conversion is not implemented for Thumb-2. */
15976 if (TARGET_THUMB)
15977 {
15978 output_operand_lossage ("predicated Thumb instruction");
15979 return;
15980 }
15981 if (current_insn_predicate != NULL)
15982 {
15983 output_operand_lossage
15984 ("predicated instruction in conditional sequence");
15985 return;
15986 }
15987
15988 fputs (arm_condition_codes[arm_current_cc], stream);
15989 }
15990 else if (current_insn_predicate)
15991 {
15992 enum arm_cond_code code;
15993
15994 if (TARGET_THUMB1)
15995 {
15996 output_operand_lossage ("predicated Thumb instruction");
15997 return;
15998 }
15999
16000 code = get_arm_condition_code (current_insn_predicate);
16001 fputs (arm_condition_codes[code], stream);
16002 }
16003 }
16004
16005
16006 /* If CODE is 'd', then the X is a condition operand and the instruction
16007 should only be executed if the condition is true.
16008 if CODE is 'D', then the X is a condition operand and the instruction
16009 should only be executed if the condition is false: however, if the mode
16010 of the comparison is CCFPEmode, then always execute the instruction -- we
16011 do this because in these circumstances !GE does not necessarily imply LT;
16012 in these cases the instruction pattern will take care to make sure that
16013 an instruction containing %d will follow, thereby undoing the effects of
16014 doing this instruction unconditionally.
16015 If CODE is 'N' then X is a floating point operand that must be negated
16016 before output.
16017 If CODE is 'B' then output a bitwise inverted value of X (a const int).
16018 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
16019 static void
16020 arm_print_operand (FILE *stream, rtx x, int code)
16021 {
16022 switch (code)
16023 {
16024 case '@':
16025 fputs (ASM_COMMENT_START, stream);
16026 return;
16027
16028 case '_':
16029 fputs (user_label_prefix, stream);
16030 return;
16031
16032 case '|':
16033 fputs (REGISTER_PREFIX, stream);
16034 return;
16035
16036 case '?':
16037 arm_print_condition (stream);
16038 return;
16039
16040 case '(':
16041 /* Nothing in unified syntax, otherwise the current condition code. */
16042 if (!TARGET_UNIFIED_ASM)
16043 arm_print_condition (stream);
16044 break;
16045
16046 case ')':
16047 /* The current condition code in unified syntax, otherwise nothing. */
16048 if (TARGET_UNIFIED_ASM)
16049 arm_print_condition (stream);
16050 break;
16051
16052 case '.':
16053 /* The current condition code for a condition code setting instruction.
16054 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16055 if (TARGET_UNIFIED_ASM)
16056 {
16057 fputc('s', stream);
16058 arm_print_condition (stream);
16059 }
16060 else
16061 {
16062 arm_print_condition (stream);
16063 fputc('s', stream);
16064 }
16065 return;
16066
16067 case '!':
16068 /* If the instruction is conditionally executed then print
16069 the current condition code, otherwise print 's'. */
16070 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16071 if (current_insn_predicate)
16072 arm_print_condition (stream);
16073 else
16074 fputc('s', stream);
16075 break;
16076
16077 /* %# is a "break" sequence. It doesn't output anything, but is used to
16078 separate e.g. operand numbers from following text, if that text consists
16079 of further digits which we don't want to be part of the operand
16080 number. */
16081 case '#':
16082 return;
16083
16084 case 'N':
16085 {
16086 REAL_VALUE_TYPE r;
16087 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16088 r = real_value_negate (&r);
16089 fprintf (stream, "%s", fp_const_from_val (&r));
16090 }
16091 return;
16092
16093 /* An integer or symbol address without a preceding # sign. */
16094 case 'c':
16095 switch (GET_CODE (x))
16096 {
16097 case CONST_INT:
16098 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16099 break;
16100
16101 case SYMBOL_REF:
16102 output_addr_const (stream, x);
16103 break;
16104
16105 default:
16106 gcc_unreachable ();
16107 }
16108 return;
16109
16110 case 'B':
16111 if (GET_CODE (x) == CONST_INT)
16112 {
16113 HOST_WIDE_INT val;
16114 val = ARM_SIGN_EXTEND (~INTVAL (x));
16115 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16116 }
16117 else
16118 {
16119 putc ('~', stream);
16120 output_addr_const (stream, x);
16121 }
16122 return;
16123
16124 case 'L':
16125 /* The low 16 bits of an immediate constant. */
16126 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16127 return;
16128
16129 case 'i':
16130 fprintf (stream, "%s", arithmetic_instr (x, 1));
16131 return;
16132
16133 /* Truncate Cirrus shift counts. */
16134 case 's':
16135 if (GET_CODE (x) == CONST_INT)
16136 {
16137 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
16138 return;
16139 }
16140 arm_print_operand (stream, x, 0);
16141 return;
16142
16143 case 'I':
16144 fprintf (stream, "%s", arithmetic_instr (x, 0));
16145 return;
16146
16147 case 'S':
16148 {
16149 HOST_WIDE_INT val;
16150 const char *shift;
16151
16152 if (!shift_operator (x, SImode))
16153 {
16154 output_operand_lossage ("invalid shift operand");
16155 break;
16156 }
16157
16158 shift = shift_op (x, &val);
16159
16160 if (shift)
16161 {
16162 fprintf (stream, ", %s ", shift);
16163 if (val == -1)
16164 arm_print_operand (stream, XEXP (x, 1), 0);
16165 else
16166 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16167 }
16168 }
16169 return;
16170
16171 /* An explanation of the 'Q', 'R' and 'H' register operands:
16172
16173 In a pair of registers containing a DI or DF value the 'Q'
16174 operand returns the register number of the register containing
16175 the least significant part of the value. The 'R' operand returns
16176 the register number of the register containing the most
16177 significant part of the value.
16178
16179 The 'H' operand returns the higher of the two register numbers.
16180 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
16181 same as the 'Q' operand, since the most significant part of the
16182 value is held in the lower number register. The reverse is true
16183 on systems where WORDS_BIG_ENDIAN is false.
16184
16185 The purpose of these operands is to distinguish between cases
16186 where the endian-ness of the values is important (for example
16187 when they are added together), and cases where the endian-ness
16188 is irrelevant, but the order of register operations is important.
16189 For example when loading a value from memory into a register
16190 pair, the endian-ness does not matter. Provided that the value
16191 from the lower memory address is put into the lower numbered
16192 register, and the value from the higher address is put into the
16193 higher numbered register, the load will work regardless of whether
16194 the value being loaded is big-wordian or little-wordian. The
16195 order of the two register loads can matter however, if the address
16196 of the memory location is actually held in one of the registers
16197 being overwritten by the load.
16198
16199 The 'Q' and 'R' constraints are also available for 64-bit
16200 constants. */
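    /* For example, with a DImode value in {r2, r3} on a little-endian
       target (WORDS_BIG_ENDIAN false), %Q prints r2, %R prints r3 and
       %H prints r3 (illustrative).  */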
16201 case 'Q':
16202 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16203 {
16204 rtx part = gen_lowpart (SImode, x);
16205 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16206 return;
16207 }
16208
16209 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16210 {
16211 output_operand_lossage ("invalid operand for code '%c'", code);
16212 return;
16213 }
16214
16215 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16216 return;
16217
16218 case 'R':
16219 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16220 {
16221 enum machine_mode mode = GET_MODE (x);
16222 rtx part;
16223
16224 if (mode == VOIDmode)
16225 mode = DImode;
16226 part = gen_highpart_mode (SImode, mode, x);
16227 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16228 return;
16229 }
16230
16231 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16232 {
16233 output_operand_lossage ("invalid operand for code '%c'", code);
16234 return;
16235 }
16236
16237 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16238 return;
16239
16240 case 'H':
16241 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16242 {
16243 output_operand_lossage ("invalid operand for code '%c'", code);
16244 return;
16245 }
16246
16247 asm_fprintf (stream, "%r", REGNO (x) + 1);
16248 return;
16249
16250 case 'J':
16251 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16252 {
16253 output_operand_lossage ("invalid operand for code '%c'", code);
16254 return;
16255 }
16256
16257 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16258 return;
16259
16260 case 'K':
16261 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16262 {
16263 output_operand_lossage ("invalid operand for code '%c'", code);
16264 return;
16265 }
16266
16267 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16268 return;
16269
16270 case 'm':
16271 asm_fprintf (stream, "%r",
16272 GET_CODE (XEXP (x, 0)) == REG
16273 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16274 return;
16275
16276 case 'M':
16277 asm_fprintf (stream, "{%r-%r}",
16278 REGNO (x),
16279 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16280 return;
16281
16282 /* Like 'M', but writing doubleword vector registers, for use by Neon
16283 insns. */
16284 case 'h':
16285 {
16286 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16287 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16288 if (numregs == 1)
16289 asm_fprintf (stream, "{d%d}", regno);
16290 else
16291 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16292 }
16293 return;
16294
16295 case 'd':
16296 /* CONST_TRUE_RTX means always -- that's the default. */
16297 if (x == const_true_rtx)
16298 return;
16299
16300 if (!COMPARISON_P (x))
16301 {
16302 output_operand_lossage ("invalid operand for code '%c'", code);
16303 return;
16304 }
16305
16306 fputs (arm_condition_codes[get_arm_condition_code (x)],
16307 stream);
16308 return;
16309
16310 case 'D':
16311 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16312 want to do that. */
16313 if (x == const_true_rtx)
16314 {
16315 output_operand_lossage ("instruction never executed");
16316 return;
16317 }
16318 if (!COMPARISON_P (x))
16319 {
16320 output_operand_lossage ("invalid operand for code '%c'", code);
16321 return;
16322 }
16323
16324 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16325 (get_arm_condition_code (x))],
16326 stream);
16327 return;
16328
16329 /* Cirrus registers can be accessed in a variety of ways:
16330 single floating point (f)
16331 double floating point (d)
16332 32bit integer (fx)
16333 64bit integer (dx). */
16334 case 'W': /* Cirrus register in F mode. */
16335 case 'X': /* Cirrus register in D mode. */
16336 case 'Y': /* Cirrus register in FX mode. */
16337 case 'Z': /* Cirrus register in DX mode. */
16338 gcc_assert (GET_CODE (x) == REG
16339 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16340
16341 fprintf (stream, "mv%s%s",
16342 code == 'W' ? "f"
16343 : code == 'X' ? "d"
16344 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16345
16346 return;
16347
16348 /* Print cirrus register in the mode specified by the register's mode. */
16349 case 'V':
16350 {
16351 int mode = GET_MODE (x);
16352
16353 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16354 {
16355 output_operand_lossage ("invalid operand for code '%c'", code);
16356 return;
16357 }
16358
16359 fprintf (stream, "mv%s%s",
16360 mode == DFmode ? "d"
16361 : mode == SImode ? "fx"
16362 : mode == DImode ? "dx"
16363 : "f", reg_names[REGNO (x)] + 2);
16364
16365 return;
16366 }
16367
16368 case 'U':
16369 if (GET_CODE (x) != REG
16370 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16371 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16372 /* Bad value for wCG register number. */
16373 {
16374 output_operand_lossage ("invalid operand for code '%c'", code);
16375 return;
16376 }
16377
16378 else
16379 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16380 return;
16381
16382 /* Print an iWMMXt control register name. */
16383 case 'w':
16384 if (GET_CODE (x) != CONST_INT
16385 || INTVAL (x) < 0
16386 || INTVAL (x) >= 16)
16387 /* Bad value for wC register number. */
16388 {
16389 output_operand_lossage ("invalid operand for code '%c'", code);
16390 return;
16391 }
16392
16393 else
16394 {
16395 static const char * wc_reg_names [16] =
16396 {
16397 "wCID", "wCon", "wCSSF", "wCASF",
16398 "wC4", "wC5", "wC6", "wC7",
16399 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16400 "wC12", "wC13", "wC14", "wC15"
16401 };
16402
16403 fprintf (stream, wc_reg_names [INTVAL (x)]);
16404 }
16405 return;
16406
16407 /* Print the high single-precision register of a VFP double-precision
16408 register. */
16409 case 'p':
16410 {
16411 int mode = GET_MODE (x);
16412 int regno;
16413
16414 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16415 {
16416 output_operand_lossage ("invalid operand for code '%c'", code);
16417 return;
16418 }
16419
16420 regno = REGNO (x);
16421 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16422 {
16423 output_operand_lossage ("invalid operand for code '%c'", code);
16424 return;
16425 }
16426
16427 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16428 }
16429 return;
16430
16431 /* Print a VFP/Neon double precision or quad precision register name. */
16432 case 'P':
16433 case 'q':
16434 {
16435 int mode = GET_MODE (x);
16436 int is_quad = (code == 'q');
16437 int regno;
16438
16439 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16440 {
16441 output_operand_lossage ("invalid operand for code '%c'", code);
16442 return;
16443 }
16444
16445 if (GET_CODE (x) != REG
16446 || !IS_VFP_REGNUM (REGNO (x)))
16447 {
16448 output_operand_lossage ("invalid operand for code '%c'", code);
16449 return;
16450 }
16451
16452 regno = REGNO (x);
16453 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16454 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16455 {
16456 output_operand_lossage ("invalid operand for code '%c'", code);
16457 return;
16458 }
16459
16460 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16461 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
16462 }
16463 return;
16464
16465 /* These two codes print the low/high doubleword register of a Neon quad
16466 register, respectively. For pair-structure types, can also print
16467 low/high quadword registers. */
16468 case 'e':
16469 case 'f':
16470 {
16471 int mode = GET_MODE (x);
16472 int regno;
16473
16474 if ((GET_MODE_SIZE (mode) != 16
16475 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16476 {
16477 output_operand_lossage ("invalid operand for code '%c'", code);
16478 return;
16479 }
16480
16481 regno = REGNO (x);
16482 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16483 {
16484 output_operand_lossage ("invalid operand for code '%c'", code);
16485 return;
16486 }
16487
16488 if (GET_MODE_SIZE (mode) == 16)
16489 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16490 + (code == 'f' ? 1 : 0));
16491 else
16492 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16493 + (code == 'f' ? 1 : 0));
16494 }
16495 return;
16496
16497 /* Print a VFPv3 floating-point constant, represented as an integer
16498 index. */
16499 case 'G':
16500 {
16501 int index = vfp3_const_double_index (x);
16502 gcc_assert (index != -1);
16503 fprintf (stream, "%d", index);
16504 }
16505 return;
16506
16507 /* Print bits representing opcode features for Neon.
16508
16509 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16510 and polynomials as unsigned.
16511
16512 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16513
16514 Bit 2 is 1 for rounding functions, 0 otherwise. */
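  /* For example, an operand of 3 (signed + float/polynomial bits) makes
     %T, %F and %t all print 'f'; an operand of 0 prints 'u', 'i' and 'u'
     respectively (illustrative).  */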
16515
16516 /* Identify the type as 's', 'u', 'p' or 'f'. */
16517 case 'T':
16518 {
16519 HOST_WIDE_INT bits = INTVAL (x);
16520 fputc ("uspf"[bits & 3], stream);
16521 }
16522 return;
16523
16524 /* Likewise, but signed and unsigned integers are both 'i'. */
16525 case 'F':
16526 {
16527 HOST_WIDE_INT bits = INTVAL (x);
16528 fputc ("iipf"[bits & 3], stream);
16529 }
16530 return;
16531
16532 /* As for 'T', but emit 'u' instead of 'p'. */
16533 case 't':
16534 {
16535 HOST_WIDE_INT bits = INTVAL (x);
16536 fputc ("usuf"[bits & 3], stream);
16537 }
16538 return;
16539
16540 /* Bit 2: rounding (vs none). */
16541 case 'O':
16542 {
16543 HOST_WIDE_INT bits = INTVAL (x);
16544 fputs ((bits & 4) != 0 ? "r" : "", stream);
16545 }
16546 return;
16547
16548 /* Memory operand for vld1/vst1 instruction. */
16549 case 'A':
16550 {
16551 rtx addr;
16552 bool postinc = FALSE;
16553 unsigned align, memsize, align_bits;
16554
16555 gcc_assert (GET_CODE (x) == MEM);
16556 addr = XEXP (x, 0);
16557 if (GET_CODE (addr) == POST_INC)
16558 {
16559 postinc = 1;
16560 addr = XEXP (addr, 0);
16561 }
16562 asm_fprintf (stream, "[%r", REGNO (addr));
16563
16564 /* We know the alignment of this access, so we can emit a hint in the
16565 instruction (for some alignments) as an aid to the memory subsystem
16566 of the target. */
16567 align = MEM_ALIGN (x) >> 3;
16568 memsize = INTVAL (MEM_SIZE (x));
16569
16570 /* Only certain alignment specifiers are supported by the hardware. */
16571 if (memsize == 16 && (align % 32) == 0)
16572 align_bits = 256;
16573 else if ((memsize == 8 || memsize == 16) && (align % 16) == 0)
16574 align_bits = 128;
16575 else if ((align % 8) == 0)
16576 align_bits = 64;
16577 else
16578 align_bits = 0;
16579
16580 if (align_bits != 0)
16581 asm_fprintf (stream, ":%d", align_bits);
16582
16583 asm_fprintf (stream, "]");
16584
16585 if (postinc)
16586 fputs("!", stream);
16587 }
16588 return;
16589
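 /* Memory operand whose address is a plain register, printed as "[rN]". */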
16590 case 'C':
16591 {
16592 rtx addr;
16593
16594 gcc_assert (GET_CODE (x) == MEM);
16595 addr = XEXP (x, 0);
16596 gcc_assert (GET_CODE (addr) == REG);
16597 asm_fprintf (stream, "[%r]", REGNO (addr));
16598 }
16599 return;
16600
16601 /* Translate an S register number into a D register number and element index. */
16602 case 'y':
16603 {
16604 int mode = GET_MODE (x);
16605 int regno;
16606
16607 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16608 {
16609 output_operand_lossage ("invalid operand for code '%c'", code);
16610 return;
16611 }
16612
16613 regno = REGNO (x);
16614 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16615 {
16616 output_operand_lossage ("invalid operand for code '%c'", code);
16617 return;
16618 }
16619
16620 regno = regno - FIRST_VFP_REGNUM;
16621 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16622 }
16623 return;
16624
16625 /* Register specifier for vld1.16/vst1.16. Translate the S register
16626 number into a D register number and element index. */
16627 case 'z':
16628 {
16629 int mode = GET_MODE (x);
16630 int regno;
16631
16632 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16633 {
16634 output_operand_lossage ("invalid operand for code '%c'", code);
16635 return;
16636 }
16637
16638 regno = REGNO (x);
16639 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16640 {
16641 output_operand_lossage ("invalid operand for code '%c'", code);
16642 return;
16643 }
16644
16645 regno = regno - FIRST_VFP_REGNUM;
16646 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
16647 }
16648 return;
16649
16650 default:
16651 if (x == 0)
16652 {
16653 output_operand_lossage ("missing operand");
16654 return;
16655 }
16656
16657 switch (GET_CODE (x))
16658 {
16659 case REG:
16660 asm_fprintf (stream, "%r", REGNO (x));
16661 break;
16662
16663 case MEM:
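 /* Record the access mode so arm_print_operand_address can size any
 auto-increment offsets. */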
16664 output_memory_reference_mode = GET_MODE (x);
16665 output_address (XEXP (x, 0));
16666 break;
16667
16668 case CONST_DOUBLE:
16669 if (TARGET_NEON)
16670 {
16671 char fpstr[20];
16672 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16673 sizeof (fpstr), 0, 1);
16674 fprintf (stream, "#%s", fpstr);
16675 }
16676 else
16677 fprintf (stream, "#%s", fp_immediate_constant (x));
16678 break;
16679
16680 default:
16681 gcc_assert (GET_CODE (x) != NEG);
16682 fputc ('#', stream);
16683 if (GET_CODE (x) == HIGH)
16684 {
16685 fputs (":lower16:", stream);
16686 x = XEXP (x, 0);
16687 }
16688
16689 output_addr_const (stream, x);
16690 break;
16691 }
16692 }
16693 }
16694 \f
16695 /* Target hook for printing a memory address. */
16696 static void
16697 arm_print_operand_address (FILE *stream, rtx x)
16698 {
16699 if (TARGET_32BIT)
16700 {
16701 int is_minus = GET_CODE (x) == MINUS;
16702
16703 if (GET_CODE (x) == REG)
16704 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16705 else if (GET_CODE (x) == PLUS || is_minus)
16706 {
16707 rtx base = XEXP (x, 0);
16708 rtx index = XEXP (x, 1);
16709 HOST_WIDE_INT offset = 0;
16710 if (GET_CODE (base) != REG
16711 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
16712 {
16713 /* Ensure that BASE is a register. */
16714 /* (one of them must be). */
16715 /* Also ensure the SP is not used as an index register. */
16716 rtx temp = base;
16717 base = index;
16718 index = temp;
16719 }
16720 switch (GET_CODE (index))
16721 {
16722 case CONST_INT:
16723 offset = INTVAL (index);
16724 if (is_minus)
16725 offset = -offset;
16726 asm_fprintf (stream, "[%r, #%wd]",
16727 REGNO (base), offset);
16728 break;
16729
16730 case REG:
16731 asm_fprintf (stream, "[%r, %s%r]",
16732 REGNO (base), is_minus ? "-" : "",
16733 REGNO (index));
16734 break;
16735
16736 case MULT:
16737 case ASHIFTRT:
16738 case LSHIFTRT:
16739 case ASHIFT:
16740 case ROTATERT:
16741 {
16742 asm_fprintf (stream, "[%r, %s%r",
16743 REGNO (base), is_minus ? "-" : "",
16744 REGNO (XEXP (index, 0)));
16745 arm_print_operand (stream, index, 'S');
16746 fputs ("]", stream);
16747 break;
16748 }
16749
16750 default:
16751 gcc_unreachable ();
16752 }
16753 }
16754 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16755 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16756 {
16757 extern enum machine_mode output_memory_reference_mode;
16758
16759 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16760
16761 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16762 asm_fprintf (stream, "[%r, #%s%d]!",
16763 REGNO (XEXP (x, 0)),
16764 GET_CODE (x) == PRE_DEC ? "-" : "",
16765 GET_MODE_SIZE (output_memory_reference_mode));
16766 else
16767 asm_fprintf (stream, "[%r], #%s%d",
16768 REGNO (XEXP (x, 0)),
16769 GET_CODE (x) == POST_DEC ? "-" : "",
16770 GET_MODE_SIZE (output_memory_reference_mode));
16771 }
16772 else if (GET_CODE (x) == PRE_MODIFY)
16773 {
16774 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16775 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16776 asm_fprintf (stream, "#%wd]!",
16777 INTVAL (XEXP (XEXP (x, 1), 1)));
16778 else
16779 asm_fprintf (stream, "%r]!",
16780 REGNO (XEXP (XEXP (x, 1), 1)));
16781 }
16782 else if (GET_CODE (x) == POST_MODIFY)
16783 {
16784 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16785 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16786 asm_fprintf (stream, "#%wd",
16787 INTVAL (XEXP (XEXP (x, 1), 1)));
16788 else
16789 asm_fprintf (stream, "%r",
16790 REGNO (XEXP (XEXP (x, 1), 1)));
16791 }
16792 else output_addr_const (stream, x);
16793 }
16794 else
16795 {
16796 if (GET_CODE (x) == REG)
16797 asm_fprintf (stream, "[%r]", REGNO (x));
16798 else if (GET_CODE (x) == POST_INC)
16799 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16800 else if (GET_CODE (x) == PLUS)
16801 {
16802 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16803 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16804 asm_fprintf (stream, "[%r, #%wd]",
16805 REGNO (XEXP (x, 0)),
16806 INTVAL (XEXP (x, 1)));
16807 else
16808 asm_fprintf (stream, "[%r, %r]",
16809 REGNO (XEXP (x, 0)),
16810 REGNO (XEXP (x, 1)));
16811 }
16812 else
16813 output_addr_const (stream, x);
16814 }
16815 }
16816 \f
16817 /* Target hook for indicating whether a punctuation character for
16818 TARGET_PRINT_OPERAND is valid. */
16819 static bool
16820 arm_print_operand_punct_valid_p (unsigned char code)
16821 {
16822 return (code == '@' || code == '|' || code == '.'
16823 || code == '(' || code == ')' || code == '#'
16824 || (TARGET_32BIT && (code == '?'))
16825 || (TARGET_THUMB2 && (code == '!'))
16826 || (TARGET_THUMB && (code == '_')));
16827 }
16828 \f
16829 /* Target hook for assembling integer objects. The ARM version needs to
16830 handle word-sized values specially. */
16831 static bool
16832 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16833 {
16834 enum machine_mode mode;
16835
16836 if (size == UNITS_PER_WORD && aligned_p)
16837 {
16838 fputs ("\t.word\t", asm_out_file);
16839 output_addr_const (asm_out_file, x);
16840
16841 /* Mark symbols as position independent. We only do this in the
16842 .text segment, not in the .data segment. */
16843 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16844 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16845 {
16846 /* See legitimize_pic_address for an explanation of the
16847 TARGET_VXWORKS_RTP check. */
16848 if (TARGET_VXWORKS_RTP
16849 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16850 fputs ("(GOT)", asm_out_file);
16851 else
16852 fputs ("(GOTOFF)", asm_out_file);
16853 }
16854 fputc ('\n', asm_out_file);
16855 return true;
16856 }
16857
16858 mode = GET_MODE (x);
16859
16860 if (arm_vector_mode_supported_p (mode))
16861 {
16862 int i, units;
16863
16864 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16865
16866 units = CONST_VECTOR_NUNITS (x);
16867 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16868
16869 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16870 for (i = 0; i < units; i++)
16871 {
16872 rtx elt = CONST_VECTOR_ELT (x, i);
16873 assemble_integer
16874 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16875 }
16876 else
16877 for (i = 0; i < units; i++)
16878 {
16879 rtx elt = CONST_VECTOR_ELT (x, i);
16880 REAL_VALUE_TYPE rval;
16881
16882 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16883
16884 assemble_real
16885 (rval, GET_MODE_INNER (mode),
16886 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16887 }
16888
16889 return true;
16890 }
16891
16892 return default_assemble_integer (x, size, aligned_p);
16893 }
16894
16895 static void
16896 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16897 {
16898 section *s;
16899
16900 if (!TARGET_AAPCS_BASED)
16901 {
16902 (is_ctor ?
16903 default_named_section_asm_out_constructor
16904 : default_named_section_asm_out_destructor) (symbol, priority);
16905 return;
16906 }
16907
16908 /* Put these in the .init_array section, using a special relocation. */
16909 if (priority != DEFAULT_INIT_PRIORITY)
16910 {
16911 char buf[18];
16912 sprintf (buf, "%s.%.5u",
16913 is_ctor ? ".init_array" : ".fini_array",
16914 priority);
16915 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16916 }
16917 else if (is_ctor)
16918 s = ctors_section;
16919 else
16920 s = dtors_section;
16921
16922 switch_to_section (s);
16923 assemble_align (POINTER_SIZE);
16924 fputs ("\t.word\t", asm_out_file);
16925 output_addr_const (asm_out_file, symbol);
16926 fputs ("(target1)\n", asm_out_file);
16927 }
16928
16929 /* Add a function to the list of static constructors. */
16930
16931 static void
16932 arm_elf_asm_constructor (rtx symbol, int priority)
16933 {
16934 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16935 }
16936
16937 /* Add a function to the list of static destructors. */
16938
16939 static void
16940 arm_elf_asm_destructor (rtx symbol, int priority)
16941 {
16942 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
16943 }
16944 \f
16945 /* A finite state machine takes care of noticing whether or not instructions
16946 can be conditionally executed, and thus decrease execution time and code
16947 size by deleting branch instructions. The fsm is controlled by
16948 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16949
16950 /* The states of the fsm controlling condition codes are:
16951 0: normal, do nothing special
16952 1: make ASM_OUTPUT_OPCODE not output this instruction
16953 2: make ASM_OUTPUT_OPCODE not output this instruction
16954 3: make instructions conditional
16955 4: make instructions conditional
16956
16957 State transitions (state->state by whom under condition):
16958 0 -> 1 final_prescan_insn if the `target' is a label
16959 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16960 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16961 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16962 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16963 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16964 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16965 (the target insn is arm_target_insn).
16966
16967 If the jump clobbers the conditions then we use states 2 and 4.
16968
16969 A similar thing can be done with conditional return insns.
16970
16971 XXX In case the `target' is an unconditional branch, this conditionalising
16972 of the instructions always reduces code size, but not always execution
16973 time. But then, I want to reduce the code size to somewhere near what
16974 /bin/cc produces. */
16975
16976 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16977 instructions. When a COND_EXEC instruction is seen the subsequent
16978 instructions are scanned so that multiple conditional instructions can be
16979 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16980 specify the length and true/false mask for the IT block. These will be
16981 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
16982
16983 /* Returns the index of the ARM condition code string in
16984 `arm_condition_codes'. COMPARISON should be an rtx like
16985 `(eq (...) (...))'. */
16986 static enum arm_cond_code
16987 get_arm_condition_code (rtx comparison)
16988 {
16989 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16990 enum arm_cond_code code;
16991 enum rtx_code comp_code = GET_CODE (comparison);
16992
16993 if (GET_MODE_CLASS (mode) != MODE_CC)
16994 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16995 XEXP (comparison, 1));
16996
16997 switch (mode)
16998 {
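 /* The CC_D* "dominance" modes describe two comparisons combined into
 one; only an EQ or NE test of the combined result is valid (see the
 assert below). */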
16999 case CC_DNEmode: code = ARM_NE; goto dominance;
17000 case CC_DEQmode: code = ARM_EQ; goto dominance;
17001 case CC_DGEmode: code = ARM_GE; goto dominance;
17002 case CC_DGTmode: code = ARM_GT; goto dominance;
17003 case CC_DLEmode: code = ARM_LE; goto dominance;
17004 case CC_DLTmode: code = ARM_LT; goto dominance;
17005 case CC_DGEUmode: code = ARM_CS; goto dominance;
17006 case CC_DGTUmode: code = ARM_HI; goto dominance;
17007 case CC_DLEUmode: code = ARM_LS; goto dominance;
17008 case CC_DLTUmode: code = ARM_CC;
17009
17010 dominance:
17011 gcc_assert (comp_code == EQ || comp_code == NE);
17012
17013 if (comp_code == EQ)
17014 return ARM_INVERSE_CONDITION_CODE (code);
17015 return code;
17016
17017 case CC_NOOVmode:
17018 switch (comp_code)
17019 {
17020 case NE: return ARM_NE;
17021 case EQ: return ARM_EQ;
17022 case GE: return ARM_PL;
17023 case LT: return ARM_MI;
17024 default: gcc_unreachable ();
17025 }
17026
17027 case CC_Zmode:
17028 switch (comp_code)
17029 {
17030 case NE: return ARM_NE;
17031 case EQ: return ARM_EQ;
17032 default: gcc_unreachable ();
17033 }
17034
17035 case CC_Nmode:
17036 switch (comp_code)
17037 {
17038 case NE: return ARM_MI;
17039 case EQ: return ARM_PL;
17040 default: gcc_unreachable ();
17041 }
17042
17043 case CCFPEmode:
17044 case CCFPmode:
17045 /* These encodings assume that AC=1 in the FPA system control
17046 byte. This allows us to handle all cases except UNEQ and
17047 LTGT. */
17048 switch (comp_code)
17049 {
17050 case GE: return ARM_GE;
17051 case GT: return ARM_GT;
17052 case LE: return ARM_LS;
17053 case LT: return ARM_MI;
17054 case NE: return ARM_NE;
17055 case EQ: return ARM_EQ;
17056 case ORDERED: return ARM_VC;
17057 case UNORDERED: return ARM_VS;
17058 case UNLT: return ARM_LT;
17059 case UNLE: return ARM_LE;
17060 case UNGT: return ARM_HI;
17061 case UNGE: return ARM_PL;
17062 /* UNEQ and LTGT do not have a representation. */
17063 case UNEQ: /* Fall through. */
17064 case LTGT: /* Fall through. */
17065 default: gcc_unreachable ();
17066 }
17067
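 /* The comparison operands were swapped when this mode was selected,
 so each condition must be mirrored. */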
17068 case CC_SWPmode:
17069 switch (comp_code)
17070 {
17071 case NE: return ARM_NE;
17072 case EQ: return ARM_EQ;
17073 case GE: return ARM_LE;
17074 case GT: return ARM_LT;
17075 case LE: return ARM_GE;
17076 case LT: return ARM_GT;
17077 case GEU: return ARM_LS;
17078 case GTU: return ARM_CC;
17079 case LEU: return ARM_CS;
17080 case LTU: return ARM_HI;
17081 default: gcc_unreachable ();
17082 }
17083
17084 case CC_Cmode:
17085 switch (comp_code)
17086 {
17087 case LTU: return ARM_CS;
17088 case GEU: return ARM_CC;
17089 default: gcc_unreachable ();
17090 }
17091
17092 case CC_CZmode:
17093 switch (comp_code)
17094 {
17095 case NE: return ARM_NE;
17096 case EQ: return ARM_EQ;
17097 case GEU: return ARM_CS;
17098 case GTU: return ARM_HI;
17099 case LEU: return ARM_LS;
17100 case LTU: return ARM_CC;
17101 default: gcc_unreachable ();
17102 }
17103
17104 case CC_NCVmode:
17105 switch (comp_code)
17106 {
17107 case GE: return ARM_GE;
17108 case LT: return ARM_LT;
17109 case GEU: return ARM_CS;
17110 case LTU: return ARM_CC;
17111 default: gcc_unreachable ();
17112 }
17113
17114 case CCmode:
17115 switch (comp_code)
17116 {
17117 case NE: return ARM_NE;
17118 case EQ: return ARM_EQ;
17119 case GE: return ARM_GE;
17120 case GT: return ARM_GT;
17121 case LE: return ARM_LE;
17122 case LT: return ARM_LT;
17123 case GEU: return ARM_CS;
17124 case GTU: return ARM_HI;
17125 case LEU: return ARM_LS;
17126 case LTU: return ARM_CC;
17127 default: gcc_unreachable ();
17128 }
17129
17130 default: gcc_unreachable ();
17131 }
17132 }
17133
17134 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17135 instructions. */
17136 void
17137 thumb2_final_prescan_insn (rtx insn)
17138 {
17139 rtx first_insn = insn;
17140 rtx body = PATTERN (insn);
17141 rtx predicate;
17142 enum arm_cond_code code;
17143 int n;
17144 int mask;
17145
17146 /* Remove the previous insn from the count of insns to be output. */
17147 if (arm_condexec_count)
17148 arm_condexec_count--;
17149
17150 /* Nothing to do if we are already inside a conditional block. */
17151 if (arm_condexec_count)
17152 return;
17153
17154 if (GET_CODE (body) != COND_EXEC)
17155 return;
17156
17157 /* Conditional jumps are implemented directly. */
17158 if (GET_CODE (insn) == JUMP_INSN)
17159 return;
17160
17161 predicate = COND_EXEC_TEST (body);
17162 arm_current_cc = get_arm_condition_code (predicate);
17163
17164 n = get_attr_ce_count (insn);
17165 arm_condexec_count = 1;
17166 arm_condexec_mask = (1 << n) - 1;
17167 arm_condexec_masklen = n;
17168 /* See if subsequent instructions can be combined into the same block. */
17169 for (;;)
17170 {
17171 insn = next_nonnote_insn (insn);
17172
17173 /* Jumping into the middle of an IT block is illegal, so a label or
17174 barrier terminates the block. */
17175 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
17176 break;
17177
17178 body = PATTERN (insn);
17179 /* USE and CLOBBER aren't really insns, so just skip them. */
17180 if (GET_CODE (body) == USE
17181 || GET_CODE (body) == CLOBBER)
17182 continue;
17183
17184 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17185 if (GET_CODE (body) != COND_EXEC)
17186 break;
17187 /* Allow up to 4 conditionally executed instructions in a block. */
17188 n = get_attr_ce_count (insn);
17189 if (arm_condexec_masklen + n > 4)
17190 break;
17191
17192 predicate = COND_EXEC_TEST (body);
17193 code = get_arm_condition_code (predicate);
17194 mask = (1 << n) - 1;
17195 if (arm_current_cc == code)
17196 arm_condexec_mask |= (mask << arm_condexec_masklen);
17197 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
17198 break;
17199
17200 arm_condexec_count++;
17201 arm_condexec_masklen += n;
17202
17203 /* A jump must be the last instruction in a conditional block. */
17204 if (GET_CODE(insn) == JUMP_INSN)
17205 break;
17206 }
17207 /* Restore recog_data (getting the attributes of other insns can
17208 destroy this array, but final.c assumes that it remains intact
17209 across this call). */
17210 extract_constrain_insn_cached (first_insn);
17211 }
17212
17213 void
17214 arm_final_prescan_insn (rtx insn)
17215 {
17216 /* BODY will hold the body of INSN. */
17217 rtx body = PATTERN (insn);
17218
17219 /* This will be 1 if trying to repeat the trick, and things need to be
17220 reversed if it appears to fail. */
17221 int reverse = 0;
17222
17223 /* If we start with a return insn, we only succeed if we find another one. */
17224 int seeking_return = 0;
17225
17226 /* START_INSN will hold the insn from where we start looking. This is the
17227 first insn after the following code_label if REVERSE is true. */
17228 rtx start_insn = insn;
17229
17230 /* If in state 4, check if the target branch is reached, in order to
17231 change back to state 0. */
17232 if (arm_ccfsm_state == 4)
17233 {
17234 if (insn == arm_target_insn)
17235 {
17236 arm_target_insn = NULL;
17237 arm_ccfsm_state = 0;
17238 }
17239 return;
17240 }
17241
17242 /* If in state 3, it is possible to repeat the trick, if this insn is an
17243 unconditional branch to a label, and immediately following this branch
17244 is the previous target label which is only used once, and the label this
17245 branch jumps to is not too far off. */
17246 if (arm_ccfsm_state == 3)
17247 {
17248 if (simplejump_p (insn))
17249 {
17250 start_insn = next_nonnote_insn (start_insn);
17251 if (GET_CODE (start_insn) == BARRIER)
17252 {
17253 /* XXX Isn't this always a barrier? */
17254 start_insn = next_nonnote_insn (start_insn);
17255 }
17256 if (GET_CODE (start_insn) == CODE_LABEL
17257 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17258 && LABEL_NUSES (start_insn) == 1)
17259 reverse = TRUE;
17260 else
17261 return;
17262 }
17263 else if (GET_CODE (body) == RETURN)
17264 {
17265 start_insn = next_nonnote_insn (start_insn);
17266 if (GET_CODE (start_insn) == BARRIER)
17267 start_insn = next_nonnote_insn (start_insn);
17268 if (GET_CODE (start_insn) == CODE_LABEL
17269 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17270 && LABEL_NUSES (start_insn) == 1)
17271 {
17272 reverse = TRUE;
17273 seeking_return = 1;
17274 }
17275 else
17276 return;
17277 }
17278 else
17279 return;
17280 }
17281
17282 gcc_assert (!arm_ccfsm_state || reverse);
17283 if (GET_CODE (insn) != JUMP_INSN)
17284 return;
17285
17286 /* This jump might be paralleled with a clobber of the condition codes;
17287 the jump should always come first. */
17288 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17289 body = XVECEXP (body, 0, 0);
17290
17291 if (reverse
17292 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17293 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17294 {
17295 int insns_skipped;
17296 int fail = FALSE, succeed = FALSE;
17297 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17298 int then_not_else = TRUE;
17299 rtx this_insn = start_insn, label = 0;
17300
17301 /* Register the insn jumped to. */
17302 if (reverse)
17303 {
17304 if (!seeking_return)
17305 label = XEXP (SET_SRC (body), 0);
17306 }
17307 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17308 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17309 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17310 {
17311 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17312 then_not_else = FALSE;
17313 }
17314 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
17315 seeking_return = 1;
17316 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
17317 {
17318 seeking_return = 1;
17319 then_not_else = FALSE;
17320 }
17321 else
17322 gcc_unreachable ();
17323
17324 /* See how many insns this branch skips, and what kind of insns. If all
17325 insns are okay, and the label or unconditional branch to the same
17326 label is not too far away, succeed. */
17327 for (insns_skipped = 0;
17328 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17329 {
17330 rtx scanbody;
17331
17332 this_insn = next_nonnote_insn (this_insn);
17333 if (!this_insn)
17334 break;
17335
17336 switch (GET_CODE (this_insn))
17337 {
17338 case CODE_LABEL:
17339 /* Succeed if it is the target label, otherwise fail since
17340 control falls in from somewhere else. */
17341 if (this_insn == label)
17342 {
17343 arm_ccfsm_state = 1;
17344 succeed = TRUE;
17345 }
17346 else
17347 fail = TRUE;
17348 break;
17349
17350 case BARRIER:
17351 /* Succeed if the following insn is the target label.
17352 Otherwise fail.
17353 If return insns are used then the last insn in a function
17354 will be a barrier. */
17355 this_insn = next_nonnote_insn (this_insn);
17356 if (this_insn && this_insn == label)
17357 {
17358 arm_ccfsm_state = 1;
17359 succeed = TRUE;
17360 }
17361 else
17362 fail = TRUE;
17363 break;
17364
17365 case CALL_INSN:
17366 /* The AAPCS says that conditional calls should not be
17367 used since they make interworking inefficient (the
17368 linker can't transform BL<cond> into BLX). That's
17369 only a problem if the machine has BLX. */
17370 if (arm_arch5)
17371 {
17372 fail = TRUE;
17373 break;
17374 }
17375
17376 /* Succeed if the following insn is the target label, or
17377 if the following two insns are a barrier and the
17378 target label. */
17379 this_insn = next_nonnote_insn (this_insn);
17380 if (this_insn && GET_CODE (this_insn) == BARRIER)
17381 this_insn = next_nonnote_insn (this_insn);
17382
17383 if (this_insn && this_insn == label
17384 && insns_skipped < max_insns_skipped)
17385 {
17386 arm_ccfsm_state = 1;
17387 succeed = TRUE;
17388 }
17389 else
17390 fail = TRUE;
17391 break;
17392
17393 case JUMP_INSN:
17394 /* If this is an unconditional branch to the same label, succeed.
17395 If it is to another label, do nothing. If it is conditional,
17396 fail. */
17397 /* XXX Probably, the tests for SET and the PC are
17398 unnecessary. */
17399
17400 scanbody = PATTERN (this_insn);
17401 if (GET_CODE (scanbody) == SET
17402 && GET_CODE (SET_DEST (scanbody)) == PC)
17403 {
17404 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17405 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17406 {
17407 arm_ccfsm_state = 2;
17408 succeed = TRUE;
17409 }
17410 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17411 fail = TRUE;
17412 }
17413 /* Fail if a conditional return is undesirable (e.g. on a
17414 StrongARM), but still allow this if optimizing for size. */
17415 else if (GET_CODE (scanbody) == RETURN
17416 && !use_return_insn (TRUE, NULL)
17417 && !optimize_size)
17418 fail = TRUE;
17419 else if (GET_CODE (scanbody) == RETURN
17420 && seeking_return)
17421 {
17422 arm_ccfsm_state = 2;
17423 succeed = TRUE;
17424 }
17425 else if (GET_CODE (scanbody) == PARALLEL)
17426 {
17427 switch (get_attr_conds (this_insn))
17428 {
17429 case CONDS_NOCOND:
17430 break;
17431 default:
17432 fail = TRUE;
17433 break;
17434 }
17435 }
17436 else
17437 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
17438
17439 break;
17440
17441 case INSN:
17442 /* Instructions using or affecting the condition codes make it
17443 fail. */
17444 scanbody = PATTERN (this_insn);
17445 if (!(GET_CODE (scanbody) == SET
17446 || GET_CODE (scanbody) == PARALLEL)
17447 || get_attr_conds (this_insn) != CONDS_NOCOND)
17448 fail = TRUE;
17449
17450 /* A conditional Cirrus instruction must be followed by
17451 a non-Cirrus instruction. However, since we
17452 conditionalize instructions in this function and cannot
17453 add instructions (nops) by the time we get here,
17454 because shorten_branches() has already been
17455 called, we disable conditionalizing Cirrus
17456 instructions to be safe. */
17457 if (GET_CODE (scanbody) != USE
17458 && GET_CODE (scanbody) != CLOBBER
17459 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17460 fail = TRUE;
17461 break;
17462
17463 default:
17464 break;
17465 }
17466 }
17467 if (succeed)
17468 {
17469 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17470 arm_target_label = CODE_LABEL_NUMBER (label);
17471 else
17472 {
17473 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17474
17475 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17476 {
17477 this_insn = next_nonnote_insn (this_insn);
17478 gcc_assert (!this_insn
17479 || (GET_CODE (this_insn) != BARRIER
17480 && GET_CODE (this_insn) != CODE_LABEL));
17481 }
17482 if (!this_insn)
17483 {
17484 /* Oh, dear! We ran off the end; give up. */
17485 extract_constrain_insn_cached (insn);
17486 arm_ccfsm_state = 0;
17487 arm_target_insn = NULL;
17488 return;
17489 }
17490 arm_target_insn = this_insn;
17491 }
17492
17493 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17494 what it was. */
17495 if (!reverse)
17496 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17497
17498 if (reverse || then_not_else)
17499 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
17500 }
17501
17502 /* Restore recog_data (getting the attributes of other insns can
17503 destroy this array, but final.c assumes that it remains intact
17504 across this call). */
17505 extract_constrain_insn_cached (insn);
17506 }
17507 }
17508
17509 /* Output IT instructions. */
17510 void
17511 thumb2_asm_output_opcode (FILE * stream)
17512 {
17513 char buff[5];
17514 int n;
17515
17516 if (arm_condexec_mask)
17517 {
17518 for (n = 0; n < arm_condexec_masklen; n++)
17519 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
17520 buff[n] = 0;
17521 asm_fprintf(stream, "i%s\t%s\n\t", buff,
17522 arm_condition_codes[arm_current_cc]);
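 /* For example, a three-insn block whose second insn uses the inverse
 condition (mask 0b101) with the EQ condition emits "itet eq" here. */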
17523 arm_condexec_mask = 0;
17524 }
17525 }
17526
17527 /* Returns true if REGNO is a valid register
17528 for holding a quantity of type MODE. */
17529 int
17530 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17531 {
17532 if (GET_MODE_CLASS (mode) == MODE_CC)
17533 return (regno == CC_REGNUM
17534 || (TARGET_HARD_FLOAT && TARGET_VFP
17535 && regno == VFPCC_REGNUM));
17536
17537 if (TARGET_THUMB1)
17538 /* For the Thumb we only allow values bigger than SImode in
17539 registers 0 - 6, so that there is always a second low
17540 register available to hold the upper part of the value.
17541 We probably ought to ensure that the register is the
17542 start of an even numbered register pair. */
17543 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17544
17545 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17546 && IS_CIRRUS_REGNUM (regno))
17547 /* We have outlawed SI values in Cirrus registers because they
17548 reside in the lower 32 bits, but SF values reside in the
17549 upper 32 bits. This causes gcc all sorts of grief. We can't
17550 even split the registers into pairs because Cirrus SI values
17551 get sign extended to 64 bits -- aldyh. */
17552 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17553
17554 if (TARGET_HARD_FLOAT && TARGET_VFP
17555 && IS_VFP_REGNUM (regno))
17556 {
17557 if (mode == SFmode || mode == SImode)
17558 return VFP_REGNO_OK_FOR_SINGLE (regno);
17559
17560 if (mode == DFmode)
17561 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17562
17563 /* VFP registers can hold HFmode values, but there is no point in
17564 putting them there unless we have hardware conversion insns. */
17565 if (mode == HFmode)
17566 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17567
17568 if (TARGET_NEON)
17569 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17570 || (VALID_NEON_QREG_MODE (mode)
17571 && NEON_REGNO_OK_FOR_QUAD (regno))
17572 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17573 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17574 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17575 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17576 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17577
17578 return FALSE;
17579 }
17580
17581 if (TARGET_REALLY_IWMMXT)
17582 {
17583 if (IS_IWMMXT_GR_REGNUM (regno))
17584 return mode == SImode;
17585
17586 if (IS_IWMMXT_REGNUM (regno))
17587 return VALID_IWMMXT_REG_MODE (mode);
17588 }
17589
17590 /* We allow almost any value to be stored in the general registers.
17591 Restrict doubleword quantities to even register pairs so that we can
17592 use ldrd. Do not allow very large Neon structure opaque modes in
17593 general registers; they would use too many. */
17594 if (regno <= LAST_ARM_REGNUM)
17595 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17596 && ARM_NUM_REGS (mode) <= 4;
17597
17598 if (regno == FRAME_POINTER_REGNUM
17599 || regno == ARG_POINTER_REGNUM)
17600 /* We only allow integers in the fake hard registers. */
17601 return GET_MODE_CLASS (mode) == MODE_INT;
17602
17603 /* The only registers left are the FPA registers
17604 which we only allow to hold FP values. */
17605 return (TARGET_HARD_FLOAT && TARGET_FPA
17606 && GET_MODE_CLASS (mode) == MODE_FLOAT
17607 && regno >= FIRST_FPA_REGNUM
17608 && regno <= LAST_FPA_REGNUM);
17609 }
17610
17611 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17612 not used in arm mode. */
17613
17614 enum reg_class
17615 arm_regno_class (int regno)
17616 {
17617 if (TARGET_THUMB1)
17618 {
17619 if (regno == STACK_POINTER_REGNUM)
17620 return STACK_REG;
17621 if (regno == CC_REGNUM)
17622 return CC_REG;
17623 if (regno < 8)
17624 return LO_REGS;
17625 return HI_REGS;
17626 }
17627
17628 if (TARGET_THUMB2 && regno < 8)
17629 return LO_REGS;
17630
17631 if ( regno <= LAST_ARM_REGNUM
17632 || regno == FRAME_POINTER_REGNUM
17633 || regno == ARG_POINTER_REGNUM)
17634 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17635
17636 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17637 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17638
17639 if (IS_CIRRUS_REGNUM (regno))
17640 return CIRRUS_REGS;
17641
17642 if (IS_VFP_REGNUM (regno))
17643 {
17644 if (regno <= D7_VFP_REGNUM)
17645 return VFP_D0_D7_REGS;
17646 else if (regno <= LAST_LO_VFP_REGNUM)
17647 return VFP_LO_REGS;
17648 else
17649 return VFP_HI_REGS;
17650 }
17651
17652 if (IS_IWMMXT_REGNUM (regno))
17653 return IWMMXT_REGS;
17654
17655 if (IS_IWMMXT_GR_REGNUM (regno))
17656 return IWMMXT_GR_REGS;
17657
17658 return FPA_REGS;
17659 }
17660
17661 /* Handle a special case when computing the offset
17662 of an argument from the frame pointer. */
17663 int
17664 arm_debugger_arg_offset (int value, rtx addr)
17665 {
17666 rtx insn;
17667
17668 /* We are only interested if dbxout_parms() failed to compute the offset. */
17669 if (value != 0)
17670 return 0;
17671
17672 /* We can only cope with the case where the address is held in a register. */
17673 if (GET_CODE (addr) != REG)
17674 return 0;
17675
17676 /* If we are using the frame pointer to point at the argument, then
17677 an offset of 0 is correct. */
17678 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
17679 return 0;
17680
17681 /* If we are using the stack pointer to point at the
17682 argument, then an offset of 0 is correct. */
17683 /* ??? Check this is consistent with thumb2 frame layout. */
17684 if ((TARGET_THUMB || !frame_pointer_needed)
17685 && REGNO (addr) == SP_REGNUM)
17686 return 0;
17687
17688 /* Oh dear. The argument is pointed to by a register rather
17689 than being held in a register, or being stored at a known
17690 offset from the frame pointer. Since GDB only understands
17691 those two kinds of argument we must translate the address
17692 held in the register into an offset from the frame pointer.
17693 We do this by searching through the insns for the function
17694 looking to see where this register gets its value. If the
17695 register is initialized from the frame pointer plus an offset
17696 then we are in luck and we can continue, otherwise we give up.
17697
17698 This code is exercised by producing debugging information
17699 for a function with arguments like this:
17700
17701 double func (double a, double b, int c, double d) {return d;}
17702
17703 Without this code the stab for parameter 'd' will be set to
17704 an offset of 0 from the frame pointer, rather than 8. */
17705
17706 /* The if() statement says:
17707
17708 If the insn is a normal instruction
17709 and if the insn is setting the value in a register
17710 and if the register being set is the register holding the address of the argument
17711 and if the address is computed by an addition
17712 that involves adding to a register
17713 which is the frame pointer
17714 a constant integer
17715
17716 then... */
17717
17718 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17719 {
17720 if ( GET_CODE (insn) == INSN
17721 && GET_CODE (PATTERN (insn)) == SET
17722 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17723 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17724 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17725 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17726 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17727 )
17728 {
17729 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17730
17731 break;
17732 }
17733 }
17734
17735 if (value == 0)
17736 {
17737 debug_rtx (addr);
17738 warning (0, "unable to compute real location of stacked parameter");
17739 value = 8; /* XXX magic hack */
17740 }
17741
17742 return value;
17743 }
17744 \f
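 /* The element/vector modes a Neon builtin variant can be keyed on;
 T_MAX is a sentinel giving the number of entries. */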
17745 typedef enum {
17746 T_V8QI,
17747 T_V4HI,
17748 T_V2SI,
17749 T_V2SF,
17750 T_DI,
17751 T_V16QI,
17752 T_V8HI,
17753 T_V4SI,
17754 T_V4SF,
17755 T_V2DI,
17756 T_TI,
17757 T_EI,
17758 T_OI,
17759 T_MAX /* Size of enum. Keep last. */
17760 } neon_builtin_type_mode;
17761
17762 #define TYPE_MODE_BIT(X) (1 << (X))
17763
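 /* Bitmasks over the modes above: TB_DREG collects the 64-bit (D register)
 modes and TB_QREG the 128-bit (Q register) modes. */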
17764 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
17765 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
17766 | TYPE_MODE_BIT (T_DI))
17767 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
17768 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
17769 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
17770
17771 #define v8qi_UP T_V8QI
17772 #define v4hi_UP T_V4HI
17773 #define v2si_UP T_V2SI
17774 #define v2sf_UP T_V2SF
17775 #define di_UP T_DI
17776 #define v16qi_UP T_V16QI
17777 #define v8hi_UP T_V8HI
17778 #define v4si_UP T_V4SI
17779 #define v4sf_UP T_V4SF
17780 #define v2di_UP T_V2DI
17781 #define ti_UP T_TI
17782 #define ei_UP T_EI
17783 #define oi_UP T_OI
17784
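 /* UP(v8qi) expands to v8qi_UP and hence to the T_V8QI enumerator above,
 letting the VARn macros below name modes in lower case. */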
17785 #define UP(X) X##_UP
17786
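 /* Broad classification of each builtin by the shape of its operands;
 the expanders later in this file dispatch on this. */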
17787 typedef enum {
17788 NEON_BINOP,
17789 NEON_TERNOP,
17790 NEON_UNOP,
17791 NEON_GETLANE,
17792 NEON_SETLANE,
17793 NEON_CREATE,
17794 NEON_DUP,
17795 NEON_DUPLANE,
17796 NEON_COMBINE,
17797 NEON_SPLIT,
17798 NEON_LANEMUL,
17799 NEON_LANEMULL,
17800 NEON_LANEMULH,
17801 NEON_LANEMAC,
17802 NEON_SCALARMUL,
17803 NEON_SCALARMULL,
17804 NEON_SCALARMULH,
17805 NEON_SCALARMAC,
17806 NEON_CONVERT,
17807 NEON_FIXCONV,
17808 NEON_SELECT,
17809 NEON_RESULTPAIR,
17810 NEON_REINTERP,
17811 NEON_VTBL,
17812 NEON_VTBX,
17813 NEON_LOAD1,
17814 NEON_LOAD1LANE,
17815 NEON_STORE1,
17816 NEON_STORE1LANE,
17817 NEON_LOADSTRUCT,
17818 NEON_LOADSTRUCTLANE,
17819 NEON_STORESTRUCT,
17820 NEON_STORESTRUCTLANE,
17821 NEON_LOGICBINOP,
17822 NEON_SHIFTINSERT,
17823 NEON_SHIFTIMM,
17824 NEON_SHIFTACC
17825 } neon_itype;
17826
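 /* One entry per builtin variant: its name, shape, key mode, matching
 insn pattern, and the function code assigned when the builtins are
 registered. */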
17827 typedef struct {
17828 const char *name;
17829 const neon_itype itype;
17830 const neon_builtin_type_mode mode;
17831 const enum insn_code code;
17832 unsigned int fcode;
17833 } neon_builtin_datum;
17834
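 /* CF maps a builtin name and mode onto the corresponding CODE_FOR_neon_*
 insn code; each VARn macro expands to n table entries that share a name
 and shape but differ in key mode. */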
17835 #define CF(N,X) CODE_FOR_neon_##N##X
17836
17837 #define VAR1(T, N, A) \
17838 {#N, NEON_##T, UP (A), CF (N, A), 0}
17839 #define VAR2(T, N, A, B) \
17840 VAR1 (T, N, A), \
17841 {#N, NEON_##T, UP (B), CF (N, B), 0}
17842 #define VAR3(T, N, A, B, C) \
17843 VAR2 (T, N, A, B), \
17844 {#N, NEON_##T, UP (C), CF (N, C), 0}
17845 #define VAR4(T, N, A, B, C, D) \
17846 VAR3 (T, N, A, B, C), \
17847 {#N, NEON_##T, UP (D), CF (N, D), 0}
17848 #define VAR5(T, N, A, B, C, D, E) \
17849 VAR4 (T, N, A, B, C, D), \
17850 {#N, NEON_##T, UP (E), CF (N, E), 0}
17851 #define VAR6(T, N, A, B, C, D, E, F) \
17852 VAR5 (T, N, A, B, C, D, E), \
17853 {#N, NEON_##T, UP (F), CF (N, F), 0}
17854 #define VAR7(T, N, A, B, C, D, E, F, G) \
17855 VAR6 (T, N, A, B, C, D, E, F), \
17856 {#N, NEON_##T, UP (G), CF (N, G), 0}
17857 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
17858 VAR7 (T, N, A, B, C, D, E, F, G), \
17859 {#N, NEON_##T, UP (H), CF (N, H), 0}
17860 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17861 VAR8 (T, N, A, B, C, D, E, F, G, H), \
17862 {#N, NEON_##T, UP (I), CF (N, I), 0}
17863 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17864 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
17865 {#N, NEON_##T, UP (J), CF (N, J), 0}
17866
17867 /* The mode entries in the following table correspond to the "key" type of the
17868 instruction variant, i.e. equivalent to that which would be specified after
17869 the assembler mnemonic, which usually refers to the last vector operand.
17870 (Signed/unsigned/polynomial types are not differentiated between though, and
17871 are all mapped onto the same mode for a given element size.) The modes
17872 listed per instruction should be the same as those defined for that
17873 instruction's pattern in neon.md. */
17874
17875 static neon_builtin_datum neon_builtin_data[] =
17876 {
17877 VAR10 (BINOP, vadd,
17878 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17879 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
17880 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
17881 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17882 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17883 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
17884 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17885 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17886 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
17887 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17888 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
17889 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
17890 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
17891 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
17892 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
17893 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
17894 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
17895 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
17896 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
17897 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
17898 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
17899 VAR2 (BINOP, vqdmull, v4hi, v2si),
17900 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17901 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17902 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17903 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
17904 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
17905 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
17906 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17907 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17908 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17909 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
17910 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17911 VAR10 (BINOP, vsub,
17912 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17913 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
17914 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
17915 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17916 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17917 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
17918 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17919 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17920 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17921 VAR2 (BINOP, vcage, v2sf, v4sf),
17922 VAR2 (BINOP, vcagt, v2sf, v4sf),
17923 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17924 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17925 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
17926 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17927 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
17928 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17929 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17930 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
17931 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17932 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17933 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
17934 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
17935 VAR2 (BINOP, vrecps, v2sf, v4sf),
17936 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
17937 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17938 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17939 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17940 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17941 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17942 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17943 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17944 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17945 VAR2 (UNOP, vcnt, v8qi, v16qi),
17946 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
17947 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
17948 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17949 /* FIXME: vget_lane supports more variants than this! */
17950 VAR10 (GETLANE, vget_lane,
17951 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17952 VAR10 (SETLANE, vset_lane,
17953 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17954 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
17955 VAR10 (DUP, vdup_n,
17956 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17957 VAR10 (DUPLANE, vdup_lane,
17958 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17959 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
17960 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
17961 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
17962 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
17963 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
17964 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
17965 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
17966 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17967 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17968 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
17969 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
17970 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17971 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
17972 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
17973 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17974 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17975 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
17976 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
17977 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17978 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
17979 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
17980 VAR10 (BINOP, vext,
17981 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17982 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17983 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
17984 VAR2 (UNOP, vrev16, v8qi, v16qi),
17985 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
17986 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
17987 VAR10 (SELECT, vbsl,
17988 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17989 VAR1 (VTBL, vtbl1, v8qi),
17990 VAR1 (VTBL, vtbl2, v8qi),
17991 VAR1 (VTBL, vtbl3, v8qi),
17992 VAR1 (VTBL, vtbl4, v8qi),
17993 VAR1 (VTBX, vtbx1, v8qi),
17994 VAR1 (VTBX, vtbx2, v8qi),
17995 VAR1 (VTBX, vtbx3, v8qi),
17996 VAR1 (VTBX, vtbx4, v8qi),
17997 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17998 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17999 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18000 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
18001 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
18002 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
18003 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
18004 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
18005 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
18006 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
18007 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
18008 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
18009 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
18010 VAR10 (LOAD1, vld1,
18011 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18012 VAR10 (LOAD1LANE, vld1_lane,
18013 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18014 VAR10 (LOAD1, vld1_dup,
18015 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18016 VAR10 (STORE1, vst1,
18017 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18018 VAR10 (STORE1LANE, vst1_lane,
18019 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18020 VAR9 (LOADSTRUCT,
18021 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18022 VAR7 (LOADSTRUCTLANE, vld2_lane,
18023 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18024 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
18025 VAR9 (STORESTRUCT, vst2,
18026 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18027 VAR7 (STORESTRUCTLANE, vst2_lane,
18028 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18029 VAR9 (LOADSTRUCT,
18030 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18031 VAR7 (LOADSTRUCTLANE, vld3_lane,
18032 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18033 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
18034 VAR9 (STORESTRUCT, vst3,
18035 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18036 VAR7 (STORESTRUCTLANE, vst3_lane,
18037 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18038 VAR9 (LOADSTRUCT, vld4,
18039 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18040 VAR7 (LOADSTRUCTLANE, vld4_lane,
18041 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18042 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
18043 VAR9 (STORESTRUCT, vst4,
18044 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18045 VAR7 (STORESTRUCTLANE, vst4_lane,
18046 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18047 VAR10 (LOGICBINOP, vand,
18048 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18049 VAR10 (LOGICBINOP, vorr,
18050 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18051 VAR10 (BINOP, veor,
18052 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18053 VAR10 (LOGICBINOP, vbic,
18054 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18055 VAR10 (LOGICBINOP, vorn,
18056 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
18057 };
18058
18059 #undef CF
18060 #undef VAR1
18061 #undef VAR2
18062 #undef VAR3
18063 #undef VAR4
18064 #undef VAR5
18065 #undef VAR6
18066 #undef VAR7
18067 #undef VAR8
18068 #undef VAR9
18069 #undef VAR10
18070
18071 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
18072 symbolic names defined here (which would require too much duplication).
18073 FIXME? */
18074 enum arm_builtins
18075 {
18076 ARM_BUILTIN_GETWCX,
18077 ARM_BUILTIN_SETWCX,
18078
18079 ARM_BUILTIN_WZERO,
18080
18081 ARM_BUILTIN_WAVG2BR,
18082 ARM_BUILTIN_WAVG2HR,
18083 ARM_BUILTIN_WAVG2B,
18084 ARM_BUILTIN_WAVG2H,
18085
18086 ARM_BUILTIN_WACCB,
18087 ARM_BUILTIN_WACCH,
18088 ARM_BUILTIN_WACCW,
18089
18090 ARM_BUILTIN_WMACS,
18091 ARM_BUILTIN_WMACSZ,
18092 ARM_BUILTIN_WMACU,
18093 ARM_BUILTIN_WMACUZ,
18094
18095 ARM_BUILTIN_WSADB,
18096 ARM_BUILTIN_WSADBZ,
18097 ARM_BUILTIN_WSADH,
18098 ARM_BUILTIN_WSADHZ,
18099
18100 ARM_BUILTIN_WALIGN,
18101
18102 ARM_BUILTIN_TMIA,
18103 ARM_BUILTIN_TMIAPH,
18104 ARM_BUILTIN_TMIABB,
18105 ARM_BUILTIN_TMIABT,
18106 ARM_BUILTIN_TMIATB,
18107 ARM_BUILTIN_TMIATT,
18108
18109 ARM_BUILTIN_TMOVMSKB,
18110 ARM_BUILTIN_TMOVMSKH,
18111 ARM_BUILTIN_TMOVMSKW,
18112
18113 ARM_BUILTIN_TBCSTB,
18114 ARM_BUILTIN_TBCSTH,
18115 ARM_BUILTIN_TBCSTW,
18116
18117 ARM_BUILTIN_WMADDS,
18118 ARM_BUILTIN_WMADDU,
18119
18120 ARM_BUILTIN_WPACKHSS,
18121 ARM_BUILTIN_WPACKWSS,
18122 ARM_BUILTIN_WPACKDSS,
18123 ARM_BUILTIN_WPACKHUS,
18124 ARM_BUILTIN_WPACKWUS,
18125 ARM_BUILTIN_WPACKDUS,
18126
18127 ARM_BUILTIN_WADDB,
18128 ARM_BUILTIN_WADDH,
18129 ARM_BUILTIN_WADDW,
18130 ARM_BUILTIN_WADDSSB,
18131 ARM_BUILTIN_WADDSSH,
18132 ARM_BUILTIN_WADDSSW,
18133 ARM_BUILTIN_WADDUSB,
18134 ARM_BUILTIN_WADDUSH,
18135 ARM_BUILTIN_WADDUSW,
18136 ARM_BUILTIN_WSUBB,
18137 ARM_BUILTIN_WSUBH,
18138 ARM_BUILTIN_WSUBW,
18139 ARM_BUILTIN_WSUBSSB,
18140 ARM_BUILTIN_WSUBSSH,
18141 ARM_BUILTIN_WSUBSSW,
18142 ARM_BUILTIN_WSUBUSB,
18143 ARM_BUILTIN_WSUBUSH,
18144 ARM_BUILTIN_WSUBUSW,
18145
18146 ARM_BUILTIN_WAND,
18147 ARM_BUILTIN_WANDN,
18148 ARM_BUILTIN_WOR,
18149 ARM_BUILTIN_WXOR,
18150
18151 ARM_BUILTIN_WCMPEQB,
18152 ARM_BUILTIN_WCMPEQH,
18153 ARM_BUILTIN_WCMPEQW,
18154 ARM_BUILTIN_WCMPGTUB,
18155 ARM_BUILTIN_WCMPGTUH,
18156 ARM_BUILTIN_WCMPGTUW,
18157 ARM_BUILTIN_WCMPGTSB,
18158 ARM_BUILTIN_WCMPGTSH,
18159 ARM_BUILTIN_WCMPGTSW,
18160
18161 ARM_BUILTIN_TEXTRMSB,
18162 ARM_BUILTIN_TEXTRMSH,
18163 ARM_BUILTIN_TEXTRMSW,
18164 ARM_BUILTIN_TEXTRMUB,
18165 ARM_BUILTIN_TEXTRMUH,
18166 ARM_BUILTIN_TEXTRMUW,
18167 ARM_BUILTIN_TINSRB,
18168 ARM_BUILTIN_TINSRH,
18169 ARM_BUILTIN_TINSRW,
18170
18171 ARM_BUILTIN_WMAXSW,
18172 ARM_BUILTIN_WMAXSH,
18173 ARM_BUILTIN_WMAXSB,
18174 ARM_BUILTIN_WMAXUW,
18175 ARM_BUILTIN_WMAXUH,
18176 ARM_BUILTIN_WMAXUB,
18177 ARM_BUILTIN_WMINSW,
18178 ARM_BUILTIN_WMINSH,
18179 ARM_BUILTIN_WMINSB,
18180 ARM_BUILTIN_WMINUW,
18181 ARM_BUILTIN_WMINUH,
18182 ARM_BUILTIN_WMINUB,
18183
18184 ARM_BUILTIN_WMULUM,
18185 ARM_BUILTIN_WMULSM,
18186 ARM_BUILTIN_WMULUL,
18187
18188 ARM_BUILTIN_PSADBH,
18189 ARM_BUILTIN_WSHUFH,
18190
18191 ARM_BUILTIN_WSLLH,
18192 ARM_BUILTIN_WSLLW,
18193 ARM_BUILTIN_WSLLD,
18194 ARM_BUILTIN_WSRAH,
18195 ARM_BUILTIN_WSRAW,
18196 ARM_BUILTIN_WSRAD,
18197 ARM_BUILTIN_WSRLH,
18198 ARM_BUILTIN_WSRLW,
18199 ARM_BUILTIN_WSRLD,
18200 ARM_BUILTIN_WRORH,
18201 ARM_BUILTIN_WRORW,
18202 ARM_BUILTIN_WRORD,
18203 ARM_BUILTIN_WSLLHI,
18204 ARM_BUILTIN_WSLLWI,
18205 ARM_BUILTIN_WSLLDI,
18206 ARM_BUILTIN_WSRAHI,
18207 ARM_BUILTIN_WSRAWI,
18208 ARM_BUILTIN_WSRADI,
18209 ARM_BUILTIN_WSRLHI,
18210 ARM_BUILTIN_WSRLWI,
18211 ARM_BUILTIN_WSRLDI,
18212 ARM_BUILTIN_WRORHI,
18213 ARM_BUILTIN_WRORWI,
18214 ARM_BUILTIN_WRORDI,
18215
18216 ARM_BUILTIN_WUNPCKIHB,
18217 ARM_BUILTIN_WUNPCKIHH,
18218 ARM_BUILTIN_WUNPCKIHW,
18219 ARM_BUILTIN_WUNPCKILB,
18220 ARM_BUILTIN_WUNPCKILH,
18221 ARM_BUILTIN_WUNPCKILW,
18222
18223 ARM_BUILTIN_WUNPCKEHSB,
18224 ARM_BUILTIN_WUNPCKEHSH,
18225 ARM_BUILTIN_WUNPCKEHSW,
18226 ARM_BUILTIN_WUNPCKEHUB,
18227 ARM_BUILTIN_WUNPCKEHUH,
18228 ARM_BUILTIN_WUNPCKEHUW,
18229 ARM_BUILTIN_WUNPCKELSB,
18230 ARM_BUILTIN_WUNPCKELSH,
18231 ARM_BUILTIN_WUNPCKELSW,
18232 ARM_BUILTIN_WUNPCKELUB,
18233 ARM_BUILTIN_WUNPCKELUH,
18234 ARM_BUILTIN_WUNPCKELUW,
18235
18236 ARM_BUILTIN_THREAD_POINTER,
18237
18238 ARM_BUILTIN_NEON_BASE,
18239
18240 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
18241 };
18242
18243 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
18244
18245 static void
18246 arm_init_neon_builtins (void)
18247 {
18248 unsigned int i, fcode;
18249 tree decl;
18250
18251 tree neon_intQI_type_node;
18252 tree neon_intHI_type_node;
18253 tree neon_polyQI_type_node;
18254 tree neon_polyHI_type_node;
18255 tree neon_intSI_type_node;
18256 tree neon_intDI_type_node;
18257 tree neon_float_type_node;
18258
18259 tree intQI_pointer_node;
18260 tree intHI_pointer_node;
18261 tree intSI_pointer_node;
18262 tree intDI_pointer_node;
18263 tree float_pointer_node;
18264
18265 tree const_intQI_node;
18266 tree const_intHI_node;
18267 tree const_intSI_node;
18268 tree const_intDI_node;
18269 tree const_float_node;
18270
18271 tree const_intQI_pointer_node;
18272 tree const_intHI_pointer_node;
18273 tree const_intSI_pointer_node;
18274 tree const_intDI_pointer_node;
18275 tree const_float_pointer_node;
18276
18277 tree V8QI_type_node;
18278 tree V4HI_type_node;
18279 tree V2SI_type_node;
18280 tree V2SF_type_node;
18281 tree V16QI_type_node;
18282 tree V8HI_type_node;
18283 tree V4SI_type_node;
18284 tree V4SF_type_node;
18285 tree V2DI_type_node;
18286
18287 tree intUQI_type_node;
18288 tree intUHI_type_node;
18289 tree intUSI_type_node;
18290 tree intUDI_type_node;
18291
18292 tree intEI_type_node;
18293 tree intOI_type_node;
18294 tree intCI_type_node;
18295 tree intXI_type_node;
18296
18297 tree V8QI_pointer_node;
18298 tree V4HI_pointer_node;
18299 tree V2SI_pointer_node;
18300 tree V2SF_pointer_node;
18301 tree V16QI_pointer_node;
18302 tree V8HI_pointer_node;
18303 tree V4SI_pointer_node;
18304 tree V4SF_pointer_node;
18305 tree V2DI_pointer_node;
18306
18307 tree void_ftype_pv8qi_v8qi_v8qi;
18308 tree void_ftype_pv4hi_v4hi_v4hi;
18309 tree void_ftype_pv2si_v2si_v2si;
18310 tree void_ftype_pv2sf_v2sf_v2sf;
18311 tree void_ftype_pdi_di_di;
18312 tree void_ftype_pv16qi_v16qi_v16qi;
18313 tree void_ftype_pv8hi_v8hi_v8hi;
18314 tree void_ftype_pv4si_v4si_v4si;
18315 tree void_ftype_pv4sf_v4sf_v4sf;
18316 tree void_ftype_pv2di_v2di_v2di;
18317
18318 tree reinterp_ftype_dreg[5][5];
18319 tree reinterp_ftype_qreg[5][5];
18320 tree dreg_types[5], qreg_types[5];
18321
18322 /* Create distinguished type nodes for NEON vector element types,
18323 and pointers to values of such types, so we can detect them later. */
18324 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18325 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18326 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18327 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18328 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
18329 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
18330 neon_float_type_node = make_node (REAL_TYPE);
18331 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18332 layout_type (neon_float_type_node);
18333
18334 /* Define typedefs which exactly correspond to the modes we are basing vector
18335 types on. If you change these names you'll need to change
18336 the table used by arm_mangle_type too. */
18337 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
18338 "__builtin_neon_qi");
18339 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18340 "__builtin_neon_hi");
18341 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18342 "__builtin_neon_si");
18343 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18344 "__builtin_neon_sf");
18345 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
18346 "__builtin_neon_di");
18347 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
18348 "__builtin_neon_poly8");
18349 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
18350 "__builtin_neon_poly16");
18351
18352 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
18353 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
18354 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
18355 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
18356 float_pointer_node = build_pointer_type (neon_float_type_node);
18357
18358 /* Next create constant-qualified versions of the above types. */
18359 const_intQI_node = build_qualified_type (neon_intQI_type_node,
18360 TYPE_QUAL_CONST);
18361 const_intHI_node = build_qualified_type (neon_intHI_type_node,
18362 TYPE_QUAL_CONST);
18363 const_intSI_node = build_qualified_type (neon_intSI_type_node,
18364 TYPE_QUAL_CONST);
18365 const_intDI_node = build_qualified_type (neon_intDI_type_node,
18366 TYPE_QUAL_CONST);
18367 const_float_node = build_qualified_type (neon_float_type_node,
18368 TYPE_QUAL_CONST);
18369
18370 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
18371 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
18372 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
18373 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
18374 const_float_pointer_node = build_pointer_type (const_float_node);
18375
18376 /* Now create vector types based on our NEON element types. */
18377 /* 64-bit vectors. */
18378 V8QI_type_node =
18379 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
18380 V4HI_type_node =
18381 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
18382 V2SI_type_node =
18383 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
18384 V2SF_type_node =
18385 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
18386 /* 128-bit vectors. */
18387 V16QI_type_node =
18388 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
18389 V8HI_type_node =
18390 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
18391 V4SI_type_node =
18392 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
18393 V4SF_type_node =
18394 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
18395 V2DI_type_node =
18396 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
18397
18398 /* Unsigned integer types for various mode sizes. */
18399 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
18400 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
18401 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
18402 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
18403
18404 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
18405 "__builtin_neon_uqi");
18406 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
18407 "__builtin_neon_uhi");
18408 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
18409 "__builtin_neon_usi");
18410 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
18411 "__builtin_neon_udi");
18412
18413 /* Opaque integer types for structures of vectors. */
18414 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
18415 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
18416 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
18417 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
18418
18419 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
18420 "__builtin_neon_ti");
18421 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
18422 "__builtin_neon_ei");
18423 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
18424 "__builtin_neon_oi");
18425 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
18426 "__builtin_neon_ci");
18427 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
18428 "__builtin_neon_xi");
18429
18430 /* Pointers to vector types. */
18431 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
18432 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
18433 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
18434 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
18435 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
18436 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
18437 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18438 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18439 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18440
18441 /* Operations which return results as pairs. */
18442 void_ftype_pv8qi_v8qi_v8qi =
18443 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18444 V8QI_type_node, NULL);
18445 void_ftype_pv4hi_v4hi_v4hi =
18446 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18447 V4HI_type_node, NULL);
18448 void_ftype_pv2si_v2si_v2si =
18449 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18450 V2SI_type_node, NULL);
18451 void_ftype_pv2sf_v2sf_v2sf =
18452 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18453 V2SF_type_node, NULL);
18454 void_ftype_pdi_di_di =
18455 build_function_type_list (void_type_node, intDI_pointer_node,
18456 neon_intDI_type_node, neon_intDI_type_node, NULL);
18457 void_ftype_pv16qi_v16qi_v16qi =
18458 build_function_type_list (void_type_node, V16QI_pointer_node,
18459 V16QI_type_node, V16QI_type_node, NULL);
18460 void_ftype_pv8hi_v8hi_v8hi =
18461 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18462 V8HI_type_node, NULL);
18463 void_ftype_pv4si_v4si_v4si =
18464 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18465 V4SI_type_node, NULL);
18466 void_ftype_pv4sf_v4sf_v4sf =
18467 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18468 V4SF_type_node, NULL);
18469 void_ftype_pv2di_v2di_v2di =
18470 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18471 V2DI_type_node, NULL);
18472
18473 dreg_types[0] = V8QI_type_node;
18474 dreg_types[1] = V4HI_type_node;
18475 dreg_types[2] = V2SI_type_node;
18476 dreg_types[3] = V2SF_type_node;
18477 dreg_types[4] = neon_intDI_type_node;
18478
18479 qreg_types[0] = V16QI_type_node;
18480 qreg_types[1] = V8HI_type_node;
18481 qreg_types[2] = V4SI_type_node;
18482 qreg_types[3] = V4SF_type_node;
18483 qreg_types[4] = V2DI_type_node;
18484
18485 for (i = 0; i < 5; i++)
18486 {
18487 int j;
18488 for (j = 0; j < 5; j++)
18489 {
18490 reinterp_ftype_dreg[i][j]
18491 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
18492 reinterp_ftype_qreg[i][j]
18493 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
18494 }
18495 }
18496
18497 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
18498 i < ARRAY_SIZE (neon_builtin_data);
18499 i++, fcode++)
18500 {
18501 neon_builtin_datum *d = &neon_builtin_data[i];
18502
18503 const char* const modenames[] = {
18504 "v8qi", "v4hi", "v2si", "v2sf", "di",
18505 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
18506 "ti", "ei", "oi"
18507 };
18508 char namebuf[60];
18509 tree ftype = NULL;
18510 int is_load = 0, is_store = 0;
18511
18512 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
18513
18514 d->fcode = fcode;
18515
18516 switch (d->itype)
18517 {
18518 case NEON_LOAD1:
18519 case NEON_LOAD1LANE:
18520 case NEON_LOADSTRUCT:
18521 case NEON_LOADSTRUCTLANE:
18522 is_load = 1;
18523 /* Fall through. */
18524 case NEON_STORE1:
18525 case NEON_STORE1LANE:
18526 case NEON_STORESTRUCT:
18527 case NEON_STORESTRUCTLANE:
18528 if (!is_load)
18529 is_store = 1;
18530 /* Fall through. */
18531 case NEON_UNOP:
18532 case NEON_BINOP:
18533 case NEON_LOGICBINOP:
18534 case NEON_SHIFTINSERT:
18535 case NEON_TERNOP:
18536 case NEON_GETLANE:
18537 case NEON_SETLANE:
18538 case NEON_CREATE:
18539 case NEON_DUP:
18540 case NEON_DUPLANE:
18541 case NEON_SHIFTIMM:
18542 case NEON_SHIFTACC:
18543 case NEON_COMBINE:
18544 case NEON_SPLIT:
18545 case NEON_CONVERT:
18546 case NEON_FIXCONV:
18547 case NEON_LANEMUL:
18548 case NEON_LANEMULL:
18549 case NEON_LANEMULH:
18550 case NEON_LANEMAC:
18551 case NEON_SCALARMUL:
18552 case NEON_SCALARMULL:
18553 case NEON_SCALARMULH:
18554 case NEON_SCALARMAC:
18555 case NEON_SELECT:
18556 case NEON_VTBL:
18557 case NEON_VTBX:
18558 {
18559 int k;
18560 tree return_type = void_type_node, args = void_list_node;
18561
18562 /* Build a function type directly from the insn_data for
18563 this builtin. The build_function_type() function takes
18564 care of removing duplicates for us. */
18565 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
18566 {
18567 tree eltype;
18568
18569 if (is_load && k == 1)
18570 {
18571 /* Neon load patterns always have the memory
18572 operand in the operand 1 position. */
18573 gcc_assert (insn_data[d->code].operand[k].predicate
18574 == neon_struct_operand);
18575
18576 switch (d->mode)
18577 {
18578 case T_V8QI:
18579 case T_V16QI:
18580 eltype = const_intQI_pointer_node;
18581 break;
18582
18583 case T_V4HI:
18584 case T_V8HI:
18585 eltype = const_intHI_pointer_node;
18586 break;
18587
18588 case T_V2SI:
18589 case T_V4SI:
18590 eltype = const_intSI_pointer_node;
18591 break;
18592
18593 case T_V2SF:
18594 case T_V4SF:
18595 eltype = const_float_pointer_node;
18596 break;
18597
18598 case T_DI:
18599 case T_V2DI:
18600 eltype = const_intDI_pointer_node;
18601 break;
18602
18603 default: gcc_unreachable ();
18604 }
18605 }
18606 else if (is_store && k == 0)
18607 {
18608 /* Similarly, Neon store patterns use operand 0 as
18609 the memory location to store to. */
18610 gcc_assert (insn_data[d->code].operand[k].predicate
18611 == neon_struct_operand);
18612
18613 switch (d->mode)
18614 {
18615 case T_V8QI:
18616 case T_V16QI:
18617 eltype = intQI_pointer_node;
18618 break;
18619
18620 case T_V4HI:
18621 case T_V8HI:
18622 eltype = intHI_pointer_node;
18623 break;
18624
18625 case T_V2SI:
18626 case T_V4SI:
18627 eltype = intSI_pointer_node;
18628 break;
18629
18630 case T_V2SF:
18631 case T_V4SF:
18632 eltype = float_pointer_node;
18633 break;
18634
18635 case T_DI:
18636 case T_V2DI:
18637 eltype = intDI_pointer_node;
18638 break;
18639
18640 default: gcc_unreachable ();
18641 }
18642 }
18643 else
18644 {
18645 switch (insn_data[d->code].operand[k].mode)
18646 {
18647 case VOIDmode: eltype = void_type_node; break;
18648 /* Scalars. */
18649 case QImode: eltype = neon_intQI_type_node; break;
18650 case HImode: eltype = neon_intHI_type_node; break;
18651 case SImode: eltype = neon_intSI_type_node; break;
18652 case SFmode: eltype = neon_float_type_node; break;
18653 case DImode: eltype = neon_intDI_type_node; break;
18654 case TImode: eltype = intTI_type_node; break;
18655 case EImode: eltype = intEI_type_node; break;
18656 case OImode: eltype = intOI_type_node; break;
18657 case CImode: eltype = intCI_type_node; break;
18658 case XImode: eltype = intXI_type_node; break;
18659 /* 64-bit vectors. */
18660 case V8QImode: eltype = V8QI_type_node; break;
18661 case V4HImode: eltype = V4HI_type_node; break;
18662 case V2SImode: eltype = V2SI_type_node; break;
18663 case V2SFmode: eltype = V2SF_type_node; break;
18664 /* 128-bit vectors. */
18665 case V16QImode: eltype = V16QI_type_node; break;
18666 case V8HImode: eltype = V8HI_type_node; break;
18667 case V4SImode: eltype = V4SI_type_node; break;
18668 case V4SFmode: eltype = V4SF_type_node; break;
18669 case V2DImode: eltype = V2DI_type_node; break;
18670 default: gcc_unreachable ();
18671 }
18672 }
18673
18674 if (k == 0 && !is_store)
18675 return_type = eltype;
18676 else
18677 args = tree_cons (NULL_TREE, eltype, args);
18678 }
18679
18680 ftype = build_function_type (return_type, args);
18681 }
18682 break;
18683
18684 case NEON_RESULTPAIR:
18685 {
18686 switch (insn_data[d->code].operand[1].mode)
18687 {
18688 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
18689 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
18690 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
18691 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
18692 case DImode: ftype = void_ftype_pdi_di_di; break;
18693 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
18694 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
18695 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
18696 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
18697 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
18698 default: gcc_unreachable ();
18699 }
18700 }
18701 break;
18702
18703 case NEON_REINTERP:
18704 {
18705 /* We iterate over 5 doubleword types, then 5 quadword
18706 types. */
18707 int rhs = d->mode % 5;
18708 switch (insn_data[d->code].operand[0].mode)
18709 {
18710 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
18711 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
18712 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
18713 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
18714 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
18715 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
18716 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
18717 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
18718 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
18719 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
18720 default: gcc_unreachable ();
18721 }
18722 }
18723 break;
18724
18725 default:
18726 gcc_unreachable ();
18727 }
18728
18729 gcc_assert (ftype != NULL);
18730
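/* For example, assuming neon_builtin_data contains an entry whose base name
   is "vadd" and whose mode is T_V8QI (the table itself lives elsewhere in
   this file), the name assembled below would be "__builtin_neon_vaddv8qi".  */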
18731 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
18732
18733 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
18734 NULL_TREE);
18735 arm_builtin_decls[fcode] = decl;
18736 }
18737 }
18738
18739 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
18740 do \
18741 { \
18742 if ((MASK) & insn_flags) \
18743 { \
18744 tree bdecl; \
18745 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
18746 BUILT_IN_MD, NULL, NULL_TREE); \
18747 arm_builtin_decls[CODE] = bdecl; \
18748 } \
18749 } \
18750 while (0)
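/* A minimal usage sketch: with the FL_IWMMXT entries of bdesc_2arg below,
   the registration loop in arm_init_iwmmxt_builtins effectively performs
   calls such as

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_waddb",
                   v8qi_ftype_v8qi_v8qi, ARM_BUILTIN_WADDB);

   adding the builtin only when FL_IWMMXT is present in insn_flags and
   recording its decl in arm_builtin_decls.  */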
18751
18752 struct builtin_description
18753 {
18754 const unsigned int mask;
18755 const enum insn_code icode;
18756 const char * const name;
18757 const enum arm_builtins code;
18758 const enum rtx_code comparison;
18759 const unsigned int flag;
18760 };
18761
18762 static const struct builtin_description bdesc_2arg[] =
18763 {
18764 #define IWMMXT_BUILTIN(code, string, builtin) \
18765 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
18766 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
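/* For instance, the first entry below,
     IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   expands to
     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },
   tying the insn pattern to the builtin's name and enum value.  */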
18767
18768 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
18769 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
18770 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
18771 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
18772 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
18773 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
18774 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
18775 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
18776 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
18777 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
18778 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
18779 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
18780 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
18781 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
18782 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
18783 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
18784 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
18785 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
18786 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
18787 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
18788 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
18789 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
18790 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
18791 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
18792 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
18793 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
18794 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
18795 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
18796 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
18797 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
18798 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
18799 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
18800 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
18801 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
18802 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
18803 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
18804 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
18805 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
18806 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
18807 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
18808 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
18809 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
18810 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
18811 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
18812 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
18813 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
18814 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
18815 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
18816 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
18817 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
18818 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
18819 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
18820 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
18821 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
18822 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
18823 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
18824 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
18825 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
18826
18827 #define IWMMXT_BUILTIN2(code, builtin) \
18828 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
18829
18830 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
18831 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
18832 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
18833 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
18834 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
18835 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
18836 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
18837 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
18838 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
18839 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
18840 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
18841 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
18842 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
18843 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
18844 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
18845 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
18846 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
18847 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
18848 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
18849 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
18850 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
18851 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
18852 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
18853 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
18854 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
18855 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
18856 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
18857 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
18858 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
18859 IWMMXT_BUILTIN2 (rordi3, WRORDI)
18860 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
18861 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
18862 };
18863
18864 static const struct builtin_description bdesc_1arg[] =
18865 {
18866 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
18867 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
18868 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
18869 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
18870 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
18871 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
18872 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
18873 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
18874 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
18875 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
18876 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
18877 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
18878 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
18879 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
18880 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
18881 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
18882 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
18883 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
18884 };
18885
18886 /* Set up all the iWMMXt builtins. This is not called if
18887 TARGET_IWMMXT is zero. */
18888
18889 static void
18890 arm_init_iwmmxt_builtins (void)
18891 {
18892 const struct builtin_description * d;
18893 size_t i;
18894
18895 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18896 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18897 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
18898
18899 tree int_ftype_int
18900 = build_function_type_list (integer_type_node,
18901 integer_type_node, NULL_TREE);
18902 tree v8qi_ftype_v8qi_v8qi_int
18903 = build_function_type_list (V8QI_type_node,
18904 V8QI_type_node, V8QI_type_node,
18905 integer_type_node, NULL_TREE);
18906 tree v4hi_ftype_v4hi_int
18907 = build_function_type_list (V4HI_type_node,
18908 V4HI_type_node, integer_type_node, NULL_TREE);
18909 tree v2si_ftype_v2si_int
18910 = build_function_type_list (V2SI_type_node,
18911 V2SI_type_node, integer_type_node, NULL_TREE);
18912 tree v2si_ftype_di_di
18913 = build_function_type_list (V2SI_type_node,
18914 long_long_integer_type_node,
18915 long_long_integer_type_node,
18916 NULL_TREE);
18917 tree di_ftype_di_int
18918 = build_function_type_list (long_long_integer_type_node,
18919 long_long_integer_type_node,
18920 integer_type_node, NULL_TREE);
18921 tree di_ftype_di_int_int
18922 = build_function_type_list (long_long_integer_type_node,
18923 long_long_integer_type_node,
18924 integer_type_node,
18925 integer_type_node, NULL_TREE);
18926 tree int_ftype_v8qi
18927 = build_function_type_list (integer_type_node,
18928 V8QI_type_node, NULL_TREE);
18929 tree int_ftype_v4hi
18930 = build_function_type_list (integer_type_node,
18931 V4HI_type_node, NULL_TREE);
18932 tree int_ftype_v2si
18933 = build_function_type_list (integer_type_node,
18934 V2SI_type_node, NULL_TREE);
18935 tree int_ftype_v8qi_int
18936 = build_function_type_list (integer_type_node,
18937 V8QI_type_node, integer_type_node, NULL_TREE);
18938 tree int_ftype_v4hi_int
18939 = build_function_type_list (integer_type_node,
18940 V4HI_type_node, integer_type_node, NULL_TREE);
18941 tree int_ftype_v2si_int
18942 = build_function_type_list (integer_type_node,
18943 V2SI_type_node, integer_type_node, NULL_TREE);
18944 tree v8qi_ftype_v8qi_int_int
18945 = build_function_type_list (V8QI_type_node,
18946 V8QI_type_node, integer_type_node,
18947 integer_type_node, NULL_TREE);
18948 tree v4hi_ftype_v4hi_int_int
18949 = build_function_type_list (V4HI_type_node,
18950 V4HI_type_node, integer_type_node,
18951 integer_type_node, NULL_TREE);
18952 tree v2si_ftype_v2si_int_int
18953 = build_function_type_list (V2SI_type_node,
18954 V2SI_type_node, integer_type_node,
18955 integer_type_node, NULL_TREE);
18956 /* Miscellaneous. */
18957 tree v8qi_ftype_v4hi_v4hi
18958 = build_function_type_list (V8QI_type_node,
18959 V4HI_type_node, V4HI_type_node, NULL_TREE);
18960 tree v4hi_ftype_v2si_v2si
18961 = build_function_type_list (V4HI_type_node,
18962 V2SI_type_node, V2SI_type_node, NULL_TREE);
18963 tree v2si_ftype_v4hi_v4hi
18964 = build_function_type_list (V2SI_type_node,
18965 V4HI_type_node, V4HI_type_node, NULL_TREE);
18966 tree v2si_ftype_v8qi_v8qi
18967 = build_function_type_list (V2SI_type_node,
18968 V8QI_type_node, V8QI_type_node, NULL_TREE);
18969 tree v4hi_ftype_v4hi_di
18970 = build_function_type_list (V4HI_type_node,
18971 V4HI_type_node, long_long_integer_type_node,
18972 NULL_TREE);
18973 tree v2si_ftype_v2si_di
18974 = build_function_type_list (V2SI_type_node,
18975 V2SI_type_node, long_long_integer_type_node,
18976 NULL_TREE);
18977 tree void_ftype_int_int
18978 = build_function_type_list (void_type_node,
18979 integer_type_node, integer_type_node,
18980 NULL_TREE);
18981 tree di_ftype_void
18982 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
18983 tree di_ftype_v8qi
18984 = build_function_type_list (long_long_integer_type_node,
18985 V8QI_type_node, NULL_TREE);
18986 tree di_ftype_v4hi
18987 = build_function_type_list (long_long_integer_type_node,
18988 V4HI_type_node, NULL_TREE);
18989 tree di_ftype_v2si
18990 = build_function_type_list (long_long_integer_type_node,
18991 V2SI_type_node, NULL_TREE);
18992 tree v2si_ftype_v4hi
18993 = build_function_type_list (V2SI_type_node,
18994 V4HI_type_node, NULL_TREE);
18995 tree v4hi_ftype_v8qi
18996 = build_function_type_list (V4HI_type_node,
18997 V8QI_type_node, NULL_TREE);
18998
18999 tree di_ftype_di_v4hi_v4hi
19000 = build_function_type_list (long_long_unsigned_type_node,
19001 long_long_unsigned_type_node,
19002 V4HI_type_node, V4HI_type_node,
19003 NULL_TREE);
19004
19005 tree di_ftype_v4hi_v4hi
19006 = build_function_type_list (long_long_unsigned_type_node,
19007 V4HI_type_node, V4HI_type_node,
19008 NULL_TREE);
19009
19010 /* Normal vector binops. */
19011 tree v8qi_ftype_v8qi_v8qi
19012 = build_function_type_list (V8QI_type_node,
19013 V8QI_type_node, V8QI_type_node, NULL_TREE);
19014 tree v4hi_ftype_v4hi_v4hi
19015 = build_function_type_list (V4HI_type_node,
19016 V4HI_type_node, V4HI_type_node, NULL_TREE);
19017 tree v2si_ftype_v2si_v2si
19018 = build_function_type_list (V2SI_type_node,
19019 V2SI_type_node, V2SI_type_node, NULL_TREE);
19020 tree di_ftype_di_di
19021 = build_function_type_list (long_long_unsigned_type_node,
19022 long_long_unsigned_type_node,
19023 long_long_unsigned_type_node,
19024 NULL_TREE);
19025
19026 /* Add all builtins that are more or less simple operations on two
19027 operands. */
19028 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19029 {
19030 /* Use one of the operands; the target can have a different mode for
19031 mask-generating compares. */
19032 enum machine_mode mode;
19033 tree type;
19034
19035 if (d->name == 0)
19036 continue;
19037
19038 mode = insn_data[d->icode].operand[1].mode;
19039
19040 switch (mode)
19041 {
19042 case V8QImode:
19043 type = v8qi_ftype_v8qi_v8qi;
19044 break;
19045 case V4HImode:
19046 type = v4hi_ftype_v4hi_v4hi;
19047 break;
19048 case V2SImode:
19049 type = v2si_ftype_v2si_v2si;
19050 break;
19051 case DImode:
19052 type = di_ftype_di_di;
19053 break;
19054
19055 default:
19056 gcc_unreachable ();
19057 }
19058
19059 def_mbuiltin (d->mask, d->name, type, d->code);
19060 }
19061
19062 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
19063 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
19064 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
19065 ARM_BUILTIN_ ## CODE)
19066
19067 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
19068 iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX);
19069 iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX);
19070
19071 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
19072 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
19073 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
19074 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
19075 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
19076 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
19077
19078 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
19079 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
19080 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
19081 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
19082 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
19083 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
19084
19085 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
19086 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
19087 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
19088 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
19089 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
19090 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
19091
19092 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
19093 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
19094 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
19095 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
19096 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
19097 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
19098
19099 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
19100
19101 iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB);
19102 iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH);
19103 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
19104 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
19105
19106 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
19107 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
19108 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
19109 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
19110 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
19111 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
19112 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
19113 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
19114 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
19115
19116 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
19117 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
19118 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
19119
19120 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
19121 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
19122 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
19123
19124 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
19125 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
19126 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
19127 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
19128 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
19129 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
19130
19131 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
19132 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
19133 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
19134 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
19135 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
19136 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
19137 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
19138 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
19139 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
19140 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
19141 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
19142 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
19143
19144 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
19145 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
19146 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
19147 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
19148
19149 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN);
19150 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
19151 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
19152 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
19153 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
19154 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
19155 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
19156
19157 #undef iwmmx_mbuiltin
19158 }
19159
19160 static void
19161 arm_init_tls_builtins (void)
19162 {
19163 tree ftype, decl;
19164
19165 ftype = build_function_type (ptr_type_node, void_list_node);
19166 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
19167 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
19168 NULL, NULL_TREE);
19169 TREE_NOTHROW (decl) = 1;
19170 TREE_READONLY (decl) = 1;
19171 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
19172 }
19173
19174 static void
19175 arm_init_fp16_builtins (void)
19176 {
19177 tree fp16_type = make_node (REAL_TYPE);
19178 TYPE_PRECISION (fp16_type) = 16;
19179 layout_type (fp16_type);
19180 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
19181 }
19182
19183 static void
19184 arm_init_builtins (void)
19185 {
19186 arm_init_tls_builtins ();
19187
19188 if (TARGET_REALLY_IWMMXT)
19189 arm_init_iwmmxt_builtins ();
19190
19191 if (TARGET_NEON)
19192 arm_init_neon_builtins ();
19193
19194 if (arm_fp16_format)
19195 arm_init_fp16_builtins ();
19196 }
19197
19198 /* Return the ARM builtin for CODE. */
19199
19200 static tree
19201 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
19202 {
19203 if (code >= ARM_BUILTIN_MAX)
19204 return error_mark_node;
19205
19206 return arm_builtin_decls[code];
19207 }
19208
19209 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19210
19211 static const char *
19212 arm_invalid_parameter_type (const_tree t)
19213 {
19214 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19215 return N_("function parameters cannot have __fp16 type");
19216 return NULL;
19217 }
19218
19219 /* Implement TARGET_INVALID_RETURN_TYPE. */
19220
19221 static const char *
19222 arm_invalid_return_type (const_tree t)
19223 {
19224 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19225 return N_("functions cannot return __fp16 type");
19226 return NULL;
19227 }
19228
19229 /* Implement TARGET_PROMOTED_TYPE. */
19230
19231 static tree
19232 arm_promoted_type (const_tree t)
19233 {
19234 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19235 return float_type_node;
19236 return NULL_TREE;
19237 }
19238
19239 /* Implement TARGET_CONVERT_TO_TYPE.
19240 Specifically, this hook implements the peculiarity of the ARM
19241 half-precision floating-point C semantics that requires conversions between
19242 __fp16 and double to go through an intermediate conversion to float. */
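/* As a sketch of the effect on user code: for

     __fp16 h;
     double d = h;

   the conversion is performed as (double) (float) h rather than in one
   step, and "h = d" likewise goes through float first; this is what the
   nested convert () calls below implement for precisions of 16 versus
   more than 32 bits.  */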
19243
19244 static tree
19245 arm_convert_to_type (tree type, tree expr)
19246 {
19247 tree fromtype = TREE_TYPE (expr);
19248 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
19249 return NULL_TREE;
19250 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
19251 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
19252 return convert (type, convert (float_type_node, expr));
19253 return NULL_TREE;
19254 }
19255
19256 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
19257 This simply adds HFmode as a supported mode; even though we don't
19258 implement arithmetic on this type directly, it's supported by
19259 optabs conversions, much the way the double-word arithmetic is
19260 special-cased in the default hook. */
19261
19262 static bool
19263 arm_scalar_mode_supported_p (enum machine_mode mode)
19264 {
19265 if (mode == HFmode)
19266 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
19267 else
19268 return default_scalar_mode_supported_p (mode);
19269 }
19270
19271 /* Errors in the source file can cause expand_expr to return const0_rtx
19272 where we expect a vector. To avoid crashing, use one of the vector
19273 clear instructions. */
19274
19275 static rtx
19276 safe_vector_operand (rtx x, enum machine_mode mode)
19277 {
19278 if (x != const0_rtx)
19279 return x;
19280 x = gen_reg_rtx (mode);
19281
19282 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
19283 : gen_rtx_SUBREG (DImode, x, 0)));
19284 return x;
19285 }
19286
19287 /* Subroutine of arm_expand_builtin to take care of binop insns. */
19288
19289 static rtx
19290 arm_expand_binop_builtin (enum insn_code icode,
19291 tree exp, rtx target)
19292 {
19293 rtx pat;
19294 tree arg0 = CALL_EXPR_ARG (exp, 0);
19295 tree arg1 = CALL_EXPR_ARG (exp, 1);
19296 rtx op0 = expand_normal (arg0);
19297 rtx op1 = expand_normal (arg1);
19298 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19299 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19300 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19301
19302 if (VECTOR_MODE_P (mode0))
19303 op0 = safe_vector_operand (op0, mode0);
19304 if (VECTOR_MODE_P (mode1))
19305 op1 = safe_vector_operand (op1, mode1);
19306
19307 if (! target
19308 || GET_MODE (target) != tmode
19309 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19310 target = gen_reg_rtx (tmode);
19311
19312 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
19313
19314 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19315 op0 = copy_to_mode_reg (mode0, op0);
19316 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19317 op1 = copy_to_mode_reg (mode1, op1);
19318
19319 pat = GEN_FCN (icode) (target, op0, op1);
19320 if (! pat)
19321 return 0;
19322 emit_insn (pat);
19323 return target;
19324 }
19325
19326 /* Subroutine of arm_expand_builtin to take care of unop insns. */
19327
19328 static rtx
19329 arm_expand_unop_builtin (enum insn_code icode,
19330 tree exp, rtx target, int do_load)
19331 {
19332 rtx pat;
19333 tree arg0 = CALL_EXPR_ARG (exp, 0);
19334 rtx op0 = expand_normal (arg0);
19335 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19336 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19337
19338 if (! target
19339 || GET_MODE (target) != tmode
19340 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19341 target = gen_reg_rtx (tmode);
19342 if (do_load)
19343 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19344 else
19345 {
19346 if (VECTOR_MODE_P (mode0))
19347 op0 = safe_vector_operand (op0, mode0);
19348
19349 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19350 op0 = copy_to_mode_reg (mode0, op0);
19351 }
19352
19353 pat = GEN_FCN (icode) (target, op0);
19354 if (! pat)
19355 return 0;
19356 emit_insn (pat);
19357 return target;
19358 }
19359
19360 typedef enum {
19361 NEON_ARG_COPY_TO_REG,
19362 NEON_ARG_CONSTANT,
19363 NEON_ARG_MEMORY,
19364 NEON_ARG_STOP
19365 } builtin_arg;
19366
19367 #define NEON_MAX_BUILTIN_ARGS 5
19368
19369 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
19370 and return an expression for the accessed memory.
19371
19372 The intrinsic function operates on a block of registers that has
19373 mode REG_MODE. This block contains vectors of type TYPE_MODE.
19374 The function references the memory at EXP in mode MEM_MODE;
19375 this mode may be BLKmode if no more suitable mode is available. */
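/* A worked example of the rules below: for a lane load whose register block
   occupies two D registers, REG_MODE is 16 bytes wide and TYPE_MODE is a
   D-register type, so the vector size is 8 and NVECTORS is 2; since
   MEM_MODE differs from REG_MODE, NELEMS is also 2 and the access is typed
   as a two-element array of the pointer's element type.  */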
19376
19377 static tree
19378 neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
19379 enum machine_mode reg_mode,
19380 neon_builtin_type_mode type_mode)
19381 {
19382 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
19383 tree elem_type, upper_bound, array_type;
19384
19385 /* Work out the size of the register block in bytes. */
19386 reg_size = GET_MODE_SIZE (reg_mode);
19387
19388 /* Work out the size of each vector in bytes. */
19389 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
19390 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
19391
19392 /* Work out how many vectors there are. */
19393 gcc_assert (reg_size % vector_size == 0);
19394 nvectors = reg_size / vector_size;
19395
19396 /* Work out how many elements are being loaded or stored.
19397 MEM_MODE == REG_MODE implies a one-to-one mapping between register
19398 and memory elements; anything else implies a lane load or store. */
19399 if (mem_mode == reg_mode)
19400 nelems = vector_size * nvectors;
19401 else
19402 nelems = nvectors;
19403
19404 /* Work out the type of each element. */
19405 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
19406 elem_type = TREE_TYPE (TREE_TYPE (exp));
19407
19408 /* Create a type that describes the full access. */
19409 upper_bound = build_int_cst (size_type_node, nelems - 1);
19410 array_type = build_array_type (elem_type, build_index_type (upper_bound));
19411
19412 /* Dereference EXP using that type. */
19413 exp = convert (build_pointer_type (array_type), exp);
19414 return fold_build2 (MEM_REF, array_type, exp,
19415 build_int_cst (TREE_TYPE (exp), 0));
19416 }
19417
19418 /* Expand a Neon builtin. */
19419 static rtx
19420 arm_expand_neon_args (rtx target, int icode, int have_retval,
19421 neon_builtin_type_mode type_mode,
19422 tree exp, ...)
19423 {
19424 va_list ap;
19425 rtx pat;
19426 tree arg[NEON_MAX_BUILTIN_ARGS];
19427 rtx op[NEON_MAX_BUILTIN_ARGS];
19428 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19429 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
19430 enum machine_mode other_mode;
19431 int argc = 0;
19432 int opno;
19433
19434 if (have_retval
19435 && (!target
19436 || GET_MODE (target) != tmode
19437 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
19438 target = gen_reg_rtx (tmode);
19439
19440 va_start (ap, exp);
19441
19442 for (;;)
19443 {
19444 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
19445
19446 if (thisarg == NEON_ARG_STOP)
19447 break;
19448 else
19449 {
19450 opno = argc + have_retval;
19451 mode[argc] = insn_data[icode].operand[opno].mode;
19452 arg[argc] = CALL_EXPR_ARG (exp, argc);
19453 if (thisarg == NEON_ARG_MEMORY)
19454 {
19455 other_mode = insn_data[icode].operand[1 - opno].mode;
19456 arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
19457 other_mode, type_mode);
19458 }
19459 op[argc] = expand_normal (arg[argc]);
19460
19461 switch (thisarg)
19462 {
19463 case NEON_ARG_COPY_TO_REG:
19464 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19465 if (!(*insn_data[icode].operand[opno].predicate)
19466 (op[argc], mode[argc]))
19467 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
19468 break;
19469
19470 case NEON_ARG_CONSTANT:
19471 /* FIXME: This error message is somewhat unhelpful. */
19472 if (!(*insn_data[icode].operand[opno].predicate)
19473 (op[argc], mode[argc]))
19474 error ("argument must be a constant");
19475 break;
19476
19477 case NEON_ARG_MEMORY:
19478 gcc_assert (MEM_P (op[argc]));
19479 PUT_MODE (op[argc], mode[argc]);
19480 /* ??? arm_neon.h uses the same built-in functions for signed
19481 and unsigned accesses, casting where necessary. This isn't
19482 alias safe. */
19483 set_mem_alias_set (op[argc], 0);
19484 if (!(*insn_data[icode].operand[opno].predicate)
19485 (op[argc], mode[argc]))
19486 op[argc] = (replace_equiv_address
19487 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
19488 break;
19489
19490 case NEON_ARG_STOP:
19491 gcc_unreachable ();
19492 }
19493
19494 argc++;
19495 }
19496 }
19497
19498 va_end (ap);
19499
19500 if (have_retval)
19501 switch (argc)
19502 {
19503 case 1:
19504 pat = GEN_FCN (icode) (target, op[0]);
19505 break;
19506
19507 case 2:
19508 pat = GEN_FCN (icode) (target, op[0], op[1]);
19509 break;
19510
19511 case 3:
19512 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
19513 break;
19514
19515 case 4:
19516 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
19517 break;
19518
19519 case 5:
19520 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
19521 break;
19522
19523 default:
19524 gcc_unreachable ();
19525 }
19526 else
19527 switch (argc)
19528 {
19529 case 1:
19530 pat = GEN_FCN (icode) (op[0]);
19531 break;
19532
19533 case 2:
19534 pat = GEN_FCN (icode) (op[0], op[1]);
19535 break;
19536
19537 case 3:
19538 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
19539 break;
19540
19541 case 4:
19542 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
19543 break;
19544
19545 case 5:
19546 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
19547 break;
19548
19549 default:
19550 gcc_unreachable ();
19551 }
19552
19553 if (!pat)
19554 return 0;
19555
19556 emit_insn (pat);
19557
19558 return target;
19559 }
19560
19561 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19562 constants defined per-instruction or per instruction-variant. Instead, the
19563 required info is looked up in the table neon_builtin_data. */
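/* Note on the trailing NEON_ARG_CONSTANT in many of the cases below: it
   describes the extra constant operand of the matching insn pattern (in
   this era arm_neon.h passes such a constant to select the element-type
   variant of the operation); arm_expand_neon_args checks it against the
   operand predicate and reports "argument must be a constant" otherwise.  */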
19564 static rtx
19565 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
19566 {
19567 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
19568 neon_itype itype = d->itype;
19569 enum insn_code icode = d->code;
19570 neon_builtin_type_mode type_mode = d->mode;
19571
19572 switch (itype)
19573 {
19574 case NEON_UNOP:
19575 case NEON_CONVERT:
19576 case NEON_DUPLANE:
19577 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19578 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19579
19580 case NEON_BINOP:
19581 case NEON_SETLANE:
19582 case NEON_SCALARMUL:
19583 case NEON_SCALARMULL:
19584 case NEON_SCALARMULH:
19585 case NEON_SHIFTINSERT:
19586 case NEON_LOGICBINOP:
19587 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19588 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19589 NEON_ARG_STOP);
19590
19591 case NEON_TERNOP:
19592 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19593 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19594 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19595
19596 case NEON_GETLANE:
19597 case NEON_FIXCONV:
19598 case NEON_SHIFTIMM:
19599 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19600 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
19601 NEON_ARG_STOP);
19602
19603 case NEON_CREATE:
19604 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19605 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19606
19607 case NEON_DUP:
19608 case NEON_SPLIT:
19609 case NEON_REINTERP:
19610 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19611 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19612
19613 case NEON_COMBINE:
19614 case NEON_VTBL:
19615 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19616 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19617
19618 case NEON_RESULTPAIR:
19619 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
19620 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19621 NEON_ARG_STOP);
19622
19623 case NEON_LANEMUL:
19624 case NEON_LANEMULL:
19625 case NEON_LANEMULH:
19626 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19627 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19628 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19629
19630 case NEON_LANEMAC:
19631 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19632 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19633 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19634
19635 case NEON_SHIFTACC:
19636 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19637 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19638 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19639
19640 case NEON_SCALARMAC:
19641 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19642 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19643 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19644
19645 case NEON_SELECT:
19646 case NEON_VTBX:
19647 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19648 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19649 NEON_ARG_STOP);
19650
19651 case NEON_LOAD1:
19652 case NEON_LOADSTRUCT:
19653 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19654 NEON_ARG_MEMORY, NEON_ARG_STOP);
19655
19656 case NEON_LOAD1LANE:
19657 case NEON_LOADSTRUCTLANE:
19658 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19659 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19660 NEON_ARG_STOP);
19661
19662 case NEON_STORE1:
19663 case NEON_STORESTRUCT:
19664 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
19665 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19666
19667 case NEON_STORE1LANE:
19668 case NEON_STORESTRUCTLANE:
19669 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
19670 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19671 NEON_ARG_STOP);
19672 }
19673
19674 gcc_unreachable ();
19675 }
19676
19677 /* Emit code to reinterpret one Neon type as another, without altering bits. */
19678 void
19679 neon_reinterpret (rtx dest, rtx src)
19680 {
19681 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
19682 }
19683
19684 /* Emit code to place a Neon pair result in two adjacent memory locations
19685 (the two halves of the pair living in registers of equal mode). */
19686 void
19687 neon_emit_pair_result_insn (enum machine_mode mode,
19688 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
19689 rtx op1, rtx op2)
19690 {
19691 rtx mem = gen_rtx_MEM (mode, destaddr);
19692 rtx tmp1 = gen_reg_rtx (mode);
19693 rtx tmp2 = gen_reg_rtx (mode);
19694
19695 emit_insn (intfn (tmp1, op1, op2, tmp2));
19696
19697 emit_move_insn (mem, tmp1);
19698 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
19699 emit_move_insn (mem, tmp2);
19700 }
19701
19702 /* Set up operands for a register copy from src to dest, taking care not to
19703 clobber registers in the process.
19704 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19705 be called with a large N, so that should be OK. */
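/* A minimal illustration with count == 2, dest = { d1, d2 } and
   src = { d0, d1 }: the copy d1 := d0 is deferred on the first pass
   because the still-pending source d1 overlaps its destination, so
   d2 := d1 is emitted first and d1 := d0 becomes safe on the next pass;
   the operands array then reads d2, d1, d1, d0.  */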
19706
19707 void
19708 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
19709 {
19710 unsigned int copied = 0, opctr = 0;
19711 unsigned int done = (1 << count) - 1;
19712 unsigned int i, j;
19713
19714 while (copied != done)
19715 {
19716 for (i = 0; i < count; i++)
19717 {
19718 int good = 1;
19719
19720 for (j = 0; good && j < count; j++)
19721 if (i != j && (copied & (1 << j)) == 0
19722 && reg_overlap_mentioned_p (src[j], dest[i]))
19723 good = 0;
19724
19725 if (good)
19726 {
19727 operands[opctr++] = dest[i];
19728 operands[opctr++] = src[i];
19729 copied |= 1 << i;
19730 }
19731 }
19732 }
19733
19734 gcc_assert (opctr == count * 2);
19735 }
19736
19737 /* Expand an expression EXP that calls a built-in function,
19738 with result going to TARGET if that's convenient
19739 (and in mode MODE if that's convenient).
19740 SUBTARGET may be used as the target for computing one of EXP's operands.
19741 IGNORE is nonzero if the value is to be ignored. */
19742
19743 static rtx
19744 arm_expand_builtin (tree exp,
19745 rtx target,
19746 rtx subtarget ATTRIBUTE_UNUSED,
19747 enum machine_mode mode ATTRIBUTE_UNUSED,
19748 int ignore ATTRIBUTE_UNUSED)
19749 {
19750 const struct builtin_description * d;
19751 enum insn_code icode;
19752 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19753 tree arg0;
19754 tree arg1;
19755 tree arg2;
19756 rtx op0;
19757 rtx op1;
19758 rtx op2;
19759 rtx pat;
19760 int fcode = DECL_FUNCTION_CODE (fndecl);
19761 size_t i;
19762 enum machine_mode tmode;
19763 enum machine_mode mode0;
19764 enum machine_mode mode1;
19765 enum machine_mode mode2;
19766
19767 if (fcode >= ARM_BUILTIN_NEON_BASE)
19768 return arm_expand_neon_builtin (fcode, exp, target);
19769
19770 switch (fcode)
19771 {
19772 case ARM_BUILTIN_TEXTRMSB:
19773 case ARM_BUILTIN_TEXTRMUB:
19774 case ARM_BUILTIN_TEXTRMSH:
19775 case ARM_BUILTIN_TEXTRMUH:
19776 case ARM_BUILTIN_TEXTRMSW:
19777 case ARM_BUILTIN_TEXTRMUW:
19778 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
19779 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
19780 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
19781 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
19782 : CODE_FOR_iwmmxt_textrmw);
19783
19784 arg0 = CALL_EXPR_ARG (exp, 0);
19785 arg1 = CALL_EXPR_ARG (exp, 1);
19786 op0 = expand_normal (arg0);
19787 op1 = expand_normal (arg1);
19788 tmode = insn_data[icode].operand[0].mode;
19789 mode0 = insn_data[icode].operand[1].mode;
19790 mode1 = insn_data[icode].operand[2].mode;
19791
19792 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19793 op0 = copy_to_mode_reg (mode0, op0);
19794 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19795 {
19796 /* @@@ better error message */
19797 error ("selector must be an immediate");
19798 return gen_reg_rtx (tmode);
19799 }
19800 if (target == 0
19801 || GET_MODE (target) != tmode
19802 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19803 target = gen_reg_rtx (tmode);
19804 pat = GEN_FCN (icode) (target, op0, op1);
19805 if (! pat)
19806 return 0;
19807 emit_insn (pat);
19808 return target;
19809
19810 case ARM_BUILTIN_TINSRB:
19811 case ARM_BUILTIN_TINSRH:
19812 case ARM_BUILTIN_TINSRW:
19813 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
19814 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
19815 : CODE_FOR_iwmmxt_tinsrw);
19816 arg0 = CALL_EXPR_ARG (exp, 0);
19817 arg1 = CALL_EXPR_ARG (exp, 1);
19818 arg2 = CALL_EXPR_ARG (exp, 2);
19819 op0 = expand_normal (arg0);
19820 op1 = expand_normal (arg1);
19821 op2 = expand_normal (arg2);
19822 tmode = insn_data[icode].operand[0].mode;
19823 mode0 = insn_data[icode].operand[1].mode;
19824 mode1 = insn_data[icode].operand[2].mode;
19825 mode2 = insn_data[icode].operand[3].mode;
19826
19827 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19828 op0 = copy_to_mode_reg (mode0, op0);
19829 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19830 op1 = copy_to_mode_reg (mode1, op1);
19831 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19832 {
19833 /* @@@ better error message */
19834 error ("selector must be an immediate");
19835 return const0_rtx;
19836 }
19837 if (target == 0
19838 || GET_MODE (target) != tmode
19839 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19840 target = gen_reg_rtx (tmode);
19841 pat = GEN_FCN (icode) (target, op0, op1, op2);
19842 if (! pat)
19843 return 0;
19844 emit_insn (pat);
19845 return target;
19846
19847 case ARM_BUILTIN_SETWCX:
19848 arg0 = CALL_EXPR_ARG (exp, 0);
19849 arg1 = CALL_EXPR_ARG (exp, 1);
19850 op0 = force_reg (SImode, expand_normal (arg0));
19851 op1 = expand_normal (arg1);
19852 emit_insn (gen_iwmmxt_tmcr (op1, op0));
19853 return 0;
19854
19855 case ARM_BUILTIN_GETWCX:
19856 arg0 = CALL_EXPR_ARG (exp, 0);
19857 op0 = expand_normal (arg0);
19858 target = gen_reg_rtx (SImode);
19859 emit_insn (gen_iwmmxt_tmrc (target, op0));
19860 return target;
19861
19862 case ARM_BUILTIN_WSHUFH:
19863 icode = CODE_FOR_iwmmxt_wshufh;
19864 arg0 = CALL_EXPR_ARG (exp, 0);
19865 arg1 = CALL_EXPR_ARG (exp, 1);
19866 op0 = expand_normal (arg0);
19867 op1 = expand_normal (arg1);
19868 tmode = insn_data[icode].operand[0].mode;
19869 mode1 = insn_data[icode].operand[1].mode;
19870 mode2 = insn_data[icode].operand[2].mode;
19871
19872 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19873 op0 = copy_to_mode_reg (mode1, op0);
19874 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19875 {
19876 /* @@@ better error message */
19877 error ("mask must be an immediate");
19878 return const0_rtx;
19879 }
19880 if (target == 0
19881 || GET_MODE (target) != tmode
19882 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19883 target = gen_reg_rtx (tmode);
19884 pat = GEN_FCN (icode) (target, op0, op1);
19885 if (! pat)
19886 return 0;
19887 emit_insn (pat);
19888 return target;
19889
19890 case ARM_BUILTIN_WSADB:
19891 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
19892 case ARM_BUILTIN_WSADH:
19893 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
19894 case ARM_BUILTIN_WSADBZ:
19895 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
19896 case ARM_BUILTIN_WSADHZ:
19897 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
19898
19899 /* Several three-argument builtins. */
19900 case ARM_BUILTIN_WMACS:
19901 case ARM_BUILTIN_WMACU:
19902 case ARM_BUILTIN_WALIGN:
19903 case ARM_BUILTIN_TMIA:
19904 case ARM_BUILTIN_TMIAPH:
19905 case ARM_BUILTIN_TMIATT:
19906 case ARM_BUILTIN_TMIATB:
19907 case ARM_BUILTIN_TMIABT:
19908 case ARM_BUILTIN_TMIABB:
19909 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
19910 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
19911 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
19912 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
19913 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
19914 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
19915 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
19916 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
19917 : CODE_FOR_iwmmxt_walign);
19918 arg0 = CALL_EXPR_ARG (exp, 0);
19919 arg1 = CALL_EXPR_ARG (exp, 1);
19920 arg2 = CALL_EXPR_ARG (exp, 2);
19921 op0 = expand_normal (arg0);
19922 op1 = expand_normal (arg1);
19923 op2 = expand_normal (arg2);
19924 tmode = insn_data[icode].operand[0].mode;
19925 mode0 = insn_data[icode].operand[1].mode;
19926 mode1 = insn_data[icode].operand[2].mode;
19927 mode2 = insn_data[icode].operand[3].mode;
19928
19929 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19930 op0 = copy_to_mode_reg (mode0, op0);
19931 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19932 op1 = copy_to_mode_reg (mode1, op1);
19933 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19934 op2 = copy_to_mode_reg (mode2, op2);
19935 if (target == 0
19936 || GET_MODE (target) != tmode
19937 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19938 target = gen_reg_rtx (tmode);
19939 pat = GEN_FCN (icode) (target, op0, op1, op2);
19940 if (! pat)
19941 return 0;
19942 emit_insn (pat);
19943 return target;
19944
19945 case ARM_BUILTIN_WZERO:
19946 target = gen_reg_rtx (DImode);
19947 emit_insn (gen_iwmmxt_clrdi (target));
19948 return target;
19949
19950 case ARM_BUILTIN_THREAD_POINTER:
19951 return arm_load_tp (target);
19952
19953 default:
19954 break;
19955 }
19956
19957 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19958 if (d->code == (const enum arm_builtins) fcode)
19959 return arm_expand_binop_builtin (d->icode, exp, target);
19960
19961 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19962 if (d->code == (const enum arm_builtins) fcode)
19963 return arm_expand_unop_builtin (d->icode, exp, target, 0);
19964
19965 /* @@@ Should really do something sensible here. */
19966 return NULL_RTX;
19967 }
19968 \f
19969 /* Return the number (counting from 0) of
19970 the least significant set bit in MASK. */
19971
19972 inline static int
19973 number_of_first_bit_set (unsigned mask)
19974 {
19975 int bit;
19976
19977 for (bit = 0;
19978 (mask & (1 << bit)) == 0;
19979 ++bit)
19980 continue;
19981
19982 return bit;
19983 }
19984
19985 /* Emit code to push or pop registers to or from the stack. F is the
19986 assembly file. MASK is the registers to push or pop. PUSH is
19987 nonzero if we should push, and zero if we should pop. For debugging
19988 output, if pushing, adjust CFA_OFFSET by the amount of space added
19989 to the stack. REAL_REGS should have the same number of bits set as
19990 MASK, and will be used instead (in the same order) to describe which
19991 registers were saved - this is used to mark the save slots when we
19992 push high registers after moving them to low registers. */
19993 static void
19994 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19995 unsigned long real_regs)
19996 {
19997 int regno;
19998 int lo_mask = mask & 0xFF;
19999 int pushed_words = 0;
20000
20001 gcc_assert (mask);
20002
20003 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
20004 {
20005 /* Special case.  Do not generate a POP PC statement here; do it in
20006 thumb_exit ().  */
20007 thumb_exit (f, -1);
20008 return;
20009 }
20010
20011 if (push && arm_except_unwind_info (&global_options) == UI_TARGET)
20012 {
20013 fprintf (f, "\t.save\t{");
20014 for (regno = 0; regno < 15; regno++)
20015 {
20016 if (real_regs & (1 << regno))
20017 {
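/* A lower-numbered register has already been printed if any bit below
   this one is set in REAL_REGS, so emit a separating comma first.  */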
20018 if (real_regs & ((1 << regno) - 1))
20019 fprintf (f, ", ");
20020 asm_fprintf (f, "%r", regno);
20021 }
20022 }
20023 fprintf (f, "}\n");
20024 }
20025
20026 fprintf (f, "\t%s\t{", push ? "push" : "pop");
20027
20028 /* Look at the low registers first. */
20029 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
20030 {
20031 if (lo_mask & 1)
20032 {
20033 asm_fprintf (f, "%r", regno);
20034
20035 if ((lo_mask & ~1) != 0)
20036 fprintf (f, ", ");
20037
20038 pushed_words++;
20039 }
20040 }
20041
20042 if (push && (mask & (1 << LR_REGNUM)))
20043 {
20044 /* Catch pushing the LR. */
20045 if (mask & 0xFF)
20046 fprintf (f, ", ");
20047
20048 asm_fprintf (f, "%r", LR_REGNUM);
20049
20050 pushed_words++;
20051 }
20052 else if (!push && (mask & (1 << PC_REGNUM)))
20053 {
20054 /* Catch popping the PC. */
20055 if (TARGET_INTERWORK || TARGET_BACKTRACE
20056 || crtl->calls_eh_return)
20057 {
20058 /* The PC is never popped directly; instead
20059 it is popped into r3 and then BX is used. */
20060 fprintf (f, "}\n");
20061
20062 thumb_exit (f, -1);
20063
20064 return;
20065 }
20066 else
20067 {
20068 if (mask & 0xFF)
20069 fprintf (f, ", ");
20070
20071 asm_fprintf (f, "%r", PC_REGNUM);
20072 }
20073 }
20074
20075 fprintf (f, "}\n");
20076
20077 if (push && pushed_words && dwarf2out_do_frame ())
20078 {
20079 char *l = dwarf2out_cfi_label (false);
20080 int pushed_mask = real_regs;
20081
20082 *cfa_offset += pushed_words * 4;
20083 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
20084
20085 pushed_words = 0;
20086 pushed_mask = real_regs;
20087 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
20088 {
20089 if (pushed_mask & 1)
20090 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
20091 }
20092 }
20093 }
20094
20095 /* Generate code to return from a thumb function.
20096 If 'reg_containing_return_addr' is -1, then the return address is
20097 actually on the stack, at the stack pointer. */
20098 static void
20099 thumb_exit (FILE *f, int reg_containing_return_addr)
20100 {
20101 unsigned regs_available_for_popping;
20102 unsigned regs_to_pop;
20103 int pops_needed;
20104 unsigned available;
20105 unsigned required;
20106 int mode;
20107 int size;
20108 int restore_a4 = FALSE;
20109
20110 /* Compute the registers we need to pop. */
20111 regs_to_pop = 0;
20112 pops_needed = 0;
20113
20114 if (reg_containing_return_addr == -1)
20115 {
20116 regs_to_pop |= 1 << LR_REGNUM;
20117 ++pops_needed;
20118 }
20119
20120 if (TARGET_BACKTRACE)
20121 {
20122 /* Restore the (ARM) frame pointer and stack pointer. */
20123 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
20124 pops_needed += 2;
20125 }
20126
20127 /* If there is nothing to pop then just emit the BX instruction and
20128 return. */
20129 if (pops_needed == 0)
20130 {
20131 if (crtl->calls_eh_return)
20132 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20133
20134 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20135 return;
20136 }
20137 /* Otherwise if we are not supporting interworking and we have not created
20138 a backtrace structure and the function was not entered in ARM mode then
20139 just pop the return address straight into the PC. */
20140 else if (!TARGET_INTERWORK
20141 && !TARGET_BACKTRACE
20142 && !is_called_in_ARM_mode (current_function_decl)
20143 && !crtl->calls_eh_return)
20144 {
20145 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
20146 return;
20147 }
20148
20149 /* Find out how many of the (return) argument registers we can corrupt. */
20150 regs_available_for_popping = 0;
20151
20152 /* If returning via __builtin_eh_return, the bottom three registers
20153 all contain information needed for the return. */
20154 if (crtl->calls_eh_return)
20155 size = 12;
20156 else
20157 {
20158 /* We can deduce the registers used from the function's
20159 return value. This is more reliable than examining
20160 df_regs_ever_live_p () because that will be set if the register is
20161 ever used in the function, not just if the register is used
20162 to hold a return value. */
20163
20164 if (crtl->return_rtx != 0)
20165 mode = GET_MODE (crtl->return_rtx);
20166 else
20167 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20168
20169 size = GET_MODE_SIZE (mode);
20170
20171 if (size == 0)
20172 {
20173 /* In a void function we can use any argument register.
20174 In a function that returns a structure on the stack
20175 we can use the second and third argument registers. */
20176 if (mode == VOIDmode)
20177 regs_available_for_popping =
20178 (1 << ARG_REGISTER (1))
20179 | (1 << ARG_REGISTER (2))
20180 | (1 << ARG_REGISTER (3));
20181 else
20182 regs_available_for_popping =
20183 (1 << ARG_REGISTER (2))
20184 | (1 << ARG_REGISTER (3));
20185 }
20186 else if (size <= 4)
20187 regs_available_for_popping =
20188 (1 << ARG_REGISTER (2))
20189 | (1 << ARG_REGISTER (3));
20190 else if (size <= 8)
20191 regs_available_for_popping =
20192 (1 << ARG_REGISTER (3));
20193 }
20194
20195 /* Match registers to be popped with registers into which we pop them. */
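/* Note that (x & -x) isolates the lowest set bit of x, so each iteration
   of the loop below retires one register from each mask and records one
   fewer pop still needed.  */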
20196 for (available = regs_available_for_popping,
20197 required = regs_to_pop;
20198 required != 0 && available != 0;
20199 available &= ~(available & - available),
20200 required &= ~(required & - required))
20201 -- pops_needed;
20202
20203 /* If we have any popping registers left over, remove them. */
20204 if (available > 0)
20205 regs_available_for_popping &= ~available;
20206
20207 /* Otherwise if we need another popping register we can use
20208 the fourth argument register. */
20209 else if (pops_needed)
20210 {
20211 /* If we have not found any free argument registers and
20212 reg a4 contains the return address, we must move it. */
20213 if (regs_available_for_popping == 0
20214 && reg_containing_return_addr == LAST_ARG_REGNUM)
20215 {
20216 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20217 reg_containing_return_addr = LR_REGNUM;
20218 }
20219 else if (size > 12)
20220 {
20221 /* Register a4 is being used to hold part of the return value,
20222 but we have dire need of a free, low register. */
20223 restore_a4 = TRUE;
20224
20225 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
20226 }
20227
20228 if (reg_containing_return_addr != LAST_ARG_REGNUM)
20229 {
20230 /* The fourth argument register is available. */
20231 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
20232
20233 --pops_needed;
20234 }
20235 }
20236
20237 /* Pop as many registers as we can. */
20238 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20239 regs_available_for_popping);
20240
20241 /* Process the registers we popped. */
20242 if (reg_containing_return_addr == -1)
20243 {
20244 /* The return address was popped into the lowest numbered register. */
20245 regs_to_pop &= ~(1 << LR_REGNUM);
20246
20247 reg_containing_return_addr =
20248 number_of_first_bit_set (regs_available_for_popping);
20249
20250 /* Remove this register from the mask of available registers, so that
20251 the return address will not be corrupted by further pops. */
20252 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
20253 }
20254
20255 /* If we popped other registers then handle them here. */
20256 if (regs_available_for_popping)
20257 {
20258 int frame_pointer;
20259
20260 /* Work out which register currently contains the frame pointer. */
20261 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
20262
20263 /* Move it into the correct place. */
20264 asm_fprintf (f, "\tmov\t%r, %r\n",
20265 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
20266
20267 /* (Temporarily) remove it from the mask of popped registers. */
20268 regs_available_for_popping &= ~(1 << frame_pointer);
20269 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
20270
20271 if (regs_available_for_popping)
20272 {
20273 int stack_pointer;
20274
20275 /* We popped the stack pointer as well;
20276 find the register that contains it. */
20277 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
20278
20279 /* Move it into the stack register. */
20280 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
20281
20282 /* At this point we have popped all necessary registers, so
20283 do not worry about restoring regs_available_for_popping
20284 to its correct value:
20285
20286 assert (pops_needed == 0)
20287 assert (regs_available_for_popping == (1 << frame_pointer))
20288 assert (regs_to_pop == (1 << STACK_POINTER)) */
20289 }
20290 else
20291 {
20292 /* Since we have just moved the popped value into the frame
20293 pointer, the popping register is available for reuse, and
20294 we know that we still have the stack pointer left to pop. */
20295 regs_available_for_popping |= (1 << frame_pointer);
20296 }
20297 }
20298
20299 /* If we still have registers left on the stack, but we no longer have
20300 any registers into which we can pop them, then we must move the return
20301 address into the link register and make available the register that
20302 contained it. */
20303 if (regs_available_for_popping == 0 && pops_needed > 0)
20304 {
20305 regs_available_for_popping |= 1 << reg_containing_return_addr;
20306
20307 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
20308 reg_containing_return_addr);
20309
20310 reg_containing_return_addr = LR_REGNUM;
20311 }
20312
20313 /* If we have registers left on the stack then pop some more.
20314 We know that at most we will want to pop FP and SP. */
20315 if (pops_needed > 0)
20316 {
20317 int popped_into;
20318 int move_to;
20319
20320 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20321 regs_available_for_popping);
20322
20323 /* We have popped either FP or SP.
20324 Move whichever one it is into the correct register. */
20325 popped_into = number_of_first_bit_set (regs_available_for_popping);
20326 move_to = number_of_first_bit_set (regs_to_pop);
20327
20328 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
20329
20330 regs_to_pop &= ~(1 << move_to);
20331
20332 --pops_needed;
20333 }
20334
20335 /* If we still have not popped everything then we must have only
20336 had one register available to us and we are now popping the SP. */
20337 if (pops_needed > 0)
20338 {
20339 int popped_into;
20340
20341 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20342 regs_available_for_popping);
20343
20344 popped_into = number_of_first_bit_set (regs_available_for_popping);
20345
20346 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
20347 /*
20348 assert (regs_to_pop == (1 << STACK_POINTER))
20349 assert (pops_needed == 1)
20350 */
20351 }
20352
20353 /* If necessary restore the a4 register. */
20354 if (restore_a4)
20355 {
20356 if (reg_containing_return_addr != LR_REGNUM)
20357 {
20358 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20359 reg_containing_return_addr = LR_REGNUM;
20360 }
20361
20362 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
20363 }
20364
20365 if (crtl->calls_eh_return)
20366 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20367
20368 /* Return to caller. */
20369 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20370 }
20371 \f
20372 /* Scan INSN just before assembler is output for it.
20373 For Thumb-1, we track the status of the condition codes; this
20374 information is used in the cbranchsi4_insn pattern. */
20375 void
20376 thumb1_final_prescan_insn (rtx insn)
20377 {
20378 if (flag_print_asm_name)
20379 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
20380 INSN_ADDRESSES (INSN_UID (insn)));
20381 /* Don't overwrite the previous setter when we get to a cbranch. */
20382 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
20383 {
20384 enum attr_conds conds;
20385
20386 if (cfun->machine->thumb1_cc_insn)
20387 {
20388 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
20389 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
20390 CC_STATUS_INIT;
20391 }
20392 conds = get_attr_conds (insn);
20393 if (conds == CONDS_SET)
20394 {
20395 rtx set = single_set (insn);
20396 cfun->machine->thumb1_cc_insn = insn;
20397 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
20398 cfun->machine->thumb1_cc_op1 = const0_rtx;
20399 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
20400 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
20401 {
20402 rtx src1 = XEXP (SET_SRC (set), 1);
20403 if (src1 == const0_rtx)
20404 cfun->machine->thumb1_cc_mode = CCmode;
20405 }
20406 }
20407 else if (conds != CONDS_NOCOND)
20408 cfun->machine->thumb1_cc_insn = NULL_RTX;
20409 }
20410 }
20411
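/* Return nonzero if VAL (truncated to 32 bits) can be expressed as an
   8-bit constant shifted left by 0 to 24 bits, e.g. 0x00ff0000;
   return zero otherwise.  */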
20412 int
20413 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
20414 {
20415 unsigned HOST_WIDE_INT mask = 0xff;
20416 int i;
20417
20418 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
20419 if (val == 0) /* XXX */
20420 return 0;
20421
20422 for (i = 0; i < 25; i++)
20423 if ((val & (mask << i)) == val)
20424 return 1;
20425
20426 return 0;
20427 }
20428
20429 /* Returns nonzero if the current function contains,
20430 or might contain, a far jump. */
20431 static int
20432 thumb_far_jump_used_p (void)
20433 {
20434 rtx insn;
20435
20436 /* This test is only important for leaf functions. */
20437 /* assert (!leaf_function_p ()); */
20438
20439 /* If we have already decided that far jumps may be used,
20440 do not bother checking again, and always return true even if
20441 it turns out that they are not being used. Once we have made
20442 the decision that far jumps are present (and that hence the link
20443 register will be pushed onto the stack) we cannot go back on it. */
20444 if (cfun->machine->far_jump_used)
20445 return 1;
20446
20447 /* If this function is not being called from the prologue/epilogue
20448 generation code then it must be being called from the
20449 INITIAL_ELIMINATION_OFFSET macro. */
20450 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
20451 {
20452 /* In this case we know that we are being asked about the elimination
20453 of the arg pointer register. If that register is not being used,
20454 then there are no arguments on the stack, and we do not have to
20455 worry that a far jump might force the prologue to push the link
20456 register, changing the stack offsets. In this case we can just
20457 return false, since the presence of far jumps in the function will
20458 not affect stack offsets.
20459
20460 If the arg pointer is live (or if it was live, but has now been
20461 eliminated and so set to dead) then we do have to test to see if
20462 the function might contain a far jump. This test can lead to some
20463 false negatives, since before reload is completed, the length of
20464 branch instructions is not known, so gcc defaults to returning their
20465 longest length, which in turn sets the far jump attribute to true.
20466
20467 A false negative will not result in bad code being generated, but it
20468 will result in a needless push and pop of the link register. We
20469 hope that this does not occur too often.
20470
20471 If we need doubleword stack alignment this could affect the other
20472 elimination offsets so we can't risk getting it wrong. */
20473 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
20474 cfun->machine->arg_pointer_live = 1;
20475 else if (!cfun->machine->arg_pointer_live)
20476 return 0;
20477 }
20478
20479 /* Check to see if the function contains a branch
20480 insn with the far jump attribute set. */
20481 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20482 {
20483 if (GET_CODE (insn) == JUMP_INSN
20484 /* Ignore tablejump patterns. */
20485 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20486 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
20487 && get_attr_far_jump (insn) == FAR_JUMP_YES
20488 )
20489 {
20490 /* Record the fact that we have decided that
20491 the function does use far jumps. */
20492 cfun->machine->far_jump_used = 1;
20493 return 1;
20494 }
20495 }
20496
20497 return 0;
20498 }
20499
20500 /* Return nonzero if FUNC must be entered in ARM mode. */
20501 int
20502 is_called_in_ARM_mode (tree func)
20503 {
20504 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
20505
20506 /* Ignore the problem of functions whose address is taken. */
20507 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
20508 return TRUE;
20509
20510 #ifdef ARM_PE
20511 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
20512 #else
20513 return FALSE;
20514 #endif
20515 }
20516
20517 /* Given the stack offsets and register mask in OFFSETS, decide how
20518 many additional registers to push instead of subtracting a constant
20519 from SP. For epilogues the principle is the same except we use pop.
20520 FOR_PROLOGUE indicates which we're generating. */
20521 static int
20522 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20523 {
20524 HOST_WIDE_INT amount;
20525 unsigned long live_regs_mask = offsets->saved_regs_mask;
20526 /* Extract a mask of the ones we can give to the Thumb's push/pop
20527 instruction. */
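/* 0x40ff covers r0-r7 plus LR (bit 14); the Thumb-1 push instruction can
   include LR but pop cannot, hence the smaller 0xff mask for epilogues.  */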
20528 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20529 /* Then count how many other high registers will need to be pushed. */
20530 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20531 int n_free, reg_base;
20532
20533 if (!for_prologue && frame_pointer_needed)
20534 amount = offsets->locals_base - offsets->saved_regs;
20535 else
20536 amount = offsets->outgoing_args - offsets->saved_regs;
20537
20538 /* If the stack frame size is 512 exactly, we can save one load
20539 instruction, which should make this a win even when optimizing
20540 for speed. */
20541 if (!optimize_size && amount != 512)
20542 return 0;
20543
20544 /* Can't do this if there are high registers to push. */
20545 if (high_regs_pushed != 0)
20546 return 0;
20547
20548 /* Shouldn't do it in the prologue if no registers would normally
20549 be pushed at all. In the epilogue, also allow it if we'll have
20550 a pop insn for the PC. */
20551 if (l_mask == 0
20552 && (for_prologue
20553 || TARGET_BACKTRACE
20554 || (live_regs_mask & 1 << LR_REGNUM) == 0
20555 || TARGET_INTERWORK
20556 || crtl->args.pretend_args_size != 0))
20557 return 0;
20558
20559 /* Don't do this if thumb_expand_prologue wants to emit instructions
20560 between the push and the stack frame allocation. */
20561 if (for_prologue
20562 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20563 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20564 return 0;
20565
20566 reg_base = 0;
20567 n_free = 0;
20568 if (!for_prologue)
20569 {
20570 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20571 live_regs_mask >>= reg_base;
20572 }
20573
20574 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20575 && (for_prologue || call_used_regs[reg_base + n_free]))
20576 {
20577 live_regs_mask >>= 1;
20578 n_free++;
20579 }
20580
20581 if (n_free == 0)
20582 return 0;
20583 gcc_assert (amount / 4 * 4 == amount);
20584
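/* If pushing some of the free registers would bring the remaining stack
   adjustment below 512, push just enough of them to leave an adjustment
   of 508 bytes, which still fits in a single Thumb SP add/subtract.
   If the whole adjustment can be absorbed by extra pushes, do that.  */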
20585 if (amount >= 512 && (amount - n_free * 4) < 512)
20586 return (amount - 508) / 4;
20587 if (amount <= n_free * 4)
20588 return amount / 4;
20589 return 0;
20590 }
20591
20592 /* The bits which aren't usefully expanded as rtl. */
20593 const char *
20594 thumb_unexpanded_epilogue (void)
20595 {
20596 arm_stack_offsets *offsets;
20597 int regno;
20598 unsigned long live_regs_mask = 0;
20599 int high_regs_pushed = 0;
20600 int extra_pop;
20601 int had_to_push_lr;
20602 int size;
20603
20604 if (cfun->machine->return_used_this_function != 0)
20605 return "";
20606
20607 if (IS_NAKED (arm_current_func_type ()))
20608 return "";
20609
20610 offsets = arm_get_frame_offsets ();
20611 live_regs_mask = offsets->saved_regs_mask;
20612 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20613
20614 /* We can deduce the registers used from the function's return value.
20615 This is more reliable than examining df_regs_ever_live_p () because that
20616 will be set if the register is ever used in the function, not just if
20617 the register is used to hold a return value. */
20618 size = arm_size_return_regs ();
20619
20620 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20621 if (extra_pop > 0)
20622 {
20623 unsigned long extra_mask = (1 << extra_pop) - 1;
20624 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20625 }
20626
20627 /* The prolog may have pushed some high registers to use as
20628 work registers; e.g. the testsuite file:
20629 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20630 compiles to produce:
20631 push {r4, r5, r6, r7, lr}
20632 mov r7, r9
20633 mov r6, r8
20634 push {r6, r7}
20635 as part of the prolog. We have to undo that pushing here. */
20636
20637 if (high_regs_pushed)
20638 {
20639 unsigned long mask = live_regs_mask & 0xff;
20640 int next_hi_reg;
20641
20642 /* The available low registers depend on the size of the value we are
20643 returning. */
20644 if (size <= 12)
20645 mask |= 1 << 3;
20646 if (size <= 8)
20647 mask |= 1 << 2;
20648
20649 if (mask == 0)
20650 /* Oh dear! We have no low registers into which we can pop
20651 high registers! */
20652 internal_error
20653 ("no low registers available for popping high registers");
20654
20655 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
20656 if (live_regs_mask & (1 << next_hi_reg))
20657 break;
20658
20659 while (high_regs_pushed)
20660 {
20661 /* Find lo register(s) into which the high register(s) can
20662 be popped. */
20663 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20664 {
20665 if (mask & (1 << regno))
20666 high_regs_pushed--;
20667 if (high_regs_pushed == 0)
20668 break;
20669 }
20670
20671 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
20672
20673 /* Pop the values into the low register(s). */
20674 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
20675
20676 /* Move the value(s) into the high registers. */
20677 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20678 {
20679 if (mask & (1 << regno))
20680 {
20681 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
20682 regno);
20683
20684 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
20685 if (live_regs_mask & (1 << next_hi_reg))
20686 break;
20687 }
20688 }
20689 }
20690 live_regs_mask &= ~0x0f00;
20691 }
20692
20693 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
20694 live_regs_mask &= 0xff;
20695
20696 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
20697 {
20698 /* Pop the return address into the PC. */
20699 if (had_to_push_lr)
20700 live_regs_mask |= 1 << PC_REGNUM;
20701
20702 /* Either no argument registers were pushed or a backtrace
20703 structure was created which includes an adjusted stack
20704 pointer, so just pop everything. */
20705 if (live_regs_mask)
20706 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20707 live_regs_mask);
20708
20709 /* We have either just popped the return address into the
20710 PC or it was kept in LR for the entire function.
20711 Note that thumb_pushpop has already called thumb_exit if the
20712 PC was in the list. */
20713 if (!had_to_push_lr)
20714 thumb_exit (asm_out_file, LR_REGNUM);
20715 }
20716 else
20717 {
20718 /* Pop everything but the return address. */
20719 if (live_regs_mask)
20720 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20721 live_regs_mask);
20722
20723 if (had_to_push_lr)
20724 {
20725 if (size > 12)
20726 {
20727 /* We have no free low regs, so save one. */
20728 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
20729 LAST_ARG_REGNUM);
20730 }
20731
20732 /* Get the return address into a temporary register. */
20733 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
20734 1 << LAST_ARG_REGNUM);
20735
20736 if (size > 12)
20737 {
20738 /* Move the return address to lr. */
20739 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
20740 LAST_ARG_REGNUM);
20741 /* Restore the low register. */
20742 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
20743 IP_REGNUM);
20744 regno = LR_REGNUM;
20745 }
20746 else
20747 regno = LAST_ARG_REGNUM;
20748 }
20749 else
20750 regno = LR_REGNUM;
20751
20752 /* Remove the argument registers that were pushed onto the stack. */
20753 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
20754 SP_REGNUM, SP_REGNUM,
20755 crtl->args.pretend_args_size);
20756
20757 thumb_exit (asm_out_file, regno);
20758 }
20759
20760 return "";
20761 }
20762
20763 /* Functions to save and restore machine-specific function data. */
20764 static struct machine_function *
20765 arm_init_machine_status (void)
20766 {
20767 struct machine_function *machine;
20768 machine = ggc_alloc_cleared_machine_function ();
20769
20770 #if ARM_FT_UNKNOWN != 0
20771 machine->func_type = ARM_FT_UNKNOWN;
20772 #endif
20773 return machine;
20774 }
20775
20776 /* Return an RTX indicating where the return address to the
20777 calling function can be found. */
20778 rtx
20779 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
20780 {
20781 if (count != 0)
20782 return NULL_RTX;
20783
20784 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
20785 }
20786
20787 /* Do anything needed before RTL is emitted for each function. */
20788 void
20789 arm_init_expanders (void)
20790 {
20791 /* Arrange to initialize and mark the machine per-function status. */
20792 init_machine_status = arm_init_machine_status;
20793
20794 /* This is to stop the combine pass optimizing away the alignment
20795 adjustment of va_arg. */
20796 /* ??? It is claimed that this should not be necessary. */
20797 if (cfun)
20798 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
20799 }
20800
20801
20802 /* Like arm_compute_initial_elimination_offset. Simpler because there
20803 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20804 to point at the base of the local variables after static stack
20805 space for a function has been allocated. */
20806
20807 HOST_WIDE_INT
20808 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20809 {
20810 arm_stack_offsets *offsets;
20811
20812 offsets = arm_get_frame_offsets ();
20813
20814 switch (from)
20815 {
20816 case ARG_POINTER_REGNUM:
20817 switch (to)
20818 {
20819 case STACK_POINTER_REGNUM:
20820 return offsets->outgoing_args - offsets->saved_args;
20821
20822 case FRAME_POINTER_REGNUM:
20823 return offsets->soft_frame - offsets->saved_args;
20824
20825 case ARM_HARD_FRAME_POINTER_REGNUM:
20826 return offsets->saved_regs - offsets->saved_args;
20827
20828 case THUMB_HARD_FRAME_POINTER_REGNUM:
20829 return offsets->locals_base - offsets->saved_args;
20830
20831 default:
20832 gcc_unreachable ();
20833 }
20834 break;
20835
20836 case FRAME_POINTER_REGNUM:
20837 switch (to)
20838 {
20839 case STACK_POINTER_REGNUM:
20840 return offsets->outgoing_args - offsets->soft_frame;
20841
20842 case ARM_HARD_FRAME_POINTER_REGNUM:
20843 return offsets->saved_regs - offsets->soft_frame;
20844
20845 case THUMB_HARD_FRAME_POINTER_REGNUM:
20846 return offsets->locals_base - offsets->soft_frame;
20847
20848 default:
20849 gcc_unreachable ();
20850 }
20851 break;
20852
20853 default:
20854 gcc_unreachable ();
20855 }
20856 }
20857
20858 /* Generate the rest of a function's prologue. */
20859 void
20860 thumb1_expand_prologue (void)
20861 {
20862 rtx insn, dwarf;
20863
20864 HOST_WIDE_INT amount;
20865 arm_stack_offsets *offsets;
20866 unsigned long func_type;
20867 int regno;
20868 unsigned long live_regs_mask;
20869
20870 func_type = arm_current_func_type ();
20871
20872 /* Naked functions don't have prologues. */
20873 if (IS_NAKED (func_type))
20874 return;
20875
20876 if (IS_INTERRUPT (func_type))
20877 {
20878 error ("interrupt Service Routines cannot be coded in Thumb mode");
20879 return;
20880 }
20881
20882 offsets = arm_get_frame_offsets ();
20883 live_regs_mask = offsets->saved_regs_mask;
20884 /* Load the pic register before setting the frame pointer,
20885 so we can use r7 as a temporary work register. */
20886 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20887 arm_load_pic_register (live_regs_mask);
20888
20889 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20890 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
20891 stack_pointer_rtx);
20892
20893 if (flag_stack_usage_info)
20894 current_function_static_stack_size
20895 = offsets->outgoing_args - offsets->saved_args;
20896
20897 amount = offsets->outgoing_args - offsets->saved_regs;
20898 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
20899 if (amount)
20900 {
20901 if (amount < 512)
20902 {
20903 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20904 GEN_INT (- amount)));
20905 RTX_FRAME_RELATED_P (insn) = 1;
20906 }
20907 else
20908 {
20909 rtx reg;
20910
20911 /* The stack decrement is too big for an immediate value in a single
20912 insn. In theory we could issue multiple subtracts, but after
20913 three of them it becomes more space efficient to place the full
20914 value in the constant pool and load into a register. (Also the
20915 ARM debugger really likes to see only one stack decrement per
20916 function). So instead we look for a scratch register into which
20917 we can load the decrement, and then we subtract this from the
20918 stack pointer. Unfortunately on the thumb the only available
20919 scratch registers are the argument registers, and we cannot use
20920 these as they may hold arguments to the function. Instead we
20921 attempt to locate a call preserved register which is used by this
20922 function. If we can find one, then we know that it will have
20923 been pushed at the start of the prologue and so we can corrupt
20924 it now. */
20925 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
20926 if (live_regs_mask & (1 << regno))
20927 break;
20928
20929 gcc_assert (regno <= LAST_LO_REGNUM);
20930
20931 reg = gen_rtx_REG (SImode, regno);
20932
20933 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20934
20935 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20936 stack_pointer_rtx, reg));
20937 RTX_FRAME_RELATED_P (insn) = 1;
20938 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20939 plus_constant (stack_pointer_rtx,
20940 -amount));
20941 RTX_FRAME_RELATED_P (dwarf) = 1;
20942 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20943 }
20944 }
20945
20946 if (frame_pointer_needed)
20947 thumb_set_frame_pointer (offsets);
20948
20949 /* If we are profiling, make sure no instructions are scheduled before
20950 the call to mcount. Similarly if the user has requested no
20951 scheduling in the prolog. Similarly if we want non-call exceptions
20952 using the EABI unwinder, to prevent faulting instructions from being
20953 swapped with a stack adjustment. */
20954 if (crtl->profile || !TARGET_SCHED_PROLOG
20955 || (arm_except_unwind_info (&global_options) == UI_TARGET
20956 && cfun->can_throw_non_call_exceptions))
20957 emit_insn (gen_blockage ());
20958
20959 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20960 if (live_regs_mask & 0xff)
20961 cfun->machine->lr_save_eliminated = 0;
20962 }
20963
20964
20965 void
20966 thumb1_expand_epilogue (void)
20967 {
20968 HOST_WIDE_INT amount;
20969 arm_stack_offsets *offsets;
20970 int regno;
20971
20972 /* Naked functions don't have epilogues. */
20973 if (IS_NAKED (arm_current_func_type ()))
20974 return;
20975
20976 offsets = arm_get_frame_offsets ();
20977 amount = offsets->outgoing_args - offsets->saved_regs;
20978
20979 if (frame_pointer_needed)
20980 {
20981 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20982 amount = offsets->locals_base - offsets->saved_regs;
20983 }
20984 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20985
20986 gcc_assert (amount >= 0);
20987 if (amount)
20988 {
20989 if (amount < 512)
20990 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20991 GEN_INT (amount)));
20992 else
20993 {
20994 /* r3 is always free in the epilogue. */
20995 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20996
20997 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20998 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20999 }
21000 }
21001
21002 /* Emit a USE (stack_pointer_rtx), so that
21003 the stack adjustment will not be deleted. */
21004 emit_insn (gen_prologue_use (stack_pointer_rtx));
21005
21006 if (crtl->profile || !TARGET_SCHED_PROLOG)
21007 emit_insn (gen_blockage ());
21008
21009 /* Emit a clobber for each insn that will be restored in the epilogue,
21010 so that flow2 will get register lifetimes correct. */
21011 for (regno = 0; regno < 13; regno++)
21012 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
21013 emit_clobber (gen_rtx_REG (SImode, regno));
21014
21015 if (! df_regs_ever_live_p (LR_REGNUM))
21016 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
21017 }
21018
21019 static void
21020 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
21021 {
21022 arm_stack_offsets *offsets;
21023 unsigned long live_regs_mask = 0;
21024 unsigned long l_mask;
21025 unsigned high_regs_pushed = 0;
21026 int cfa_offset = 0;
21027 int regno;
21028
21029 if (IS_NAKED (arm_current_func_type ()))
21030 return;
21031
21032 if (is_called_in_ARM_mode (current_function_decl))
21033 {
21034 const char * name;
21035
21036 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
21037 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
21038 == SYMBOL_REF);
21039 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
21040
21041 /* Generate code sequence to switch us into Thumb mode. */
21042 /* The .code 32 directive has already been emitted by
21043 ASM_DECLARE_FUNCTION_NAME. */
21044 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
21045 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
21046
21047 /* Generate a label, so that the debugger will notice the
21048 change in instruction sets. This label is also used by
21049 the assembler to bypass the ARM code when this function
21050 is called from a Thumb encoded function elsewhere in the
21051 same file. Hence the definition of STUB_NAME here must
21052 agree with the definition in gas/config/tc-arm.c. */
21053
21054 #define STUB_NAME ".real_start_of"
21055
21056 fprintf (f, "\t.code\t16\n");
21057 #ifdef ARM_PE
21058 if (arm_dllexport_name_p (name))
21059 name = arm_strip_name_encoding (name);
21060 #endif
21061 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
21062 fprintf (f, "\t.thumb_func\n");
21063 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
21064 }
21065
21066 if (crtl->args.pretend_args_size)
21067 {
21068 /* Output unwind directive for the stack adjustment. */
21069 if (arm_except_unwind_info (&global_options) == UI_TARGET)
21070 fprintf (f, "\t.pad #%d\n",
21071 crtl->args.pretend_args_size);
21072
21073 if (cfun->machine->uses_anonymous_args)
21074 {
21075 int num_pushes;
21076
21077 fprintf (f, "\tpush\t{");
21078
21079 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
21080
21081 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
21082 regno <= LAST_ARG_REGNUM;
21083 regno++)
21084 asm_fprintf (f, "%r%s", regno,
21085 regno == LAST_ARG_REGNUM ? "" : ", ");
21086
21087 fprintf (f, "}\n");
21088 }
21089 else
21090 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
21091 SP_REGNUM, SP_REGNUM,
21092 crtl->args.pretend_args_size);
21093
21094 /* We don't need to record the stores for unwinding (would it
21095 help the debugger any if we did?), but record the change in
21096 the stack pointer. */
21097 if (dwarf2out_do_frame ())
21098 {
21099 char *l = dwarf2out_cfi_label (false);
21100
21101 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
21102 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
21103 }
21104 }
21105
21106 /* Get the registers we are going to push. */
21107 offsets = arm_get_frame_offsets ();
21108 live_regs_mask = offsets->saved_regs_mask;
21109 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
21110 l_mask = live_regs_mask & 0x40ff;
21111 /* Then count how many other high registers will need to be pushed. */
21112 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21113
21114 if (TARGET_BACKTRACE)
21115 {
21116 unsigned offset;
21117 unsigned work_register;
21118
21119 /* We have been asked to create a stack backtrace structure.
21120 The code looks like this:
21121
21122 0 .align 2
21123 0 func:
21124 0 sub SP, #16 Reserve space for 4 registers.
21125 2 push {R7} Push low registers.
21126 4 add R7, SP, #20 Get the stack pointer before the push.
21127 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
21128 8 mov R7, PC Get hold of the start of this code plus 12.
21129 10 str R7, [SP, #16] Store it.
21130 12 mov R7, FP Get hold of the current frame pointer.
21131 14 str R7, [SP, #4] Store it.
21132 16 mov R7, LR Get hold of the current return address.
21133 18 str R7, [SP, #12] Store it.
21134 20 add R7, SP, #16 Point at the start of the backtrace structure.
21135 22 mov FP, R7 Put this value into the frame pointer. */
21136
21137 work_register = thumb_find_work_register (live_regs_mask);
21138
21139 if (arm_except_unwind_info (&global_options) == UI_TARGET)
21140 asm_fprintf (f, "\t.pad #16\n");
21141
21142 asm_fprintf
21143 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
21144 SP_REGNUM, SP_REGNUM);
21145
21146 if (dwarf2out_do_frame ())
21147 {
21148 char *l = dwarf2out_cfi_label (false);
21149
21150 cfa_offset = cfa_offset + 16;
21151 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
21152 }
21153
21154 if (l_mask)
21155 {
21156 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
21157 offset = bit_count (l_mask) * UNITS_PER_WORD;
21158 }
21159 else
21160 offset = 0;
21161
21162 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
21163 offset + 16 + crtl->args.pretend_args_size);
21164
21165 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21166 offset + 4);
21167
21168 /* Make sure that the instruction fetching the PC is in the right place
21169 to calculate "start of backtrace creation code + 12". */
21170 if (l_mask)
21171 {
21172 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
21173 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21174 offset + 12);
21175 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
21176 ARM_HARD_FRAME_POINTER_REGNUM);
21177 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21178 offset);
21179 }
21180 else
21181 {
21182 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
21183 ARM_HARD_FRAME_POINTER_REGNUM);
21184 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21185 offset);
21186 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
21187 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21188 offset + 12);
21189 }
21190
21191 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
21192 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21193 offset + 8);
21194 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
21195 offset + 12);
21196 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
21197 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
21198 }
21199 /* Optimization: If we are not pushing any low registers but we are going
21200 to push some high registers then delay our first push. This will just
21201 be a push of LR and we can combine it with the push of the first high
21202 register. */
21203 else if ((l_mask & 0xff) != 0
21204 || (high_regs_pushed == 0 && l_mask))
21205 {
21206 unsigned long mask = l_mask;
21207 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
21208 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
21209 }
21210
21211 if (high_regs_pushed)
21212 {
21213 unsigned pushable_regs;
21214 unsigned next_hi_reg;
21215
21216 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
21217 if (live_regs_mask & (1 << next_hi_reg))
21218 break;
21219
21220 pushable_regs = l_mask & 0xff;
21221
21222 if (pushable_regs == 0)
21223 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
21224
21225 while (high_regs_pushed > 0)
21226 {
21227 unsigned long real_regs_mask = 0;
21228
21229 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
21230 {
21231 if (pushable_regs & (1 << regno))
21232 {
21233 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
21234
21235 high_regs_pushed --;
21236 real_regs_mask |= (1 << next_hi_reg);
21237
21238 if (high_regs_pushed)
21239 {
21240 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
21241 next_hi_reg --)
21242 if (live_regs_mask & (1 << next_hi_reg))
21243 break;
21244 }
21245 else
21246 {
21247 pushable_regs &= ~((1 << regno) - 1);
21248 break;
21249 }
21250 }
21251 }
21252
21253 /* If we had to find a work register and we have not yet
21254 saved the LR then add it to the list of regs to push. */
21255 if (l_mask == (1 << LR_REGNUM))
21256 {
21257 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
21258 1, &cfa_offset,
21259 real_regs_mask | (1 << LR_REGNUM));
21260 l_mask = 0;
21261 }
21262 else
21263 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
21264 }
21265 }
21266 }
21267
21268 /* Handle the case of a double word load into a low register from
21269 a computed memory address. The computed address may involve a
21270 register which is overwritten by the load. */
21271 const char *
21272 thumb_load_double_from_address (rtx *operands)
21273 {
21274 rtx addr;
21275 rtx base;
21276 rtx offset;
21277 rtx arg1;
21278 rtx arg2;
21279
21280 gcc_assert (GET_CODE (operands[0]) == REG);
21281 gcc_assert (GET_CODE (operands[1]) == MEM);
21282
21283 /* Get the memory address. */
21284 addr = XEXP (operands[1], 0);
21285
21286 /* Work out how the memory address is computed. */
21287 switch (GET_CODE (addr))
21288 {
21289 case REG:
21290 operands[2] = adjust_address (operands[1], SImode, 4);
21291
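/* If the base register is also the low half of the destination, load the
   high half first so the address is still intact for the second load.  */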
21292 if (REGNO (operands[0]) == REGNO (addr))
21293 {
21294 output_asm_insn ("ldr\t%H0, %2", operands);
21295 output_asm_insn ("ldr\t%0, %1", operands);
21296 }
21297 else
21298 {
21299 output_asm_insn ("ldr\t%0, %1", operands);
21300 output_asm_insn ("ldr\t%H0, %2", operands);
21301 }
21302 break;
21303
21304 case CONST:
21305 /* Compute <address> + 4 for the high order load. */
21306 operands[2] = adjust_address (operands[1], SImode, 4);
21307
21308 output_asm_insn ("ldr\t%0, %1", operands);
21309 output_asm_insn ("ldr\t%H0, %2", operands);
21310 break;
21311
21312 case PLUS:
21313 arg1 = XEXP (addr, 0);
21314 arg2 = XEXP (addr, 1);
21315
21316 if (CONSTANT_P (arg1))
21317 base = arg2, offset = arg1;
21318 else
21319 base = arg1, offset = arg2;
21320
21321 gcc_assert (GET_CODE (base) == REG);
21322
21323 /* Catch the case of <address> = <reg> + <reg> */
21324 if (GET_CODE (offset) == REG)
21325 {
21326 int reg_offset = REGNO (offset);
21327 int reg_base = REGNO (base);
21328 int reg_dest = REGNO (operands[0]);
21329
21330 /* Add the base and offset registers together into the
21331 higher destination register. */
21332 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
21333 reg_dest + 1, reg_base, reg_offset);
21334
21335 /* Load the lower destination register from the address in
21336 the higher destination register. */
21337 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
21338 reg_dest, reg_dest + 1);
21339
21340 /* Load the higher destination register from its own address
21341 plus 4. */
21342 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
21343 reg_dest + 1, reg_dest + 1);
21344 }
21345 else
21346 {
21347 /* Compute <address> + 4 for the high order load. */
21348 operands[2] = adjust_address (operands[1], SImode, 4);
21349
21350 /* If the computed address is held in the low order register
21351 then load the high order register first, otherwise always
21352 load the low order register first. */
21353 if (REGNO (operands[0]) == REGNO (base))
21354 {
21355 output_asm_insn ("ldr\t%H0, %2", operands);
21356 output_asm_insn ("ldr\t%0, %1", operands);
21357 }
21358 else
21359 {
21360 output_asm_insn ("ldr\t%0, %1", operands);
21361 output_asm_insn ("ldr\t%H0, %2", operands);
21362 }
21363 }
21364 break;
21365
21366 case LABEL_REF:
21367 /* With no registers to worry about we can just load the value
21368 directly. */
21369 operands[2] = adjust_address (operands[1], SImode, 4);
21370
21371 output_asm_insn ("ldr\t%H0, %2", operands);
21372 output_asm_insn ("ldr\t%0, %1", operands);
21373 break;
21374
21375 default:
21376 gcc_unreachable ();
21377 }
21378
21379 return "";
21380 }
21381
21382 const char *
21383 thumb_output_move_mem_multiple (int n, rtx *operands)
21384 {
21385 rtx tmp;
21386
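/* ldmia/stmia transfer registers in order of increasing register number,
   so sort the scratch registers into ascending order before emitting
   the register lists.  */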
21387 switch (n)
21388 {
21389 case 2:
21390 if (REGNO (operands[4]) > REGNO (operands[5]))
21391 {
21392 tmp = operands[4];
21393 operands[4] = operands[5];
21394 operands[5] = tmp;
21395 }
21396 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
21397 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
21398 break;
21399
21400 case 3:
21401 if (REGNO (operands[4]) > REGNO (operands[5]))
21402 {
21403 tmp = operands[4];
21404 operands[4] = operands[5];
21405 operands[5] = tmp;
21406 }
21407 if (REGNO (operands[5]) > REGNO (operands[6]))
21408 {
21409 tmp = operands[5];
21410 operands[5] = operands[6];
21411 operands[6] = tmp;
21412 }
21413 if (REGNO (operands[4]) > REGNO (operands[5]))
21414 {
21415 tmp = operands[4];
21416 operands[4] = operands[5];
21417 operands[5] = tmp;
21418 }
21419
21420 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
21421 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
21422 break;
21423
21424 default:
21425 gcc_unreachable ();
21426 }
21427
21428 return "";
21429 }
21430
21431 /* Output a call-via instruction for thumb state. */
21432 const char *
21433 thumb_call_via_reg (rtx reg)
21434 {
21435 int regno = REGNO (reg);
21436 rtx *labelp;
21437
21438 gcc_assert (regno < LR_REGNUM);
21439
21440 /* If we are in the normal text section we can use a single instance
21441 per compilation unit. If we are doing function sections, then we need
21442 an entry per section, since we can't rely on reachability. */
21443 if (in_section == text_section)
21444 {
21445 thumb_call_reg_needed = 1;
21446
21447 if (thumb_call_via_label[regno] == NULL)
21448 thumb_call_via_label[regno] = gen_label_rtx ();
21449 labelp = thumb_call_via_label + regno;
21450 }
21451 else
21452 {
21453 if (cfun->machine->call_via[regno] == NULL)
21454 cfun->machine->call_via[regno] = gen_label_rtx ();
21455 labelp = cfun->machine->call_via + regno;
21456 }
21457
21458 output_asm_insn ("bl\t%a0", labelp);
21459 return "";
21460 }
21461
21462 /* Routines for generating rtl. */
21463 void
21464 thumb_expand_movmemqi (rtx *operands)
21465 {
21466 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
21467 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
21468 HOST_WIDE_INT len = INTVAL (operands[2]);
21469 HOST_WIDE_INT offset = 0;
21470
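/* Copy the bulk of the block with 12- and 8-byte multi-register moves,
   then finish with word, halfword and byte copies as needed.  */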
21471 while (len >= 12)
21472 {
21473 emit_insn (gen_movmem12b (out, in, out, in));
21474 len -= 12;
21475 }
21476
21477 if (len >= 8)
21478 {
21479 emit_insn (gen_movmem8b (out, in, out, in));
21480 len -= 8;
21481 }
21482
21483 if (len >= 4)
21484 {
21485 rtx reg = gen_reg_rtx (SImode);
21486 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
21487 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
21488 len -= 4;
21489 offset += 4;
21490 }
21491
21492 if (len >= 2)
21493 {
21494 rtx reg = gen_reg_rtx (HImode);
21495 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
21496 plus_constant (in, offset))));
21497 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
21498 reg));
21499 len -= 2;
21500 offset += 2;
21501 }
21502
21503 if (len)
21504 {
21505 rtx reg = gen_reg_rtx (QImode);
21506 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
21507 plus_constant (in, offset))));
21508 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
21509 reg));
21510 }
21511 }
21512
21513 void
21514 thumb_reload_out_hi (rtx *operands)
21515 {
21516 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
21517 }
21518
21519 /* Handle reading a half-word from memory during reload. */
21520 void
21521 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
21522 {
21523 gcc_unreachable ();
21524 }
21525
21526 /* Return the length of a function name prefix
21527 that starts with the character 'c'. */
21528 static int
21529 arm_get_strip_length (int c)
21530 {
21531 switch (c)
21532 {
21533 ARM_NAME_ENCODING_LENGTHS
21534 default: return 0;
21535 }
21536 }
21537
21538 /* Return a pointer to a function's name with any
21539 and all prefix encodings stripped from it. */
21540 const char *
21541 arm_strip_name_encoding (const char *name)
21542 {
21543 int skip;
21544
21545 while ((skip = arm_get_strip_length (* name)))
21546 name += skip;
21547
21548 return name;
21549 }
21550
21551 /* If there is a '*' anywhere in the name's prefix, then
21552 emit the stripped name verbatim, otherwise prepend an
21553 underscore if leading underscores are being used. */
21554 void
21555 arm_asm_output_labelref (FILE *stream, const char *name)
21556 {
21557 int skip;
21558 int verbatim = 0;
21559
21560 while ((skip = arm_get_strip_length (* name)))
21561 {
21562 verbatim |= (*name == '*');
21563 name += skip;
21564 }
21565
21566 if (verbatim)
21567 fputs (name, stream);
21568 else
21569 asm_fprintf (stream, "%U%s", name);
21570 }
21571
21572 static void
21573 arm_file_start (void)
21574 {
21575 int val;
21576
21577 if (TARGET_UNIFIED_ASM)
21578 asm_fprintf (asm_out_file, "\t.syntax unified\n");
21579
21580 if (TARGET_BPABI)
21581 {
21582 const char *fpu_name;
21583 if (arm_selected_arch)
21584 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
21585 else
21586 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
21587
21588 if (TARGET_SOFT_FLOAT)
21589 {
21590 if (TARGET_VFP)
21591 fpu_name = "softvfp";
21592 else
21593 fpu_name = "softfpa";
21594 }
21595 else
21596 {
21597 fpu_name = arm_fpu_desc->name;
21598 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
21599 {
21600 if (TARGET_HARD_FLOAT)
21601 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21602 if (TARGET_HARD_FLOAT_ABI)
21603 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
21604 }
21605 }
21606 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21607
21608 /* Some of these attributes only apply when the corresponding features
21609 are used. However we don't have any easy way of figuring this out.
21610 Conservatively record the setting that would have been used. */
21611
21612 /* Tag_ABI_FP_rounding. */
21613 if (flag_rounding_math)
21614 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
21615 if (!flag_unsafe_math_optimizations)
21616 {
21617 /* Tag_ABI_FP_denormal. */
21618 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
21619 /* Tag_ABI_FP_exceptions. */
21620 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
21621 }
21622 /* Tag_ABI_FP_user_exceptions. */
21623 if (flag_signaling_nans)
21624 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21625 /* Tag_ABI_FP_number_model. */
21626 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21627 flag_finite_math_only ? 1 : 3);
21628
21629 /* Tag_ABI_align8_needed. */
21630 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21631 /* Tag_ABI_align8_preserved. */
21632 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21633 /* Tag_ABI_enum_size. */
21634 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21635 flag_short_enums ? 1 : 2);
21636
21637 /* Tag_ABI_optimization_goals. */
21638 if (optimize_size)
21639 val = 4;
21640 else if (optimize >= 2)
21641 val = 2;
21642 else if (optimize)
21643 val = 1;
21644 else
21645 val = 6;
21646 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21647
21648 /* Tag_ABI_FP_16bit_format. */
21649 if (arm_fp16_format)
21650 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21651 (int)arm_fp16_format);
21652
21653 if (arm_lang_output_object_attributes_hook)
21654 arm_lang_output_object_attributes_hook();
21655 }
21656 default_file_start();
21657 }
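/* As an illustration, on an EABI target the directives emitted above might
   look roughly like

        .cpu    cortex-a8
        .eabi_attribute 27, 3
        .fpu    neon
        .eabi_attribute 23, 3
        .eabi_attribute 24, 1
        .eabi_attribute 25, 1
        .eabi_attribute 26, 2
        .eabi_attribute 30, 6

   (preceded by ".syntax unified" when unified assembly is in use).  The exact
   set depends on the CPU/FPU selection, the float ABI and the -f math and
   optimization options in effect.  */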
21658
21659 static void
21660 arm_file_end (void)
21661 {
21662 int regno;
21663
21664 if (NEED_INDICATE_EXEC_STACK)
21665 /* Add .note.GNU-stack. */
21666 file_end_indicate_exec_stack ();
21667
21668 if (! thumb_call_reg_needed)
21669 return;
21670
21671 switch_to_section (text_section);
21672 asm_fprintf (asm_out_file, "\t.code 16\n");
21673 ASM_OUTPUT_ALIGN (asm_out_file, 1);
21674
21675 for (regno = 0; regno < LR_REGNUM; regno++)
21676 {
21677 rtx label = thumb_call_via_label[regno];
21678
21679 if (label != 0)
21680 {
21681 targetm.asm_out.internal_label (asm_out_file, "L",
21682 CODE_LABEL_NUMBER (label));
21683 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21684 }
21685 }
21686 }
21687
21688 #ifndef ARM_PE
21689 /* Symbols in the text segment can be accessed without indirecting via the
21690 constant pool; it may take an extra binary operation, but this is still
21691 faster than indirecting via memory. Don't do this when not optimizing,
21692 since we won't be calculating all of the offsets necessary to do this
21693 simplification. */
21694
21695 static void
21696 arm_encode_section_info (tree decl, rtx rtl, int first)
21697 {
21698 if (optimize > 0 && TREE_CONSTANT (decl))
21699 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
21700
21701 default_encode_section_info (decl, rtl, first);
21702 }
21703 #endif /* !ARM_PE */
21704
21705 static void
21706 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
21707 {
21708 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
21709 && !strcmp (prefix, "L"))
21710 {
21711 arm_ccfsm_state = 0;
21712 arm_target_insn = NULL;
21713 }
21714 default_internal_label (stream, prefix, labelno);
21715 }
21716
21717 /* Output code to add DELTA to the first argument, and then jump
21718 to FUNCTION. Used for C++ multiple inheritance. */
21719 static void
21720 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
21721 HOST_WIDE_INT delta,
21722 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
21723 tree function)
21724 {
21725 static int thunk_label = 0;
21726 char label[256];
21727 char labelpc[256];
21728 int mi_delta = delta;
21729 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
21730 int shift = 0;
21731 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
21732 ? 1 : 0);
21733 if (mi_delta < 0)
21734 mi_delta = - mi_delta;
21735
21736 if (TARGET_THUMB1)
21737 {
21738 int labelno = thunk_label++;
21739 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
21740 /* Thunks are entered in ARM mode when available. */
21741 if (TARGET_THUMB1_ONLY)
21742 {
21743 /* push r3 so we can use it as a temporary. */
21744 /* TODO: Omit this save if r3 is not used. */
21745 fputs ("\tpush {r3}\n", file);
21746 fputs ("\tldr\tr3, ", file);
21747 }
21748 else
21749 {
21750 fputs ("\tldr\tr12, ", file);
21751 }
21752 assemble_name (file, label);
21753 fputc ('\n', file);
21754 if (flag_pic)
21755 {
21756 /* If we are generating PIC, the ldr instruction below loads
21757 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21758 the address of the add + 8, so we have:
21759
21760 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
21761 = target + 1.
21762
21763 Note that we have "+ 1" because some versions of GNU ld
21764 don't set the low bit of the result for R_ARM_REL32
21765 relocations against thumb function symbols.
21766 On ARMv6M this is +4, not +8. */
21767 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
21768 assemble_name (file, labelpc);
21769 fputs (":\n", file);
21770 if (TARGET_THUMB1_ONLY)
21771 {
21772 /* This is 2 insns after the start of the thunk, so we know it
21773 is 4-byte aligned. */
21774 fputs ("\tadd\tr3, pc, r3\n", file);
21775 fputs ("\tmov r12, r3\n", file);
21776 }
21777 else
21778 fputs ("\tadd\tr12, pc, r12\n", file);
21779 }
21780 else if (TARGET_THUMB1_ONLY)
21781 fputs ("\tmov r12, r3\n", file);
21782 }
21783 if (TARGET_THUMB1_ONLY)
21784 {
21785 if (mi_delta > 255)
21786 {
21787 fputs ("\tldr\tr3, ", file);
21788 assemble_name (file, label);
21789 fputs ("+4\n", file);
21790 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
21791 mi_op, this_regno, this_regno);
21792 }
21793 else if (mi_delta != 0)
21794 {
21795 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21796 mi_op, this_regno, this_regno,
21797 mi_delta);
21798 }
21799 }
21800 else
21801 {
21802 /* TODO: Use movw/movt for large constants when available. */
21803 while (mi_delta != 0)
21804 {
21805 if ((mi_delta & (3 << shift)) == 0)
21806 shift += 2;
21807 else
21808 {
21809 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21810 mi_op, this_regno, this_regno,
21811 mi_delta & (0xff << shift));
21812 mi_delta &= ~(0xff << shift);
21813 shift += 8;
21814 }
21815 }
21816 }
21817 if (TARGET_THUMB1)
21818 {
21819 if (TARGET_THUMB1_ONLY)
21820 fputs ("\tpop\t{r3}\n", file);
21821
21822 fprintf (file, "\tbx\tr12\n");
21823 ASM_OUTPUT_ALIGN (file, 2);
21824 assemble_name (file, label);
21825 fputs (":\n", file);
21826 if (flag_pic)
21827 {
21828 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21829 rtx tem = XEXP (DECL_RTL (function), 0);
21830 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
21831 tem = gen_rtx_MINUS (GET_MODE (tem),
21832 tem,
21833 gen_rtx_SYMBOL_REF (Pmode,
21834 ggc_strdup (labelpc)));
21835 assemble_integer (tem, 4, BITS_PER_WORD, 1);
21836 }
21837 else
21838 /* Output ".word .LTHUNKn". */
21839 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
21840
21841 if (TARGET_THUMB1_ONLY && mi_delta > 255)
21842 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
21843 }
21844 else
21845 {
21846 fputs ("\tb\t", file);
21847 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
21848 if (NEED_PLT_RELOC)
21849 fputs ("(PLT)", file);
21850 fputc ('\n', file);
21851 }
21852 }
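/* For instance, on an ARM (non-Thumb-1) target a thunk that merely adds 8 to
   the "this" pointer and tail-calls its target comes out roughly as

        add     r0, r0, #8
        b       _ZN7Derived1fEv(PLT)

   where the mangled name is only an example, r0 becomes r1 when the function
   returns its value in memory, and the "(PLT)" suffix is emitted only when a
   PLT relocation is needed.  */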
21853
21854 int
21855 arm_emit_vector_const (FILE *file, rtx x)
21856 {
21857 int i;
21858 const char * pattern;
21859
21860 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21861
21862 switch (GET_MODE (x))
21863 {
21864 case V2SImode: pattern = "%08x"; break;
21865 case V4HImode: pattern = "%04x"; break;
21866 case V8QImode: pattern = "%02x"; break;
21867 default: gcc_unreachable ();
21868 }
21869
21870 fprintf (file, "0x");
21871 for (i = CONST_VECTOR_NUNITS (x); i--;)
21872 {
21873 rtx element;
21874
21875 element = CONST_VECTOR_ELT (x, i);
21876 fprintf (file, pattern, INTVAL (element));
21877 }
21878
21879 return 1;
21880 }
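/* For instance, a V4HImode constant vector whose elements, from index 0
   upwards, are 1, 2, 3 and 4 is printed as "0x0004000300020001": the loop
   above emits elements starting from the highest index, each with the
   per-element width selected by the switch.  */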
21881
21882 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
21883 HFmode constant pool entries are actually loaded with ldr. */
21884 void
21885 arm_emit_fp16_const (rtx c)
21886 {
21887 REAL_VALUE_TYPE r;
21888 long bits;
21889
21890 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
21891 bits = real_to_target (NULL, &r, HFmode);
21892 if (WORDS_BIG_ENDIAN)
21893 assemble_zeros (2);
21894 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
21895 if (!WORDS_BIG_ENDIAN)
21896 assemble_zeros (2);
21897 }
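/* As an example, the HFmode value 1.0 has the bit pattern 0x3c00; on a
   little-endian target the 16-bit value 0x3c00 is emitted first and the two
   bytes of zero padding follow (the order is reversed when words are
   big-endian), so the entry still fills a 4-byte constant pool slot.  */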
21898
21899 const char *
21900 arm_output_load_gr (rtx *operands)
21901 {
21902 rtx reg;
21903 rtx offset;
21904 rtx wcgr;
21905 rtx sum;
21906
21907 if (GET_CODE (operands [1]) != MEM
21908 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
21909 || GET_CODE (reg = XEXP (sum, 0)) != REG
21910 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
21911 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
21912 return "wldrw%?\t%0, %1";
21913
21914 /* Fix up an out-of-range load of a GR register. */
21915 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
21916 wcgr = operands[0];
21917 operands[0] = reg;
21918 output_asm_insn ("ldr%?\t%0, %1", operands);
21919
21920 operands[0] = wcgr;
21921 operands[1] = reg;
21922 output_asm_insn ("tmcr%?\t%0, %1", operands);
21923 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
21924
21925 return "";
21926 }
21927
21928 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21929
21930 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21931 named arg and all anonymous args onto the stack.
21932 XXX I know the prologue shouldn't be pushing registers, but it is faster
21933 that way. */
21934
21935 static void
21936 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
21937 enum machine_mode mode,
21938 tree type,
21939 int *pretend_size,
21940 int second_time ATTRIBUTE_UNUSED)
21941 {
21942 int nregs;
21943
21944 cfun->machine->uses_anonymous_args = 1;
21945 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
21946 {
21947 nregs = pcum->aapcs_ncrn;
21948 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
21949 nregs++;
21950 }
21951 else
21952 nregs = pcum->nregs;
21953
21954 if (nregs < NUM_ARG_REGS)
21955 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
21956 }
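/* For example, if the named arguments occupy just r0 and r1, nregs is 2 and
   *pretend_size becomes (4 - 2) * 4 = 8 bytes, so the prologue spills r2 and
   r3 for the va_arg machinery to find.  */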
21957
21958 /* Return nonzero if the CONSUMER instruction (a store) does not need
21959 PRODUCER's value to calculate the address. */
21960
21961 int
21962 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21963 {
21964 rtx value = PATTERN (producer);
21965 rtx addr = PATTERN (consumer);
21966
21967 if (GET_CODE (value) == COND_EXEC)
21968 value = COND_EXEC_CODE (value);
21969 if (GET_CODE (value) == PARALLEL)
21970 value = XVECEXP (value, 0, 0);
21971 value = XEXP (value, 0);
21972 if (GET_CODE (addr) == COND_EXEC)
21973 addr = COND_EXEC_CODE (addr);
21974 if (GET_CODE (addr) == PARALLEL)
21975 addr = XVECEXP (addr, 0, 0);
21976 addr = XEXP (addr, 0);
21977
21978 return !reg_overlap_mentioned_p (value, addr);
21979 }
21980
21981 /* Return nonzero if the CONSUMER instruction (a store) does need
21982 PRODUCER's value to calculate the address. */
21983
21984 int
21985 arm_early_store_addr_dep (rtx producer, rtx consumer)
21986 {
21987 return !arm_no_early_store_addr_dep (producer, consumer);
21988 }
21989
21990 /* Return nonzero if the CONSUMER instruction (a load) does need
21991 PRODUCER's value to calculate the address. */
21992
21993 int
21994 arm_early_load_addr_dep (rtx producer, rtx consumer)
21995 {
21996 rtx value = PATTERN (producer);
21997 rtx addr = PATTERN (consumer);
21998
21999 if (GET_CODE (value) == COND_EXEC)
22000 value = COND_EXEC_CODE (value);
22001 if (GET_CODE (value) == PARALLEL)
22002 value = XVECEXP (value, 0, 0);
22003 value = XEXP (value, 0);
22004 if (GET_CODE (addr) == COND_EXEC)
22005 addr = COND_EXEC_CODE (addr);
22006 if (GET_CODE (addr) == PARALLEL)
22007 addr = XVECEXP (addr, 0, 0);
22008 addr = XEXP (addr, 1);
22009
22010 return reg_overlap_mentioned_p (value, addr);
22011 }
22012
22013 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
22014 have an early register shift value or amount dependency on the
22015 result of PRODUCER. */
22016
22017 int
22018 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
22019 {
22020 rtx value = PATTERN (producer);
22021 rtx op = PATTERN (consumer);
22022 rtx early_op;
22023
22024 if (GET_CODE (value) == COND_EXEC)
22025 value = COND_EXEC_CODE (value);
22026 if (GET_CODE (value) == PARALLEL)
22027 value = XVECEXP (value, 0, 0);
22028 value = XEXP (value, 0);
22029 if (GET_CODE (op) == COND_EXEC)
22030 op = COND_EXEC_CODE (op);
22031 if (GET_CODE (op) == PARALLEL)
22032 op = XVECEXP (op, 0, 0);
22033 op = XEXP (op, 1);
22034
22035 early_op = XEXP (op, 0);
22036 /* This is either an actual independent shift, or a shift applied to
22037 the first operand of another operation. We want the whole shift
22038 operation. */
22039 if (GET_CODE (early_op) == REG)
22040 early_op = op;
22041
22042 return !reg_overlap_mentioned_p (value, early_op);
22043 }
22044
22045 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
22046 have an early register shift value dependency on the result of
22047 PRODUCER. */
22048
22049 int
22050 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
22051 {
22052 rtx value = PATTERN (producer);
22053 rtx op = PATTERN (consumer);
22054 rtx early_op;
22055
22056 if (GET_CODE (value) == COND_EXEC)
22057 value = COND_EXEC_CODE (value);
22058 if (GET_CODE (value) == PARALLEL)
22059 value = XVECEXP (value, 0, 0);
22060 value = XEXP (value, 0);
22061 if (GET_CODE (op) == COND_EXEC)
22062 op = COND_EXEC_CODE (op);
22063 if (GET_CODE (op) == PARALLEL)
22064 op = XVECEXP (op, 0, 0);
22065 op = XEXP (op, 1);
22066
22067 early_op = XEXP (op, 0);
22068
22069 /* This is either an actual independent shift, or a shift applied to
22070 the first operand of another operation. We want the value being
22071 shifted, in either case. */
22072 if (GET_CODE (early_op) != REG)
22073 early_op = XEXP (early_op, 0);
22074
22075 return !reg_overlap_mentioned_p (value, early_op);
22076 }
22077
22078 /* Return nonzero if the CONSUMER (a mul or mac op) does not
22079 have an early register mult dependency on the result of
22080 PRODUCER. */
22081
22082 int
22083 arm_no_early_mul_dep (rtx producer, rtx consumer)
22084 {
22085 rtx value = PATTERN (producer);
22086 rtx op = PATTERN (consumer);
22087
22088 if (GET_CODE (value) == COND_EXEC)
22089 value = COND_EXEC_CODE (value);
22090 if (GET_CODE (value) == PARALLEL)
22091 value = XVECEXP (value, 0, 0);
22092 value = XEXP (value, 0);
22093 if (GET_CODE (op) == COND_EXEC)
22094 op = COND_EXEC_CODE (op);
22095 if (GET_CODE (op) == PARALLEL)
22096 op = XVECEXP (op, 0, 0);
22097 op = XEXP (op, 1);
22098
22099 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
22100 {
22101 if (GET_CODE (XEXP (op, 0)) == MULT)
22102 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
22103 else
22104 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
22105 }
22106
22107 return 0;
22108 }
22109
22110 /* We can't rely on the caller doing the proper promotion when
22111 using APCS or ATPCS. */
22112
22113 static bool
22114 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
22115 {
22116 return !TARGET_AAPCS_BASED;
22117 }
22118
22119 static enum machine_mode
22120 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
22121 enum machine_mode mode,
22122 int *punsignedp ATTRIBUTE_UNUSED,
22123 const_tree fntype ATTRIBUTE_UNUSED,
22124 int for_return ATTRIBUTE_UNUSED)
22125 {
22126 if (GET_MODE_CLASS (mode) == MODE_INT
22127 && GET_MODE_SIZE (mode) < 4)
22128 return SImode;
22129
22130 return mode;
22131 }
22132
22133 /* AAPCS based ABIs use short enums by default. */
22134
22135 static bool
22136 arm_default_short_enums (void)
22137 {
22138 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
22139 }
22140
22141
22142 /* AAPCS requires that anonymous bitfields affect structure alignment. */
22143
22144 static bool
22145 arm_align_anon_bitfield (void)
22146 {
22147 return TARGET_AAPCS_BASED;
22148 }
22149
22150
22151 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
22152
22153 static tree
22154 arm_cxx_guard_type (void)
22155 {
22156 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
22157 }
22158
22159 /* Return non-zero if the consumer (a multiply-accumulate instruction)
22160 has an accumulator dependency on the result of the producer (a
22161 multiplication instruction) and no other dependency on that result. */
22162 int
22163 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
22164 {
22165 rtx mul = PATTERN (producer);
22166 rtx mac = PATTERN (consumer);
22167 rtx mul_result;
22168 rtx mac_op0, mac_op1, mac_acc;
22169
22170 if (GET_CODE (mul) == COND_EXEC)
22171 mul = COND_EXEC_CODE (mul);
22172 if (GET_CODE (mac) == COND_EXEC)
22173 mac = COND_EXEC_CODE (mac);
22174
22175 /* Check that mul is of the form (set (...) (mult ...))
22176 and mla is of the form (set (...) (plus (mult ...) (...))). */
22177 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
22178 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
22179 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
22180 return 0;
22181
22182 mul_result = XEXP (mul, 0);
22183 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
22184 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
22185 mac_acc = XEXP (XEXP (mac, 1), 1);
22186
22187 return (reg_overlap_mentioned_p (mul_result, mac_acc)
22188 && !reg_overlap_mentioned_p (mul_result, mac_op0)
22189 && !reg_overlap_mentioned_p (mul_result, mac_op1));
22190 }
22191
22192
22193 /* The EABI says test the least significant bit of a guard variable. */
22194
22195 static bool
22196 arm_cxx_guard_mask_bit (void)
22197 {
22198 return TARGET_AAPCS_BASED;
22199 }
22200
22201
22202 /* The EABI specifies that all array cookies are 8 bytes long. */
22203
22204 static tree
22205 arm_get_cookie_size (tree type)
22206 {
22207 tree size;
22208
22209 if (!TARGET_AAPCS_BASED)
22210 return default_cxx_get_cookie_size (type);
22211
22212 size = build_int_cst (sizetype, 8);
22213 return size;
22214 }
22215
22216
22217 /* The EABI says that array cookies should also contain the element size. */
22218
22219 static bool
22220 arm_cookie_has_size (void)
22221 {
22222 return TARGET_AAPCS_BASED;
22223 }
22224
22225
22226 /* The EABI says constructors and destructors should return a pointer to
22227 the object constructed/destroyed. */
22228
22229 static bool
22230 arm_cxx_cdtor_returns_this (void)
22231 {
22232 return TARGET_AAPCS_BASED;
22233 }
22234
22235 /* The EABI says that an inline function may never be the key
22236 method. */
22237
22238 static bool
22239 arm_cxx_key_method_may_be_inline (void)
22240 {
22241 return !TARGET_AAPCS_BASED;
22242 }
22243
22244 static void
22245 arm_cxx_determine_class_data_visibility (tree decl)
22246 {
22247 if (!TARGET_AAPCS_BASED
22248 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
22249 return;
22250
22251 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
22252 is exported. However, on systems without dynamic vague linkage,
22253 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
22254 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
22255 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
22256 else
22257 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
22258 DECL_VISIBILITY_SPECIFIED (decl) = 1;
22259 }
22260
22261 static bool
22262 arm_cxx_class_data_always_comdat (void)
22263 {
22264 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
22265 vague linkage if the class has no key function. */
22266 return !TARGET_AAPCS_BASED;
22267 }
22268
22269
22270 /* The EABI says __aeabi_atexit should be used to register static
22271 destructors. */
22272
22273 static bool
22274 arm_cxx_use_aeabi_atexit (void)
22275 {
22276 return TARGET_AAPCS_BASED;
22277 }
22278
22279
22280 void
22281 arm_set_return_address (rtx source, rtx scratch)
22282 {
22283 arm_stack_offsets *offsets;
22284 HOST_WIDE_INT delta;
22285 rtx addr;
22286 unsigned long saved_regs;
22287
22288 offsets = arm_get_frame_offsets ();
22289 saved_regs = offsets->saved_regs_mask;
22290
22291 if ((saved_regs & (1 << LR_REGNUM)) == 0)
22292 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22293 else
22294 {
22295 if (frame_pointer_needed)
22296 addr = plus_constant(hard_frame_pointer_rtx, -4);
22297 else
22298 {
22299 /* LR will be the first saved register. */
22300 delta = offsets->outgoing_args - (offsets->frame + 4);
22301
22302
22303 if (delta >= 4096)
22304 {
22305 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
22306 GEN_INT (delta & ~4095)));
22307 addr = scratch;
22308 delta &= 4095;
22309 }
22310 else
22311 addr = stack_pointer_rtx;
22312
22313 addr = plus_constant (addr, delta);
22314 }
22315 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22316 }
22317 }
22318
22319
22320 void
22321 thumb_set_return_address (rtx source, rtx scratch)
22322 {
22323 arm_stack_offsets *offsets;
22324 HOST_WIDE_INT delta;
22325 HOST_WIDE_INT limit;
22326 int reg;
22327 rtx addr;
22328 unsigned long mask;
22329
22330 emit_use (source);
22331
22332 offsets = arm_get_frame_offsets ();
22333 mask = offsets->saved_regs_mask;
22334 if (mask & (1 << LR_REGNUM))
22335 {
22336 limit = 1024;
22337 /* Find the saved regs. */
22338 if (frame_pointer_needed)
22339 {
22340 delta = offsets->soft_frame - offsets->saved_args;
22341 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
22342 if (TARGET_THUMB1)
22343 limit = 128;
22344 }
22345 else
22346 {
22347 delta = offsets->outgoing_args - offsets->saved_args;
22348 reg = SP_REGNUM;
22349 }
22350 /* Allow for the stack frame. */
22351 if (TARGET_THUMB1 && TARGET_BACKTRACE)
22352 delta -= 16;
22353 /* The link register is always the first saved register. */
22354 delta -= 4;
22355
22356 /* Construct the address. */
22357 addr = gen_rtx_REG (SImode, reg);
22358 if (delta > limit)
22359 {
22360 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
22361 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
22362 addr = scratch;
22363 }
22364 else
22365 addr = plus_constant (addr, delta);
22366
22367 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22368 }
22369 else
22370 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22371 }
22372
22373 /* Implements target hook vector_mode_supported_p. */
22374 bool
22375 arm_vector_mode_supported_p (enum machine_mode mode)
22376 {
22377 /* Neon also supports V2SImode, etc. listed in the clause below. */
22378 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
22379 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
22380 return true;
22381
22382 if ((TARGET_NEON || TARGET_IWMMXT)
22383 && ((mode == V2SImode)
22384 || (mode == V4HImode)
22385 || (mode == V8QImode)))
22386 return true;
22387
22388 return false;
22389 }
22390
22391 /* Implements target hook array_mode_supported_p. */
22392
22393 static bool
22394 arm_array_mode_supported_p (enum machine_mode mode,
22395 unsigned HOST_WIDE_INT nelems)
22396 {
22397 if (TARGET_NEON
22398 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
22399 && (nelems >= 2 && nelems <= 4))
22400 return true;
22401
22402 return false;
22403 }
22404
22405 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
22406 registers when autovectorizing for Neon, at least until multiple vector
22407 widths are supported properly by the middle-end. */
22408
22409 static enum machine_mode
22410 arm_preferred_simd_mode (enum machine_mode mode)
22411 {
22412 if (TARGET_NEON)
22413 switch (mode)
22414 {
22415 case SFmode:
22416 return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
22417 case SImode:
22418 return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
22419 case HImode:
22420 return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
22421 case QImode:
22422 return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
22423 case DImode:
22424 if (TARGET_NEON_VECTORIZE_QUAD)
22425 return V2DImode;
22426 break;
22427
22428 default:;
22429 }
22430
22431 if (TARGET_REALLY_IWMMXT)
22432 switch (mode)
22433 {
22434 case SImode:
22435 return V2SImode;
22436 case HImode:
22437 return V4HImode;
22438 case QImode:
22439 return V8QImode;
22440
22441 default:;
22442 }
22443
22444 return word_mode;
22445 }
22446
22447 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22448
22449 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
22450 using r0-r4 for function arguments and r7 for the stack frame, and not have
22451 enough left over to do doubleword arithmetic. For Thumb-2 all the
22452 potentially problematic instructions accept high registers so this is not
22453 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
22454 that require many low registers. */
22455 static bool
22456 arm_class_likely_spilled_p (reg_class_t rclass)
22457 {
22458 if ((TARGET_THUMB1 && rclass == LO_REGS)
22459 || rclass == CC_REG)
22460 return true;
22461
22462 return false;
22463 }
22464
22465 /* Implements target hook small_register_classes_for_mode_p. */
22466 bool
22467 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
22468 {
22469 return TARGET_THUMB1;
22470 }
22471
22472 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
22473 ARM insns and therefore guarantee that the shift count is modulo 256.
22474 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
22475 guarantee no particular behavior for out-of-range counts. */
22476
22477 static unsigned HOST_WIDE_INT
22478 arm_shift_truncation_mask (enum machine_mode mode)
22479 {
22480 return mode == SImode ? 255 : 0;
22481 }
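/* In practice this lets the compiler drop a redundant masking of the shift
   count: an SImode shift whose count has already been ANDed with 255 needs no
   such AND, because register-specified shifts only use the bottom byte of the
   count.  DImode shifts get a mask of zero, so no such assumption is made.  */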
22482
22483
22484 /* Map internal gcc register numbers to DWARF2 register numbers. */
22485
22486 unsigned int
22487 arm_dbx_register_number (unsigned int regno)
22488 {
22489 if (regno < 16)
22490 return regno;
22491
22492 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22493 compatibility. The EABI defines them as registers 96-103. */
22494 if (IS_FPA_REGNUM (regno))
22495 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
22496
22497 if (IS_VFP_REGNUM (regno))
22498 {
22499 /* See comment in arm_dwarf_register_span. */
22500 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22501 return 64 + regno - FIRST_VFP_REGNUM;
22502 else
22503 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
22504 }
22505
22506 if (IS_IWMMXT_GR_REGNUM (regno))
22507 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
22508
22509 if (IS_IWMMXT_REGNUM (regno))
22510 return 112 + regno - FIRST_IWMMXT_REGNUM;
22511
22512 gcc_unreachable ();
22513 }
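/* For example, s1 (which is valid for single-precision use) maps to the
   legacy number 64 + 1 = 65, whereas d16, which has no single-precision
   alias, maps into the EABI range as 256 + 16 = 272.  */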
22514
22515 /* Dwarf models VFPv3 registers as 32 64-bit registers.
22516 GCC models them as 64 32-bit registers, so we need to describe this to
22517 the DWARF generation code. Other registers can use the default. */
22518 static rtx
22519 arm_dwarf_register_span (rtx rtl)
22520 {
22521 unsigned regno;
22522 int nregs;
22523 int i;
22524 rtx p;
22525
22526 regno = REGNO (rtl);
22527 if (!IS_VFP_REGNUM (regno))
22528 return NULL_RTX;
22529
22530 /* XXX FIXME: The EABI defines two VFP register ranges:
22531 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
22532 256-287: D0-D31
22533 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22534 corresponding D register. Until GDB supports this, we shall use the
22535 legacy encodings. We also use these encodings for D0-D15 for
22536 compatibility with older debuggers. */
22537 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22538 return NULL_RTX;
22539
22540 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
22541 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
22542 regno = (regno - FIRST_VFP_REGNUM) / 2;
22543 for (i = 0; i < nregs; i++)
22544 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
22545
22546 return p;
22547 }
22548
22549 #if ARM_UNWIND_INFO
22550 /* Emit unwind directives for a store-multiple instruction or stack pointer
22551 push during alignment.
22552 These should only ever be generated by the function prologue code, so
22553 expect them to have a particular form. */
22554
22555 static void
22556 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
22557 {
22558 int i;
22559 HOST_WIDE_INT offset;
22560 HOST_WIDE_INT nregs;
22561 int reg_size;
22562 unsigned reg;
22563 unsigned lastreg;
22564 rtx e;
22565
22566 e = XVECEXP (p, 0, 0);
22567 if (GET_CODE (e) != SET)
22568 abort ();
22569
22570 /* First insn will adjust the stack pointer. */
22571 if (GET_CODE (e) != SET
22572 || GET_CODE (XEXP (e, 0)) != REG
22573 || REGNO (XEXP (e, 0)) != SP_REGNUM
22574 || GET_CODE (XEXP (e, 1)) != PLUS)
22575 abort ();
22576
22577 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
22578 nregs = XVECLEN (p, 0) - 1;
22579
22580 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
22581 if (reg < 16)
22582 {
22583 /* The function prologue may also push pc, but does not annotate it, as it
22584 is never restored. We turn this into a stack pointer adjustment. */
22585 if (nregs * 4 == offset - 4)
22586 {
22587 fprintf (asm_out_file, "\t.pad #4\n");
22588 offset -= 4;
22589 }
22590 reg_size = 4;
22591 fprintf (asm_out_file, "\t.save {");
22592 }
22593 else if (IS_VFP_REGNUM (reg))
22594 {
22595 reg_size = 8;
22596 fprintf (asm_out_file, "\t.vsave {");
22597 }
22598 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
22599 {
22600 /* FPA registers are done differently. */
22601 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
22602 return;
22603 }
22604 else
22605 /* Unknown register type. */
22606 abort ();
22607
22608 /* If the stack increment doesn't match the size of the saved registers,
22609 something has gone horribly wrong. */
22610 if (offset != nregs * reg_size)
22611 abort ();
22612
22613 offset = 0;
22614 lastreg = 0;
22615 /* The remaining insns will describe the stores. */
22616 for (i = 1; i <= nregs; i++)
22617 {
22618 /* Expect (set (mem <addr>) (reg)).
22619 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22620 e = XVECEXP (p, 0, i);
22621 if (GET_CODE (e) != SET
22622 || GET_CODE (XEXP (e, 0)) != MEM
22623 || GET_CODE (XEXP (e, 1)) != REG)
22624 abort ();
22625
22626 reg = REGNO (XEXP (e, 1));
22627 if (reg < lastreg)
22628 abort ();
22629
22630 if (i != 1)
22631 fprintf (asm_out_file, ", ");
22632 /* We can't use %r for vfp because we need to use the
22633 double precision register names. */
22634 if (IS_VFP_REGNUM (reg))
22635 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
22636 else
22637 asm_fprintf (asm_out_file, "%r", reg);
22638
22639 #ifdef ENABLE_CHECKING
22640 /* Check that the addresses are consecutive. */
22641 e = XEXP (XEXP (e, 0), 0);
22642 if (GET_CODE (e) == PLUS)
22643 {
22644 offset += reg_size;
22645 if (GET_CODE (XEXP (e, 0)) != REG
22646 || REGNO (XEXP (e, 0)) != SP_REGNUM
22647 || GET_CODE (XEXP (e, 1)) != CONST_INT
22648 || offset != INTVAL (XEXP (e, 1)))
22649 abort ();
22650 }
22651 else if (i != 1
22652 || GET_CODE (e) != REG
22653 || REGNO (e) != SP_REGNUM)
22654 abort ();
22655 #endif
22656 }
22657 fprintf (asm_out_file, "}\n");
22658 }
22659
22660 /* Emit unwind directives for a SET. */
22661
22662 static void
22663 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
22664 {
22665 rtx e0;
22666 rtx e1;
22667 unsigned reg;
22668
22669 e0 = XEXP (p, 0);
22670 e1 = XEXP (p, 1);
22671 switch (GET_CODE (e0))
22672 {
22673 case MEM:
22674 /* Pushing a single register. */
22675 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
22676 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
22677 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
22678 abort ();
22679
22680 asm_fprintf (asm_out_file, "\t.save ");
22681 if (IS_VFP_REGNUM (REGNO (e1)))
22682 asm_fprintf(asm_out_file, "{d%d}\n",
22683 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
22684 else
22685 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
22686 break;
22687
22688 case REG:
22689 if (REGNO (e0) == SP_REGNUM)
22690 {
22691 /* A stack increment. */
22692 if (GET_CODE (e1) != PLUS
22693 || GET_CODE (XEXP (e1, 0)) != REG
22694 || REGNO (XEXP (e1, 0)) != SP_REGNUM
22695 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22696 abort ();
22697
22698 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
22699 -INTVAL (XEXP (e1, 1)));
22700 }
22701 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
22702 {
22703 HOST_WIDE_INT offset;
22704
22705 if (GET_CODE (e1) == PLUS)
22706 {
22707 if (GET_CODE (XEXP (e1, 0)) != REG
22708 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22709 abort ();
22710 reg = REGNO (XEXP (e1, 0));
22711 offset = INTVAL (XEXP (e1, 1));
22712 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
22713 HARD_FRAME_POINTER_REGNUM, reg,
22714 offset);
22715 }
22716 else if (GET_CODE (e1) == REG)
22717 {
22718 reg = REGNO (e1);
22719 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
22720 HARD_FRAME_POINTER_REGNUM, reg);
22721 }
22722 else
22723 abort ();
22724 }
22725 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
22726 {
22727 /* Move from sp to reg. */
22728 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
22729 }
22730 else if (GET_CODE (e1) == PLUS
22731 && GET_CODE (XEXP (e1, 0)) == REG
22732 && REGNO (XEXP (e1, 0)) == SP_REGNUM
22733 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
22734 {
22735 /* Set reg to offset from sp. */
22736 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
22737 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
22738 }
22739 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
22740 {
22741 /* Stack pointer save before alignment. */
22742 reg = REGNO (e0);
22743 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22744 reg + 0x90, reg);
22745 }
22746 else
22747 abort ();
22748 break;
22749
22750 default:
22751 abort ();
22752 }
22753 }
22754
22755
22756 /* Emit unwind directives for the given insn. */
22757
22758 static void
22759 arm_unwind_emit (FILE * asm_out_file, rtx insn)
22760 {
22761 rtx pat;
22762
22763 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22764 return;
22765
22766 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22767 && (TREE_NOTHROW (current_function_decl)
22768 || crtl->all_throwers_are_sibcalls))
22769 return;
22770
22771 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
22772 return;
22773
22774 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
22775 if (pat)
22776 pat = XEXP (pat, 0);
22777 else
22778 pat = PATTERN (insn);
22779
22780 switch (GET_CODE (pat))
22781 {
22782 case SET:
22783 arm_unwind_emit_set (asm_out_file, pat);
22784 break;
22785
22786 case SEQUENCE:
22787 /* Store multiple. */
22788 arm_unwind_emit_sequence (asm_out_file, pat);
22789 break;
22790
22791 default:
22792 abort();
22793 }
22794 }
22795
22796
22797 /* Output a reference from a function exception table to the type_info
22798 object X. The EABI specifies that the symbol should be relocated by
22799 an R_ARM_TARGET2 relocation. */
22800
22801 static bool
22802 arm_output_ttype (rtx x)
22803 {
22804 fputs ("\t.word\t", asm_out_file);
22805 output_addr_const (asm_out_file, x);
22806 /* Use special relocations for symbol references. */
22807 if (GET_CODE (x) != CONST_INT)
22808 fputs ("(TARGET2)", asm_out_file);
22809 fputc ('\n', asm_out_file);
22810
22811 return TRUE;
22812 }
22813
22814 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22815
22816 static void
22817 arm_asm_emit_except_personality (rtx personality)
22818 {
22819 fputs ("\t.personality\t", asm_out_file);
22820 output_addr_const (asm_out_file, personality);
22821 fputc ('\n', asm_out_file);
22822 }
22823
22824 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22825
22826 static void
22827 arm_asm_init_sections (void)
22828 {
22829 exception_section = get_unnamed_section (0, output_section_asm_op,
22830 "\t.handlerdata");
22831 }
22832 #endif /* ARM_UNWIND_INFO */
22833
22834 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22835 stack alignment. */
22836
22837 static void
22838 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
22839 {
22840 rtx unspec = SET_SRC (pattern);
22841 gcc_assert (GET_CODE (unspec) == UNSPEC);
22842
22843 switch (index)
22844 {
22845 case UNSPEC_STACK_ALIGN:
22846 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22847 put anything on the stack, so hopefully it won't matter.
22848 CFA = SP will be correct after alignment. */
22849 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
22850 SET_DEST (pattern));
22851 break;
22852 default:
22853 gcc_unreachable ();
22854 }
22855 }
22856
22857
22858 /* Output unwind directives for the start/end of a function. */
22859
22860 void
22861 arm_output_fn_unwind (FILE * f, bool prologue)
22862 {
22863 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22864 return;
22865
22866 if (prologue)
22867 fputs ("\t.fnstart\n", f);
22868 else
22869 {
22870 /* If this function will never be unwound, then mark it as such.
22871 The same condition is used in arm_unwind_emit to suppress
22872 the frame annotations. */
22873 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22874 && (TREE_NOTHROW (current_function_decl)
22875 || crtl->all_throwers_are_sibcalls))
22876 fputs("\t.cantunwind\n", f);
22877
22878 fputs ("\t.fnend\n", f);
22879 }
22880 }
22881
22882 static bool
22883 arm_emit_tls_decoration (FILE *fp, rtx x)
22884 {
22885 enum tls_reloc reloc;
22886 rtx val;
22887
22888 val = XVECEXP (x, 0, 0);
22889 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
22890
22891 output_addr_const (fp, val);
22892
22893 switch (reloc)
22894 {
22895 case TLS_GD32:
22896 fputs ("(tlsgd)", fp);
22897 break;
22898 case TLS_LDM32:
22899 fputs ("(tlsldm)", fp);
22900 break;
22901 case TLS_LDO32:
22902 fputs ("(tlsldo)", fp);
22903 break;
22904 case TLS_IE32:
22905 fputs ("(gottpoff)", fp);
22906 break;
22907 case TLS_LE32:
22908 fputs ("(tpoff)", fp);
22909 break;
22910 default:
22911 gcc_unreachable ();
22912 }
22913
22914 switch (reloc)
22915 {
22916 case TLS_GD32:
22917 case TLS_LDM32:
22918 case TLS_IE32:
22919 fputs (" + (. - ", fp);
22920 output_addr_const (fp, XVECEXP (x, 0, 2));
22921 fputs (" - ", fp);
22922 output_addr_const (fp, XVECEXP (x, 0, 3));
22923 fputc (')', fp);
22924 break;
22925 default:
22926 break;
22927 }
22928
22929 return TRUE;
22930 }
22931
22932 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22933
22934 static void
22935 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
22936 {
22937 gcc_assert (size == 4);
22938 fputs ("\t.word\t", file);
22939 output_addr_const (file, x);
22940 fputs ("(tlsldo)", file);
22941 }
22942
22943 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
22944
22945 static bool
22946 arm_output_addr_const_extra (FILE *fp, rtx x)
22947 {
22948 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
22949 return arm_emit_tls_decoration (fp, x);
22950 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
22951 {
22952 char label[256];
22953 int labelno = INTVAL (XVECEXP (x, 0, 0));
22954
22955 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
22956 assemble_name_raw (fp, label);
22957
22958 return TRUE;
22959 }
22960 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
22961 {
22962 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
22963 if (GOT_PCREL)
22964 fputs ("+.", fp);
22965 fputs ("-(", fp);
22966 output_addr_const (fp, XVECEXP (x, 0, 0));
22967 fputc (')', fp);
22968 return TRUE;
22969 }
22970 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
22971 {
22972 output_addr_const (fp, XVECEXP (x, 0, 0));
22973 if (GOT_PCREL)
22974 fputs ("+.", fp);
22975 fputs ("-(", fp);
22976 output_addr_const (fp, XVECEXP (x, 0, 1));
22977 fputc (')', fp);
22978 return TRUE;
22979 }
22980 else if (GET_CODE (x) == CONST_VECTOR)
22981 return arm_emit_vector_const (fp, x);
22982
22983 return FALSE;
22984 }
22985
22986 /* Output assembly for a shift instruction.
22987 SET_FLAGS determines how the instruction modifies the condition codes.
22988 0 - Do not set condition codes.
22989 1 - Set condition codes.
22990 2 - Use smallest instruction. */
22991 const char *
22992 arm_output_shift(rtx * operands, int set_flags)
22993 {
22994 char pattern[100];
22995 static const char flag_chars[3] = {'?', '.', '!'};
22996 const char *shift;
22997 HOST_WIDE_INT val;
22998 char c;
22999
23000 c = flag_chars[set_flags];
23001 if (TARGET_UNIFIED_ASM)
23002 {
23003 shift = shift_op(operands[3], &val);
23004 if (shift)
23005 {
23006 if (val != -1)
23007 operands[2] = GEN_INT(val);
23008 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
23009 }
23010 else
23011 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
23012 }
23013 else
23014 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
23015 output_asm_insn (pattern, operands);
23016 return "";
23017 }
23018
23019 /* Output a Thumb-1 casesi dispatch sequence. */
23020 const char *
23021 thumb1_output_casesi (rtx *operands)
23022 {
23023 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
23024
23025 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
23026
23027 switch (GET_MODE(diff_vec))
23028 {
23029 case QImode:
23030 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
23031 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
23032 case HImode:
23033 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
23034 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
23035 case SImode:
23036 return "bl\t%___gnu_thumb1_case_si";
23037 default:
23038 gcc_unreachable ();
23039 }
23040 }
23041
23042 /* Output a Thumb-2 casesi instruction. */
23043 const char *
23044 thumb2_output_casesi (rtx *operands)
23045 {
23046 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
23047
23048 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
23049
23050 output_asm_insn ("cmp\t%0, %1", operands);
23051 output_asm_insn ("bhi\t%l3", operands);
23052 switch (GET_MODE(diff_vec))
23053 {
23054 case QImode:
23055 return "tbb\t[%|pc, %0]";
23056 case HImode:
23057 return "tbh\t[%|pc, %0, lsl #1]";
23058 case SImode:
23059 if (flag_pic)
23060 {
23061 output_asm_insn ("adr\t%4, %l2", operands);
23062 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
23063 output_asm_insn ("add\t%4, %4, %5", operands);
23064 return "bx\t%4";
23065 }
23066 else
23067 {
23068 output_asm_insn ("adr\t%4, %l2", operands);
23069 return "ldr\t%|pc, [%4, %0, lsl #2]";
23070 }
23071 default:
23072 gcc_unreachable ();
23073 }
23074 }
23075
23076 /* Most ARM cores are single issue, but some newer ones can dual issue.
23077 The scheduler descriptions rely on this being correct. */
23078 static int
23079 arm_issue_rate (void)
23080 {
23081 switch (arm_tune)
23082 {
23083 case cortexr4:
23084 case cortexr4f:
23085 case cortexr5:
23086 case cortexa5:
23087 case cortexa8:
23088 case cortexa9:
23089 case fa726te:
23090 return 2;
23091
23092 default:
23093 return 1;
23094 }
23095 }
23096
23097 /* A table and a function to perform ARM-specific name mangling for
23098 NEON vector types in order to conform to the AAPCS (see "Procedure
23099 Call Standard for the ARM Architecture", Appendix A). To qualify
23100 for emission with the mangled names defined in that document, a
23101 vector type must not only be of the correct mode but also be
23102 composed of NEON vector element types (e.g. __builtin_neon_qi). */
23103 typedef struct
23104 {
23105 enum machine_mode mode;
23106 const char *element_type_name;
23107 const char *aapcs_name;
23108 } arm_mangle_map_entry;
23109
23110 static arm_mangle_map_entry arm_mangle_map[] = {
23111 /* 64-bit containerized types. */
23112 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
23113 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
23114 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
23115 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
23116 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
23117 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
23118 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
23119 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
23120 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
23121 /* 128-bit containerized types. */
23122 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
23123 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
23124 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
23125 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
23126 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
23127 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
23128 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
23129 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
23130 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
23131 { VOIDmode, NULL, NULL }
23132 };
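/* So, for example, a vector in V8QImode whose elements are __builtin_neon_qi
   (the container behind arm_neon.h's int8x8_t) mangles as "15__simd64_int8_t",
   i.e. the length-prefixed Itanium-ABI spelling of "__simd64_int8_t".  */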
23133
23134 const char *
23135 arm_mangle_type (const_tree type)
23136 {
23137 arm_mangle_map_entry *pos = arm_mangle_map;
23138
23139 /* The ARM ABI documents (10th October 2008) say that "__va_list"
23140 has to be mangled as if it is in the "std" namespace. */
23141 if (TARGET_AAPCS_BASED
23142 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
23143 {
23144 static bool warned;
23145 if (!warned && warn_psabi && !in_system_header)
23146 {
23147 warned = true;
23148 inform (input_location,
23149 "the mangling of %<va_list%> has changed in GCC 4.4");
23150 }
23151 return "St9__va_list";
23152 }
23153
23154 /* Half-precision float. */
23155 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
23156 return "Dh";
23157
23158 if (TREE_CODE (type) != VECTOR_TYPE)
23159 return NULL;
23160
23161 /* Check the mode of the vector type, and the name of the vector
23162 element type, against the table. */
23163 while (pos->mode != VOIDmode)
23164 {
23165 tree elt_type = TREE_TYPE (type);
23166
23167 if (pos->mode == TYPE_MODE (type)
23168 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
23169 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
23170 pos->element_type_name))
23171 return pos->aapcs_name;
23172
23173 pos++;
23174 }
23175
23176 /* Use the default mangling for unrecognized (possibly user-defined)
23177 vector types. */
23178 return NULL;
23179 }
23180
23181 /* Order of allocation of core registers for Thumb: this allocation is
23182 written over the corresponding initial entries of the array
23183 initialized with REG_ALLOC_ORDER. We allocate all low registers
23184 first. Saving and restoring a low register is usually cheaper than
23185 using a call-clobbered high register. */
23186
23187 static const int thumb_core_reg_alloc_order[] =
23188 {
23189 3, 2, 1, 0, 4, 5, 6, 7,
23190 14, 12, 8, 9, 10, 11, 13, 15
23191 };
23192
23193 /* Adjust register allocation order when compiling for Thumb. */
23194
23195 void
23196 arm_order_regs_for_local_alloc (void)
23197 {
23198 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
23199 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
23200 if (TARGET_THUMB)
23201 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
23202 sizeof (thumb_core_reg_alloc_order));
23203 }
23204
23205 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
23206
23207 bool
23208 arm_frame_pointer_required (void)
23209 {
23210 return (cfun->has_nonlocal_label
23211 || SUBTARGET_FRAME_POINTER_REQUIRED
23212 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
23213 }
23214
23215 /* Only Thumb-1 lacks support for conditional execution, so return true if
23216 the target is not Thumb-1. */
23217 static bool
23218 arm_have_conditional_execution (void)
23219 {
23220 return !TARGET_THUMB1;
23221 }
23222
23223 /* Legitimize a memory reference for a sync primitive implemented using
23224 ldrex / strex. We currently force the form of the reference to be
23225 indirect without offset. We do not yet support the indirect offset
23226 addressing supported by some ARM targets for these
23227 instructions. */
23228 static rtx
23229 arm_legitimize_sync_memory (rtx memory)
23230 {
23231 rtx addr = force_reg (Pmode, XEXP (memory, 0));
23232 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
23233
23234 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
23235 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
23236 return legitimate_memory;
23237 }
23238
23239 /* An instruction emitter. */
23240 typedef void (* emit_f) (int label, const char *, rtx *);
23241
23242 /* An instruction emitter that emits via the conventional
23243 output_asm_insn. */
23244 static void
23245 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
23246 {
23247 output_asm_insn (pattern, operands);
23248 }
23249
23250 /* Count the number of emitted synchronization instructions. */
23251 static unsigned arm_insn_count;
23252
23253 /* An emitter that counts emitted instructions but does not actually
23254 emit instructions into the instruction stream. */
23255 static void
23256 arm_count (int label,
23257 const char *pattern ATTRIBUTE_UNUSED,
23258 rtx *operands ATTRIBUTE_UNUSED)
23259 {
23260 if (! label)
23261 ++ arm_insn_count;
23262 }
23263
23264 /* Construct a pattern using conventional output formatting and feed
23265 it to output_asm_insn. Provides a mechanism to construct the
23266 output pattern on the fly. Note the hard limit on the pattern
23267 buffer size. */
23268 static void ATTRIBUTE_PRINTF_4
23269 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
23270 const char *pattern, ...)
23271 {
23272 va_list ap;
23273 char buffer[256];
23274
23275 va_start (ap, pattern);
23276 vsprintf (buffer, pattern, ap);
23277 va_end (ap);
23278 emit (label, buffer, operands);
23279 }
23280
23281 /* Emit the memory barrier instruction, if any, provided by this
23282 target to a specified emitter. */
23283 static void
23284 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
23285 {
23286 if (TARGET_HAVE_DMB)
23287 {
23288 /* Note we issue a system level barrier. We should consider
23289 issuing an inner shareability zone barrier here instead, i.e.
23290 "DMB ISH". */
23291 emit (0, "dmb\tsy", operands);
23292 return;
23293 }
23294
23295 if (TARGET_HAVE_DMB_MCR)
23296 {
23297 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
23298 return;
23299 }
23300
23301 gcc_unreachable ();
23302 }
23303
23304 /* Emit the memory barrier instruction, if any, provided by this
23305 target. */
23306 const char *
23307 arm_output_memory_barrier (rtx *operands)
23308 {
23309 arm_process_output_memory_barrier (arm_emit, operands);
23310 return "";
23311 }
23312
23313 /* Helper to figure out the instruction suffix required on ldrex/strex
23314 for operations on an object of the specified mode. */
23315 static const char *
23316 arm_ldrex_suffix (enum machine_mode mode)
23317 {
23318 switch (mode)
23319 {
23320 case QImode: return "b";
23321 case HImode: return "h";
23322 case SImode: return "";
23323 case DImode: return "d";
23324 default:
23325 gcc_unreachable ();
23326 }
23327 return "";
23328 }
23329
23330 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
23331 mode. */
23332 static void
23333 arm_output_ldrex (emit_f emit,
23334 enum machine_mode mode,
23335 rtx target,
23336 rtx memory)
23337 {
23338 const char *suffix = arm_ldrex_suffix (mode);
23339 rtx operands[2];
23340
23341 operands[0] = target;
23342 operands[1] = memory;
23343 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
23344 }
23345
23346 /* Emit a strex{b,h,d, } instruction appropriate for the specified
23347 mode. */
23348 static void
23349 arm_output_strex (emit_f emit,
23350 enum machine_mode mode,
23351 const char *cc,
23352 rtx result,
23353 rtx value,
23354 rtx memory)
23355 {
23356 const char *suffix = arm_ldrex_suffix (mode);
23357 rtx operands[3];
23358
23359 operands[0] = result;
23360 operands[1] = value;
23361 operands[2] = memory;
23362 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
23363 cc);
23364 }
23365
23366 /* Helper to emit a two operand instruction. */
23367 static void
23368 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
23369 {
23370 rtx operands[2];
23371
23372 operands[0] = d;
23373 operands[1] = s;
23374 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
23375 }
23376
23377 /* Helper to emit a three operand instruction. */
23378 static void
23379 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
23380 {
23381 rtx operands[3];
23382
23383 operands[0] = d;
23384 operands[1] = a;
23385 operands[2] = b;
23386 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
23387 }
23388
23389 /* Emit a load store exclusive synchronization loop.
23390
23391 do
23392 old_value = [mem]
23393 if old_value != required_value
23394 break;
23395 t1 = sync_op (old_value, new_value)
23396 [mem] = t1, t2 = [0|1]
23397 while ! t2
23398
23399 Note:
23400 t1 == t2 is not permitted
23401 t1 == old_value is permitted
23402
23403 required_value:
23404
23405 RTX register or const_int representing the required old_value for
23406 the modification to continue; if NULL, no comparison is performed. */
23407 static void
23408 arm_output_sync_loop (emit_f emit,
23409 enum machine_mode mode,
23410 rtx old_value,
23411 rtx memory,
23412 rtx required_value,
23413 rtx new_value,
23414 rtx t1,
23415 rtx t2,
23416 enum attr_sync_op sync_op,
23417 int early_barrier_required)
23418 {
23419 rtx operands[1];
23420
23421 gcc_assert (t1 != t2);
23422
23423 if (early_barrier_required)
23424 arm_process_output_memory_barrier (emit, NULL);
23425
23426 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
23427
23428 arm_output_ldrex (emit, mode, old_value, memory);
23429
23430 if (required_value)
23431 {
23432 rtx operands[2];
23433
23434 operands[0] = old_value;
23435 operands[1] = required_value;
23436 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
23437 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
23438 }
23439
23440 switch (sync_op)
23441 {
23442 case SYNC_OP_ADD:
23443 arm_output_op3 (emit, "add", t1, old_value, new_value);
23444 break;
23445
23446 case SYNC_OP_SUB:
23447 arm_output_op3 (emit, "sub", t1, old_value, new_value);
23448 break;
23449
23450 case SYNC_OP_IOR:
23451 arm_output_op3 (emit, "orr", t1, old_value, new_value);
23452 break;
23453
23454 case SYNC_OP_XOR:
23455 arm_output_op3 (emit, "eor", t1, old_value, new_value);
23456 break;
23457
23458 case SYNC_OP_AND:
23459 arm_output_op3 (emit,"and", t1, old_value, new_value);
23460 break;
23461
23462 case SYNC_OP_NAND:
23463 arm_output_op3 (emit, "and", t1, old_value, new_value);
23464 arm_output_op2 (emit, "mvn", t1, t1);
23465 break;
23466
23467 case SYNC_OP_NONE:
23468 t1 = new_value;
23469 break;
23470 }
23471
23472 if (t2)
23473 {
23474 arm_output_strex (emit, mode, "", t2, t1, memory);
23475 operands[0] = t2;
23476 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23477 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23478 LOCAL_LABEL_PREFIX);
23479 }
23480 else
23481 {
23482 /* Use old_value for the return value because for some operations
23483 the old_value can easily be restored. This saves one register. */
23484 arm_output_strex (emit, mode, "", old_value, t1, memory);
23485 operands[0] = old_value;
23486 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23487 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23488 LOCAL_LABEL_PREFIX);
23489
23490 switch (sync_op)
23491 {
23492 case SYNC_OP_ADD:
23493 arm_output_op3 (emit, "sub", old_value, t1, new_value);
23494 break;
23495
23496 case SYNC_OP_SUB:
23497 arm_output_op3 (emit, "add", old_value, t1, new_value);
23498 break;
23499
23500 case SYNC_OP_XOR:
23501 arm_output_op3 (emit, "eor", old_value, t1, new_value);
23502 break;
23503
23504 case SYNC_OP_NONE:
23505 arm_output_op2 (emit, "mov", old_value, required_value);
23506 break;
23507
23508 default:
23509 gcc_unreachable ();
23510 }
23511 }
23512
23513 arm_process_output_memory_barrier (emit, NULL);
23514 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
23515 }
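/* As an illustration, a plain SImode fetch-and-add (no required_value, with a
   separate t2 flag register) expands into something like

        dmb     sy
   .LSYT42:
        ldrex   r0, [r2]
        add     r1, r0, r3
        strex   r4, r1, [r2]
        teq     r4, #0
        bne     .LSYT42
        dmb     sy
   .LSYB42:

   where the register choices and label numbers are purely illustrative.  */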
23516
23517 static rtx
23518 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
23519 {
23520 if (index > 0)
23521 default_value = operands[index - 1];
23522
23523 return default_value;
23524 }
23525
23526 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23527 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
23528
23529 /* Extract the operands for a synchronization instruction from the
23530 instruction's attributes and emit the instruction. */
23531 static void
23532 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
23533 {
23534 rtx result, memory, required_value, new_value, t1, t2;
23535 int early_barrier;
23536 enum machine_mode mode;
23537 enum attr_sync_op sync_op;
23538
23539 result = FETCH_SYNC_OPERAND(result, 0);
23540 memory = FETCH_SYNC_OPERAND(memory, 0);
23541 required_value = FETCH_SYNC_OPERAND(required_value, 0);
23542 new_value = FETCH_SYNC_OPERAND(new_value, 0);
23543 t1 = FETCH_SYNC_OPERAND(t1, 0);
23544 t2 = FETCH_SYNC_OPERAND(t2, 0);
23545 early_barrier =
23546 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
23547 sync_op = get_attr_sync_op (insn);
23548 mode = GET_MODE (memory);
23549
23550 arm_output_sync_loop (emit, mode, result, memory, required_value,
23551 new_value, t1, t2, sync_op, early_barrier);
23552 }
23553
23554 /* Emit a synchronization instruction loop. */
23555 const char *
23556 arm_output_sync_insn (rtx insn, rtx *operands)
23557 {
23558 arm_process_output_sync_insn (arm_emit, insn, operands);
23559 return "";
23560 }
23561
23562 /* Count the number of machine instructions that will be emitted for a
23563 synchronization instruction. Note that the emitter used does not
23564 emit instructions; it just counts them, being careful not
23565 to count labels. */
23566 unsigned int
23567 arm_sync_loop_insns (rtx insn, rtx *operands)
23568 {
23569 arm_insn_count = 0;
23570 arm_process_output_sync_insn (arm_count, insn, operands);
23571 return arm_insn_count;
23572 }
23573
23574 /* Helper to call a target sync instruction generator, dealing with
23575 the variation in operands required by the different generators. */
23576 static rtx
23577 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
23578 rtx memory, rtx required_value, rtx new_value)
23579 {
23580 switch (generator->op)
23581 {
23582 case arm_sync_generator_omn:
23583 gcc_assert (! required_value);
23584 return generator->u.omn (old_value, memory, new_value);
23585
23586 case arm_sync_generator_omrn:
23587 gcc_assert (required_value);
23588 return generator->u.omrn (old_value, memory, required_value, new_value);
23589 }
23590
23591 return NULL;
23592 }
23593
23594 /* Expand a synchronization loop. The synchronization loop is expanded
23595 as an opaque block of instructions in order to ensure that we do
23596 not subsequently get extraneous memory accesses inserted within the
23597 critical region. The exclusive access property of ldrex/strex is
23598 only guaranteed if there are no intervening memory accesses. */
23599 void
23600 arm_expand_sync (enum machine_mode mode,
23601 struct arm_sync_generator *generator,
23602 rtx target, rtx memory, rtx required_value, rtx new_value)
23603 {
23604 if (target == NULL)
23605 target = gen_reg_rtx (mode);
23606
23607 memory = arm_legitimize_sync_memory (memory);
23608 if (mode != SImode)
23609 {
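      /* For narrow modes the loop is run on SImode values: widen the
	 inputs, load the old value into an SImode temporary and copy its
	 low part back to TARGET afterwards.  */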
23610 rtx load_temp = gen_reg_rtx (SImode);
23611
23612 if (required_value)
23613 required_value = convert_modes (SImode, mode, required_value, true);
23614
23615 new_value = convert_modes (SImode, mode, new_value, true);
23616 emit_insn (arm_call_generator (generator, load_temp, memory,
23617 required_value, new_value));
23618 emit_move_insn (target, gen_lowpart (mode, load_temp));
23619 }
23620 else
23621 {
23622 emit_insn (arm_call_generator (generator, target, memory, required_value,
23623 new_value));
23624 }
23625 }
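/* A minimal sketch of how a sync expander is expected to drive this
   routine; the generator function named here is illustrative, the real
   gen_* functions come from the sync patterns in the machine description:

	struct arm_sync_generator generator;
	generator.op = arm_sync_generator_omrn;
	generator.u.omrn = gen_arm_sync_compare_and_swapsi;
	arm_expand_sync (SImode, &generator, operands[0], operands[1],
			 operands[2], operands[3]);

   Operations without a REQUIRED_VALUE use arm_sync_generator_omn and the
   u.omn field instead.  */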
23626
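/* Return the vector sizes, in bytes, that the auto-vectorizer should try,
   as a bitmask: both 16-byte and 8-byte NEON vectors when quad-word
   vectorization is enabled, otherwise 0 (use the default size only).  */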
23627 static unsigned int
23628 arm_autovectorize_vector_sizes (void)
23629 {
23630 return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
23631 }
23632
23633 static bool
23634 arm_vector_alignment_reachable (const_tree type, bool is_packed)
23635 {
23636 /* Vectors which aren't in packed structures will not be less aligned than
23637 the natural alignment of their element type, so this is safe. */
23638 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23639 return !is_packed;
23640
23641 return default_builtin_vector_alignment_reachable (type, is_packed);
23642 }
23643
23644 static bool
23645 arm_builtin_support_vector_misalignment (enum machine_mode mode,
23646 const_tree type, int misalignment,
23647 bool is_packed)
23648 {
23649 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23650 {
23651 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
23652
23653 if (is_packed)
23654 return align == 1;
23655
23656 /* If the misalignment is unknown, we should be able to handle the access
23657 so long as it is not to a member of a packed data structure. */
23658 if (misalignment == -1)
23659 return true;
23660
23661 /* Return true if the misalignment is a multiple of the natural alignment
23662 of the vector's element type. This is probably always going to be
23663 true in practice, since we've already established that this isn't a
23664 packed access. */
23665 return ((misalignment % align) == 0);
23666 }
23667
23668 return default_builtin_support_vector_misalignment (mode, type, misalignment,
23669 is_packed);
23670 }
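/* For example, an access whose type has a unit alignment of 4 and a known
   misalignment of 8 bytes is accepted above (8 % 4 == 0), whereas a
   misalignment of 2 bytes is rejected.  */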
23671
23672 static void
23673 arm_conditional_register_usage (void)
23674 {
23675 int regno;
23676
23677 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
23678 {
23679 for (regno = FIRST_FPA_REGNUM;
23680 regno <= LAST_FPA_REGNUM; ++regno)
23681 fixed_regs[regno] = call_used_regs[regno] = 1;
23682 }
23683
23684 if (TARGET_THUMB1 && optimize_size)
23685 {
23686 /* When optimizing for size on Thumb-1, it's better not
23687 to use the HI regs, because of the overhead of
23688 stacking them. */
23689 for (regno = FIRST_HI_REGNUM;
23690 regno <= LAST_HI_REGNUM; ++regno)
23691 fixed_regs[regno] = call_used_regs[regno] = 1;
23692 }
23693
23694 /* The link register can be clobbered by any branch insn,
23695 but we have no way to track that at present, so mark
23696 it as unavailable. */
23697 if (TARGET_THUMB1)
23698 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
23699
23700 if (TARGET_32BIT && TARGET_HARD_FLOAT)
23701 {
23702 if (TARGET_MAVERICK)
23703 {
23704 for (regno = FIRST_FPA_REGNUM;
23705 regno <= LAST_FPA_REGNUM; ++ regno)
23706 fixed_regs[regno] = call_used_regs[regno] = 1;
23707 for (regno = FIRST_CIRRUS_FP_REGNUM;
23708 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
23709 {
23710 fixed_regs[regno] = 0;
23711 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
23712 }
23713 }
23714 if (TARGET_VFP)
23715 {
23716 /* VFPv3 registers are disabled when earlier VFP
23717 versions are selected due to the definition of
23718 LAST_VFP_REGNUM. */
23719 for (regno = FIRST_VFP_REGNUM;
23720 regno <= LAST_VFP_REGNUM; ++ regno)
23721 {
23722 fixed_regs[regno] = 0;
23723 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
23724 || regno >= FIRST_VFP_REGNUM + 32;
23725 }
23726 }
23727 }
23728
23729 if (TARGET_REALLY_IWMMXT)
23730 {
23731 regno = FIRST_IWMMXT_GR_REGNUM;
23732 /* The 2002/10/09 revision of the XScale ABI has wCG0
23733 and wCG1 as call-preserved registers. The 2002/11/21
23734 revision changed this so that all wCG registers are
23735 scratch registers. */
23736 for (regno = FIRST_IWMMXT_GR_REGNUM;
23737 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
23738 fixed_regs[regno] = 0;
23739 /* The XScale ABI has wR0 - wR9 as scratch registers,
23740 the rest as call-preserved registers. */
23741 for (regno = FIRST_IWMMXT_REGNUM;
23742 regno <= LAST_IWMMXT_REGNUM; ++ regno)
23743 {
23744 fixed_regs[regno] = 0;
23745 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
23746 }
23747 }
23748
23749 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
23750 {
23751 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23752 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23753 }
23754 else if (TARGET_APCS_STACK)
23755 {
23756 fixed_regs[10] = 1;
23757 call_used_regs[10] = 1;
23758 }
23759 /* -mcaller-super-interworking reserves r11 for calls to
23760 _interwork_r11_call_via_rN(). Making the register global
23761 is an easy way of ensuring that it remains valid for all
23762 calls. */
23763 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
23764 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
23765 {
23766 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23767 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23768 if (TARGET_CALLER_INTERWORKING)
23769 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23770 }
23771 SUBTARGET_CONDITIONAL_REGISTER_USAGE
23772 }
23773
23774 static reg_class_t
23775 arm_preferred_rename_class (reg_class_t rclass)
23776 {
23777 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
23778 using GENERAL_REGS. During the register renaming pass we prefer
23779 LO_REGS, so that code size can be reduced. */
23780 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
23781 return LO_REGS;
23782 else
23783 return NO_REGS;
23784 }
23785
23786 /* Compute the attribute "length" of insn "*push_multi".
23787 This function MUST be kept in sync with that insn pattern. */
23788 int
23789 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
23790 {
23791 int i, regno, hi_reg;
23792 int num_saves = XVECLEN (parallel_op, 0);
23793
23794 /* ARM mode. */
23795 if (TARGET_ARM)
23796 return 4;
23797
23798 /* Thumb2 mode. */
23799 regno = REGNO (first_op);
23800 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
23801 for (i = 1; i < num_saves && !hi_reg; i++)
23802 {
23803 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
23804 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
23805 }
23806
23807 if (!hi_reg)
23808 return 2;
23809 return 4;
23810 }
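/* For example, in Thumb-2 "push {r4, r5, lr}" involves only low registers
   and LR, so the 16-bit encoding is available and the length is 2, whereas
   "push {r4, r8}" needs a high register and hence the 32-bit encoding
   (length 4).  */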
23811
23812 /* Check the validity of operands in an ldrd/strd instruction. */
23813 bool
23814 arm_check_ldrd_operands (rtx reg1, rtx reg2, rtx off1, rtx off2)
23815 {
23816 HOST_WIDE_INT offset1 = 0;
23817 HOST_WIDE_INT offset2 = 0;
23818 int regno1 = REGNO (reg1);
23819 int regno2 = REGNO (reg2);
23820 HOST_WIDE_INT max_offset = 1020;
23821
23822 if (TARGET_ARM)
23823 max_offset = 255;
23824
23825 if (off1 != NULL_RTX)
23826 offset1 = INTVAL (off1);
23827 if (off2 != NULL_RTX)
23828 offset2 = INTVAL (off2);
23829
23830 /* The offset range of LDRD is [-max_offset, max_offset]. Here we check if
23831 both offsets lie in the range [-max_offset, max_offset+4]. If one of the
23832 offsets is max_offset+4, the following condition
23833 ((offset1 + 4) == offset2)
23834 ensures that offset1 is max_offset, which is suitable for LDRD. */
23835 if ((offset1 > (max_offset + 4)) || (offset1 < -max_offset)
23836 || ((offset1 & 3) != 0))
23837 return false;
23838 if ((offset2 > (max_offset + 4)) || (offset2 < -max_offset)
23839 || ((offset2 & 3) != 0))
23840 return false;
23841
23842 if ((offset1 + 4) == offset2)
23843 {
23844 if (TARGET_THUMB2)
23845 return true;
23846
23847 /* TARGET_ARM */
23848 if (((regno1 & 1) == 0) && ((regno1 + 1) == regno2)) /* ldrd */
23849 return true;
23850
23851 if ((regno1 < regno2) && ((offset1 <= 4) && (offset1 >= -8))) /* ldm */
23852 return true;
23853 }
23854 if ((offset2 + 4) == offset1)
23855 {
23856 if (TARGET_THUMB2)
23857 return true;
23858
23859 /* TARGET_ARM */
23860 if (((regno2 & 1) == 0) && ((regno2 + 1) == regno1)) /* ldrd */
23861 return true;
23862
23863 if ((regno2 < regno1) && ((offset2 <= 4) && (offset2 >= -8))) /* ldm */
23864 return true;
23865 }
23866
23867 return false;
23868 }
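/* For example, in ARM mode r4/r5 with offsets 8 and 12 satisfy the LDRD
   form (even/odd consecutive registers, offsets 4 apart), while r1/r2
   with offsets 0 and 4 are only accepted through the LDM form (ascending
   registers, offset within [-8, 4]).  */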
23869
23870 /* Check whether the two memory accesses can be merged into an ldrd/strd
23871 instruction. That is, they use the same base register and the gap
23872 between their constant offsets is 4. */
23873 bool
23874 arm_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd)
23875 {
23876 rtx base1, base2;
23877 rtx offset1 = NULL_RTX;
23878 rtx offset2 = NULL_RTX;
23879 rtx addr1 = XEXP (mem1, 0);
23880 rtx addr2 = XEXP (mem2, 0);
23881
23882 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
23883 return false;
23884
23885 if (REG_P (addr1))
23886 base1 = addr1;
23887 else if (GET_CODE (addr1) == PLUS)
23888 {
23889 base1 = XEXP (addr1, 0);
23890 offset1 = XEXP (addr1, 1);
23891 if (!REG_P (base1) || (GET_CODE (offset1) != CONST_INT))
23892 return false;
23893 }
23894 else
23895 return false;
23896
23897 if (REG_P (addr2))
23898 base2 = addr2;
23899 else if (GET_CODE (addr2) == PLUS)
23900 {
23901 base2 = XEXP (addr2, 0);
23902 offset2 = XEXP (addr2, 1);
23903 if (!REG_P (base2) || (GET_CODE (offset2) != CONST_INT))
23904 return false;
23905 }
23906 else
23907 return false;
23908
23909 if (base1 != base2)
23910 return false;
23911
23912 if (ldrd && ((reg1 == reg2) || (reg1 == base1)))
23913 return false;
23914
23915 return arm_check_ldrd_operands (reg1, reg2, offset1, offset2);
23916 }
23917
23918 /* Output instructions for ldrd and count the number of bytes that have
23919 been output. Do not actually output instructions if EMIT_P is false. */
23920 int
23921 arm_output_ldrd (rtx reg1, rtx reg2, rtx base, rtx off1, rtx off2, bool emit_p)
23922 {
23923 int length = 0;
23924 rtx operands[5];
23925 HOST_WIDE_INT offset1 = 0;
23926 HOST_WIDE_INT offset2 = 0;
23927
23928 if (off1 != NULL_RTX)
23929 offset1 = INTVAL (off1);
23930 else
23931 off1 = GEN_INT (0);
23932 if (off2 != NULL_RTX)
23933 offset2 = INTVAL (off2);
23934 else
23935 off2 = GEN_INT (0);
23936 if (offset1 > offset2)
23937 {
23938 rtx tmp;
23939 HOST_WIDE_INT t = offset1; offset1 = offset2; offset2 = t;
23940 tmp = off1; off1 = off2; off2 = tmp;
23941 tmp = reg1; reg1 = reg2; reg2 = tmp;
23942 }
23943
23944 operands[0] = reg1;
23945 operands[1] = reg2;
23946 operands[2] = base;
23947 operands[3] = off1;
23948 operands[4] = off2;
23949
23950 if (TARGET_THUMB2)
23951 {
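      /* With -mfix-cortex-m3-ldrd, avoid an LDRD whose first destination
	 register is also the base register: split the access into two LDR
	 instructions and load the word destined for the base register
	 last, so the address is still valid for both loads.  */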
23952 if (fix_cm3_ldrd && (base == reg1))
23953 {
23954 if (offset1 <= -256)
23955 {
23956 if (emit_p)
23957 output_asm_insn ("sub\t%2, %2, %n3", operands);
23958 length = 4;
23959
23960 if (emit_p)
23961 output_asm_insn ("ldr\t%1, [%2, #4]", operands);
23962 if (low_register_operand (reg2, SImode)
23963 && low_register_operand (base, SImode))
23964 length += 2;
23965 else
23966 length += 4;
23967
23968 if (emit_p)
23969 output_asm_insn ("ldr\t%0, [%2]", operands);
23970 if (low_register_operand (base, SImode))
23971 length += 2;
23972 else
23973 length += 4;
23974 }
23975 else
23976 {
23977 if (emit_p)
23978 output_asm_insn ("ldr\t%1, [%2, %4]", operands);
23979 if (low_register_operand (reg2, SImode) && (offset2 >= 0)
23980 && low_register_operand (base, SImode) && (offset2 < 128))
23981 length += 2;
23982 else
23983 length += 4;
23984
23985 if (emit_p)
23986 output_asm_insn ("ldr\t%0, [%2, %3]", operands);
23987 if (low_register_operand (base, SImode)
23988 && (offset1 >= 0) && (offset1 < 128))
23989 length += 2;
23990 else
23991 length += 4;
23992 }
23993 }
23994 else
23995 {
23996 if (emit_p)
23997 output_asm_insn ("ldrd\t%0, %1, [%2, %3]", operands);
23998 length = 4;
23999 }
24000 }
24001 else /* TARGET_ARM */
24002 {
24003 if ((REGNO (reg2) == (REGNO (reg1) + 1)) && ((REGNO (reg1) & 1) == 0))
24004 {
24005 if (emit_p)
24006 output_asm_insn ("ldrd\t%0, %1, [%2, %3]", operands);
24007 length = 4;
24008 }
24009 else
24010 {
24011 if (emit_p)
24012 {
24013 switch (offset1)
24014 {
24015 case -8:
24016 output_asm_insn ("ldm%(db%)\t%2, {%0, %1}", operands);
24017 break;
24018
24019 case -4:
24020 output_asm_insn ("ldm%(da%)\t%2, {%0, %1}", operands);
24021 break;
24022
24023 case 0:
24024 output_asm_insn ("ldm%(ia%)\t%2, {%0, %1}", operands);
24025 break;
24026
24027 case 4:
24028 output_asm_insn ("ldm%(ib%)\t%2, {%0, %1}", operands);
24029 break;
24030
24031 default:
24032 gcc_unreachable ();
24033 }
24034 }
24035 length = 4;
24036 }
24037 }
24038
24039 return length;
24040 }
24041
24042 #include "gt-arm.h"